From 39c5313ba581cfeb5d793df7c3cef21923473127 Mon Sep 17 00:00:00 2001 From: debian Date: Wed, 4 Mar 2026 16:32:09 -0500 Subject: [PATCH] fase(3): crawling module domain and application --- .ralph/fix_plan.md | 26 ++-- .../application/commands/StartCrawlCommand.js | 36 ++++++ .../application/commands/StopCrawlCommand.js | 27 ++++ .../application/queries/GetSessionQuery.js | 28 +++++ .../application/queries/ListSessionsQuery.js | 23 ++++ .../crawling/domain/entities/CrawlAction.js | 34 +++++ .../crawling/domain/entities/CrawlSession.js | 80 ++++++++++++ .../crawling/domain/entities/CrawlState.js | 31 +++++ .../crawling/domain/events/ActionExecuted.js | 14 +++ .../crawling/domain/events/CrawlCompleted.js | 14 +++ .../crawling/domain/events/CrawlFailed.js | 14 +++ .../crawling/domain/events/CrawlStarted.js | 14 +++ .../crawling/domain/events/StateDiscovered.js | 14 +++ .../domain/ports/ICrawlSessionRepository.js | 2 + .../crawling/domain/ports/ICrawlerEngine.js | 2 + .../crawling/domain/ports/IStateRepository.js | 2 + .../crawling/domain/value-objects/Selector.js | 20 +++ .../domain/value-objects/SessionStatus.js | 21 ++++ .../crawling/domain/value-objects/Url.js | 24 ++++ dist/modules/crawling/index.js | 26 ++++ .../application/commands/StartCrawlCommand.ts | 54 ++++++++ .../application/commands/StopCrawlCommand.ts | 37 ++++++ .../application/queries/GetSessionQuery.ts | 43 +++++++ .../application/queries/ListSessionsQuery.ts | 35 ++++++ .../crawling/domain/entities/CrawlAction.ts | 50 ++++++++ .../crawling/domain/entities/CrawlSession.ts | 119 ++++++++++++++++++ .../crawling/domain/entities/CrawlState.ts | 45 +++++++ .../crawling/domain/events/ActionExecuted.ts | 13 ++ .../crawling/domain/events/CrawlCompleted.ts | 13 ++ .../crawling/domain/events/CrawlFailed.ts | 13 ++ .../crawling/domain/events/CrawlStarted.ts | 13 ++ .../crawling/domain/events/StateDiscovered.ts | 13 ++ .../domain/ports/ICrawlSessionRepository.ts | 9 ++ .../crawling/domain/ports/ICrawlerEngine.ts 
| 9 ++ .../crawling/domain/ports/IStateRepository.ts | 10 ++ .../crawling/domain/value-objects/Selector.ts | 23 ++++ .../domain/value-objects/SessionStatus.ts | 27 ++++ .../crawling/domain/value-objects/Url.ts | 27 ++++ src/modules/crawling/index.ts | 10 ++ tests/modules/crawling.test.ts | 115 +++++++++++++++++ 40 files changed, 1117 insertions(+), 13 deletions(-) create mode 100644 dist/modules/crawling/application/commands/StartCrawlCommand.js create mode 100644 dist/modules/crawling/application/commands/StopCrawlCommand.js create mode 100644 dist/modules/crawling/application/queries/GetSessionQuery.js create mode 100644 dist/modules/crawling/application/queries/ListSessionsQuery.js create mode 100644 dist/modules/crawling/domain/entities/CrawlAction.js create mode 100644 dist/modules/crawling/domain/entities/CrawlSession.js create mode 100644 dist/modules/crawling/domain/entities/CrawlState.js create mode 100644 dist/modules/crawling/domain/events/ActionExecuted.js create mode 100644 dist/modules/crawling/domain/events/CrawlCompleted.js create mode 100644 dist/modules/crawling/domain/events/CrawlFailed.js create mode 100644 dist/modules/crawling/domain/events/CrawlStarted.js create mode 100644 dist/modules/crawling/domain/events/StateDiscovered.js create mode 100644 dist/modules/crawling/domain/ports/ICrawlSessionRepository.js create mode 100644 dist/modules/crawling/domain/ports/ICrawlerEngine.js create mode 100644 dist/modules/crawling/domain/ports/IStateRepository.js create mode 100644 dist/modules/crawling/domain/value-objects/Selector.js create mode 100644 dist/modules/crawling/domain/value-objects/SessionStatus.js create mode 100644 dist/modules/crawling/domain/value-objects/Url.js create mode 100644 dist/modules/crawling/index.js create mode 100644 src/modules/crawling/application/commands/StartCrawlCommand.ts create mode 100644 src/modules/crawling/application/commands/StopCrawlCommand.ts create mode 100644 
src/modules/crawling/application/queries/GetSessionQuery.ts create mode 100644 src/modules/crawling/application/queries/ListSessionsQuery.ts create mode 100644 src/modules/crawling/domain/entities/CrawlAction.ts create mode 100644 src/modules/crawling/domain/entities/CrawlSession.ts create mode 100644 src/modules/crawling/domain/entities/CrawlState.ts create mode 100644 src/modules/crawling/domain/events/ActionExecuted.ts create mode 100644 src/modules/crawling/domain/events/CrawlCompleted.ts create mode 100644 src/modules/crawling/domain/events/CrawlFailed.ts create mode 100644 src/modules/crawling/domain/events/CrawlStarted.ts create mode 100644 src/modules/crawling/domain/events/StateDiscovered.ts create mode 100644 src/modules/crawling/domain/ports/ICrawlSessionRepository.ts create mode 100644 src/modules/crawling/domain/ports/ICrawlerEngine.ts create mode 100644 src/modules/crawling/domain/ports/IStateRepository.ts create mode 100644 src/modules/crawling/domain/value-objects/Selector.ts create mode 100644 src/modules/crawling/domain/value-objects/SessionStatus.ts create mode 100644 src/modules/crawling/domain/value-objects/Url.ts create mode 100644 src/modules/crawling/index.ts create mode 100644 tests/modules/crawling.test.ts diff --git a/.ralph/fix_plan.md b/.ralph/fix_plan.md index ffc454c..0a0c20f 100644 --- a/.ralph/fix_plan.md +++ b/.ralph/fix_plan.md @@ -57,21 +57,21 @@ Spec: `.ralph/specs/phase-02-shared-infrastructure.md` --- -## Phase 3: Crawling Module — Domain + Application [PENDIENTE] +## Phase 3: Crawling Module — Domain + Application [COMPLETO] Spec: `.ralph/specs/phase-03-crawling-domain.md` -- [ ] 3.1: Crear `src/modules/crawling/domain/entities/CrawlSession.ts` — AggregateRoot con url, status, seed, maxStates, statesVisited, config -- [ ] 3.2: Crear `src/modules/crawling/domain/entities/CrawlState.ts` — Entity con url, title, domSnapshot, visitCount -- [ ] 3.3: Crear `src/modules/crawling/domain/entities/CrawlAction.ts` — Entity con type, 
selector, value, seed, stateId, sequenceOrder -- [ ] 3.4: Crear value objects: `Url.ts`, `Selector.ts`, `SessionStatus.ts` (running/completed/failed/stopped) -- [ ] 3.5: Crear events: `CrawlStarted.ts`, `StateDiscovered.ts`, `ActionExecuted.ts`, `CrawlCompleted.ts`, `CrawlFailed.ts` -- [ ] 3.6: Crear ports: `ICrawlerEngine.ts` (launch/close/discoverActions/executeAction/captureState), `ICrawlSessionRepository.ts` (save/findById/findAll/update), `IStateRepository.ts` -- [ ] 3.7: Crear `application/commands/StartCrawlCommand.ts` — use case que valida config, crea CrawlSession, emite CrawlStarted -- [ ] 3.8: Crear `application/commands/StopCrawlCommand.ts` — use case que para sesión, emite CrawlCompleted -- [ ] 3.9: Crear `application/queries/GetSessionQuery.ts` y `ListSessionsQuery.ts` -- [ ] 3.10: Crear `modules/crawling/index.ts` — barrel export público -- [ ] 3.11: Tests: CrawlSession creation + domain events, StartCrawlCommand con mock repository -- [ ] 3.12: Verificar build + commit: `fase(3): crawling module domain and application` +- [x] 3.1: Crear `src/modules/crawling/domain/entities/CrawlSession.ts` — AggregateRoot con url, status, seed, maxStates, statesVisited, config +- [x] 3.2: Crear `src/modules/crawling/domain/entities/CrawlState.ts` — Entity con url, title, domSnapshot, visitCount +- [x] 3.3: Crear `src/modules/crawling/domain/entities/CrawlAction.ts` — Entity con type, selector, value, seed, stateId, sequenceOrder +- [x] 3.4: Crear value objects: `Url.ts`, `Selector.ts`, `SessionStatus.ts` (running/completed/failed/stopped) +- [x] 3.5: Crear events: `CrawlStarted.ts`, `StateDiscovered.ts`, `ActionExecuted.ts`, `CrawlCompleted.ts`, `CrawlFailed.ts` +- [x] 3.6: Crear ports: `ICrawlerEngine.ts` (launch/close/discoverActions/executeAction/captureState), `ICrawlSessionRepository.ts` (save/findById/findAll/update), `IStateRepository.ts` +- [x] 3.7: Crear `application/commands/StartCrawlCommand.ts` — use case que valida config, crea CrawlSession, emite 
CrawlStarted +- [x] 3.8: Crear `application/commands/StopCrawlCommand.ts` — use case que para sesión, emite CrawlCompleted +- [x] 3.9: Crear `application/queries/GetSessionQuery.ts` y `ListSessionsQuery.ts` +- [x] 3.10: Crear `modules/crawling/index.ts` — barrel export público +- [x] 3.11: Tests: CrawlSession creation + domain events, StartCrawlCommand con mock repository +- [x] 3.12: Verificar build + commit: `fase(3): crawling module domain and application` --- diff --git a/dist/modules/crawling/application/commands/StartCrawlCommand.js b/dist/modules/crawling/application/commands/StartCrawlCommand.js new file mode 100644 index 0000000..ab5d19e --- /dev/null +++ b/dist/modules/crawling/application/commands/StartCrawlCommand.js @@ -0,0 +1,36 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.StartCrawlCommand = void 0; +const Result_1 = require("../../../../shared/domain/Result"); +const Url_1 = require("../../domain/value-objects/Url"); +const CrawlSession_1 = require("../../domain/entities/CrawlSession"); +class StartCrawlCommand { + constructor(repository, eventBus) { + this.repository = repository; + this.eventBus = eventBus; + } + async execute(request) { + const urlResult = Url_1.Url.create(request.url); + if (!urlResult.ok) { + return (0, Result_1.Err)(urlResult.error); + } + const sessionResult = CrawlSession_1.CrawlSession.create({ + url: request.url, + seed: request.seed, + maxStates: request.maxStates, + config: request.config, + }); + if (!sessionResult.ok) { + return (0, Result_1.Err)(sessionResult.error); + } + const session = sessionResult.value; + await this.repository.save(session); + const events = session.domainEvents; + for (const event of events) { + await this.eventBus.publish(event); + } + session.clearEvents(); + return (0, Result_1.Ok)({ sessionId: session.id.toString() }); + } +} +exports.StartCrawlCommand = StartCrawlCommand; diff --git 
a/dist/modules/crawling/application/commands/StopCrawlCommand.js b/dist/modules/crawling/application/commands/StopCrawlCommand.js new file mode 100644 index 0000000..8f4c59d --- /dev/null +++ b/dist/modules/crawling/application/commands/StopCrawlCommand.js @@ -0,0 +1,27 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.StopCrawlCommand = void 0; +const Result_1 = require("../../../../shared/domain/Result"); +const UniqueId_1 = require("../../../../shared/domain/UniqueId"); +class StopCrawlCommand { + constructor(repository, eventBus) { + this.repository = repository; + this.eventBus = eventBus; + } + async execute(request) { + const id = UniqueId_1.UniqueId.from(request.sessionId); + const session = await this.repository.findById(id); + if (!session) { + return (0, Result_1.Err)('Session not found'); + } + session.stop(); + await this.repository.update(session); + const events = session.domainEvents; + for (const event of events) { + await this.eventBus.publish(event); + } + session.clearEvents(); + return (0, Result_1.Ok)(undefined); + } +} +exports.StopCrawlCommand = StopCrawlCommand; diff --git a/dist/modules/crawling/application/queries/GetSessionQuery.js b/dist/modules/crawling/application/queries/GetSessionQuery.js new file mode 100644 index 0000000..d9caa0b --- /dev/null +++ b/dist/modules/crawling/application/queries/GetSessionQuery.js @@ -0,0 +1,28 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.GetSessionQuery = void 0; +const Result_1 = require("../../../../shared/domain/Result"); +const UniqueId_1 = require("../../../../shared/domain/UniqueId"); +class GetSessionQuery { + constructor(repository) { + this.repository = repository; + } + async execute(request) { + const id = UniqueId_1.UniqueId.from(request.sessionId); + const session = await this.repository.findById(id); + if (!session) { + return (0, Result_1.Err)('Session not found'); + } + const dto = { + id: 
session.id.toString(), + url: session.url, + status: session.status, + seed: session.seed, + maxStates: session.maxStates, + statesVisited: session.statesVisited, + config: session.config, + }; + return (0, Result_1.Ok)(dto); + } +} +exports.GetSessionQuery = GetSessionQuery; diff --git a/dist/modules/crawling/application/queries/ListSessionsQuery.js b/dist/modules/crawling/application/queries/ListSessionsQuery.js new file mode 100644 index 0000000..f8fa6f2 --- /dev/null +++ b/dist/modules/crawling/application/queries/ListSessionsQuery.js @@ -0,0 +1,23 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ListSessionsQuery = void 0; +const Result_1 = require("../../../../shared/domain/Result"); +class ListSessionsQuery { + constructor(repository) { + this.repository = repository; + } + async execute(_request) { + const sessions = await this.repository.findAll(); + const dtos = sessions.map((session) => ({ + id: session.id.toString(), + url: session.url, + status: session.status, + seed: session.seed, + maxStates: session.maxStates, + statesVisited: session.statesVisited, + config: session.config, + })); + return (0, Result_1.Ok)(dtos); + } +} +exports.ListSessionsQuery = ListSessionsQuery; diff --git a/dist/modules/crawling/domain/entities/CrawlAction.js b/dist/modules/crawling/domain/entities/CrawlAction.js new file mode 100644 index 0000000..a105866 --- /dev/null +++ b/dist/modules/crawling/domain/entities/CrawlAction.js @@ -0,0 +1,34 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlAction = void 0; +const Entity_1 = require("../../../../shared/domain/Entity"); +class CrawlAction extends Entity_1.Entity { + constructor(props, id) { + super(props, id); + } + static create(props, id) { + return new CrawlAction(props, id); + } + get type() { + return this.props.type; + } + get selector() { + return this.props.selector; + } + get value() { + return this.props.value; + } + get seed() { 
+ return this.props.seed; + } + get stateId() { + return this.props.stateId; + } + get sessionId() { + return this.props.sessionId; + } + get sequenceOrder() { + return this.props.sequenceOrder; + } +} +exports.CrawlAction = CrawlAction; diff --git a/dist/modules/crawling/domain/entities/CrawlSession.js b/dist/modules/crawling/domain/entities/CrawlSession.js new file mode 100644 index 0000000..dcd03bc --- /dev/null +++ b/dist/modules/crawling/domain/entities/CrawlSession.js @@ -0,0 +1,80 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlSession = void 0; +const AggregateRoot_1 = require("../../../../shared/domain/AggregateRoot"); +const Result_1 = require("../../../../shared/domain/Result"); +const Url_1 = require("../value-objects/Url"); +const CrawlStarted_1 = require("../events/CrawlStarted"); +const CrawlCompleted_1 = require("../events/CrawlCompleted"); +const CrawlFailed_1 = require("../events/CrawlFailed"); +class CrawlSession extends AggregateRoot_1.AggregateRoot { + constructor(props, id) { + super(props, id); + } + static create(request) { + const urlResult = Url_1.Url.create(request.url); + if (!urlResult.ok) { + return (0, Result_1.Err)(urlResult.error); + } + const props = { + url: request.url, + status: 'running', + seed: request.seed, + maxStates: request.maxStates, + statesVisited: 0, + config: request.config ?? 
{}, + }; + const session = new CrawlSession(props); + session.addDomainEvent(new CrawlStarted_1.CrawlStarted(session.id.toString(), { + url: request.url, + seed: request.seed, + maxStates: request.maxStates, + })); + return (0, Result_1.Ok)(session); + } + get url() { + return this.props.url; + } + get status() { + return this.props.status; + } + get seed() { + return this.props.seed; + } + get maxStates() { + return this.props.maxStates; + } + get statesVisited() { + return this.props.statesVisited; + } + get config() { + return this.props.config; + } + incrementStatesVisited() { + this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 }; + } + complete() { + this.props = { ...this.props, status: 'completed' }; + this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), { + url: this.props.url, + statesVisited: this.props.statesVisited, + })); + } + fail(reason) { + this.props = { ...this.props, status: 'failed' }; + this.addDomainEvent(new CrawlFailed_1.CrawlFailed(this.id.toString(), { + url: this.props.url, + reason, + statesVisited: this.props.statesVisited, + })); + } + stop() { + this.props = { ...this.props, status: 'stopped' }; + this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), { + url: this.props.url, + statesVisited: this.props.statesVisited, + stopped: true, + })); + } +} +exports.CrawlSession = CrawlSession; diff --git a/dist/modules/crawling/domain/entities/CrawlState.js b/dist/modules/crawling/domain/entities/CrawlState.js new file mode 100644 index 0000000..4d3112f --- /dev/null +++ b/dist/modules/crawling/domain/entities/CrawlState.js @@ -0,0 +1,31 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlState = void 0; +const Entity_1 = require("../../../../shared/domain/Entity"); +class CrawlState extends Entity_1.Entity { + constructor(props, id) { + super(props, id); + } + static create(props, id) { + return new CrawlState(props, id); + } + 
get url() { + return this.props.url; + } + get title() { + return this.props.title; + } + get domSnapshot() { + return this.props.domSnapshot; + } + get visitCount() { + return this.props.visitCount; + } + get stateId() { + return this.props.stateId; + } + get sessionId() { + return this.props.sessionId; + } +} +exports.CrawlState = CrawlState; diff --git a/dist/modules/crawling/domain/events/ActionExecuted.js b/dist/modules/crawling/domain/events/ActionExecuted.js new file mode 100644 index 0000000..17e450e --- /dev/null +++ b/dist/modules/crawling/domain/events/ActionExecuted.js @@ -0,0 +1,14 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ActionExecuted = void 0; +const crypto_1 = require("crypto"); +class ActionExecuted { + constructor(aggregateId, payload) { + this.aggregateId = aggregateId; + this.payload = payload; + this.eventId = (0, crypto_1.randomUUID)(); + this.eventName = 'crawl.action_executed'; + this.occurredOn = new Date(); + } +} +exports.ActionExecuted = ActionExecuted; diff --git a/dist/modules/crawling/domain/events/CrawlCompleted.js b/dist/modules/crawling/domain/events/CrawlCompleted.js new file mode 100644 index 0000000..df1dcc4 --- /dev/null +++ b/dist/modules/crawling/domain/events/CrawlCompleted.js @@ -0,0 +1,14 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlCompleted = void 0; +const crypto_1 = require("crypto"); +class CrawlCompleted { + constructor(aggregateId, payload) { + this.aggregateId = aggregateId; + this.payload = payload; + this.eventId = (0, crypto_1.randomUUID)(); + this.eventName = 'crawl.completed'; + this.occurredOn = new Date(); + } +} +exports.CrawlCompleted = CrawlCompleted; diff --git a/dist/modules/crawling/domain/events/CrawlFailed.js b/dist/modules/crawling/domain/events/CrawlFailed.js new file mode 100644 index 0000000..d04f794 --- /dev/null +++ b/dist/modules/crawling/domain/events/CrawlFailed.js @@ -0,0 +1,14 @@ +"use 
strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlFailed = void 0; +const crypto_1 = require("crypto"); +class CrawlFailed { + constructor(aggregateId, payload) { + this.aggregateId = aggregateId; + this.payload = payload; + this.eventId = (0, crypto_1.randomUUID)(); + this.eventName = 'crawl.failed'; + this.occurredOn = new Date(); + } +} +exports.CrawlFailed = CrawlFailed; diff --git a/dist/modules/crawling/domain/events/CrawlStarted.js b/dist/modules/crawling/domain/events/CrawlStarted.js new file mode 100644 index 0000000..74aa554 --- /dev/null +++ b/dist/modules/crawling/domain/events/CrawlStarted.js @@ -0,0 +1,14 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CrawlStarted = void 0; +const crypto_1 = require("crypto"); +class CrawlStarted { + constructor(aggregateId, payload) { + this.aggregateId = aggregateId; + this.payload = payload; + this.eventId = (0, crypto_1.randomUUID)(); + this.eventName = 'crawl.started'; + this.occurredOn = new Date(); + } +} +exports.CrawlStarted = CrawlStarted; diff --git a/dist/modules/crawling/domain/events/StateDiscovered.js b/dist/modules/crawling/domain/events/StateDiscovered.js new file mode 100644 index 0000000..2f2c579 --- /dev/null +++ b/dist/modules/crawling/domain/events/StateDiscovered.js @@ -0,0 +1,14 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.StateDiscovered = void 0; +const crypto_1 = require("crypto"); +class StateDiscovered { + constructor(aggregateId, payload) { + this.aggregateId = aggregateId; + this.payload = payload; + this.eventId = (0, crypto_1.randomUUID)(); + this.eventName = 'crawl.state_discovered'; + this.occurredOn = new Date(); + } +} +exports.StateDiscovered = StateDiscovered; diff --git a/dist/modules/crawling/domain/ports/ICrawlSessionRepository.js b/dist/modules/crawling/domain/ports/ICrawlSessionRepository.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null 
+++ b/dist/modules/crawling/domain/ports/ICrawlSessionRepository.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/dist/modules/crawling/domain/ports/ICrawlerEngine.js b/dist/modules/crawling/domain/ports/ICrawlerEngine.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/dist/modules/crawling/domain/ports/ICrawlerEngine.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/dist/modules/crawling/domain/ports/IStateRepository.js b/dist/modules/crawling/domain/ports/IStateRepository.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/dist/modules/crawling/domain/ports/IStateRepository.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/dist/modules/crawling/domain/value-objects/Selector.js b/dist/modules/crawling/domain/value-objects/Selector.js new file mode 100644 index 0000000..5dc20eb --- /dev/null +++ b/dist/modules/crawling/domain/value-objects/Selector.js @@ -0,0 +1,20 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Selector = void 0; +const ValueObject_1 = require("../../../../shared/domain/ValueObject"); +const Result_1 = require("../../../../shared/domain/Result"); +class Selector extends ValueObject_1.ValueObject { + constructor(props) { + super(props); + } + static create(raw) { + if (!raw || raw.trim().length === 0) { + return (0, Result_1.Err)('Selector must not be empty'); + } + return (0, Result_1.Ok)(new Selector({ value: raw.trim() })); + } + toString() { + return this.props.value; + } +} +exports.Selector = Selector; diff --git a/dist/modules/crawling/domain/value-objects/SessionStatus.js b/dist/modules/crawling/domain/value-objects/SessionStatus.js new file mode 100644 index 0000000..0f37248 --- /dev/null +++ b/dist/modules/crawling/domain/value-objects/SessionStatus.js @@ -0,0 +1,21 @@ +"use strict"; 
+Object.defineProperty(exports, "__esModule", { value: true }); +exports.SessionStatus = void 0; +const ValueObject_1 = require("../../../../shared/domain/ValueObject"); +const Result_1 = require("../../../../shared/domain/Result"); +const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped']; +class SessionStatus extends ValueObject_1.ValueObject { + constructor(props) { + super(props); + } + static create(val) { + if (!VALID_STATUSES.includes(val)) { + return (0, Result_1.Err)(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`); + } + return (0, Result_1.Ok)(new SessionStatus({ value: val })); + } + getValue() { + return this.props.value; + } +} +exports.SessionStatus = SessionStatus; diff --git a/dist/modules/crawling/domain/value-objects/Url.js b/dist/modules/crawling/domain/value-objects/Url.js new file mode 100644 index 0000000..baec019 --- /dev/null +++ b/dist/modules/crawling/domain/value-objects/Url.js @@ -0,0 +1,24 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Url = void 0; +const ValueObject_1 = require("../../../../shared/domain/ValueObject"); +const Result_1 = require("../../../../shared/domain/Result"); +class Url extends ValueObject_1.ValueObject { + constructor(props) { + super(props); + } + static create(raw) { + if (!raw || raw.trim().length === 0) { + return (0, Result_1.Err)('URL must not be empty'); + } + const trimmed = raw.trim(); + if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) { + return (0, Result_1.Err)('URL must start with http:// or https://'); + } + return (0, Result_1.Ok)(new Url({ value: trimmed })); + } + toString() { + return this.props.value; + } +} +exports.Url = Url; diff --git a/dist/modules/crawling/index.js b/dist/modules/crawling/index.js new file mode 100644 index 0000000..604c016 --- /dev/null +++ b/dist/modules/crawling/index.js @@ -0,0 +1,26 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || 
(Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __exportStar = (this && this.__exportStar) || function(m, exports) { + for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); +}; +Object.defineProperty(exports, "__esModule", { value: true }); +__exportStar(require("./domain/entities/CrawlSession"), exports); +__exportStar(require("./domain/entities/CrawlState"), exports); +__exportStar(require("./domain/entities/CrawlAction"), exports); +__exportStar(require("./domain/ports/ICrawlerEngine"), exports); +__exportStar(require("./domain/ports/ICrawlSessionRepository"), exports); +__exportStar(require("./domain/ports/IStateRepository"), exports); +__exportStar(require("./application/commands/StartCrawlCommand"), exports); +__exportStar(require("./application/commands/StopCrawlCommand"), exports); +__exportStar(require("./application/queries/GetSessionQuery"), exports); +__exportStar(require("./application/queries/ListSessionsQuery"), exports); diff --git a/src/modules/crawling/application/commands/StartCrawlCommand.ts b/src/modules/crawling/application/commands/StartCrawlCommand.ts new file mode 100644 index 0000000..ffe2843 --- /dev/null +++ b/src/modules/crawling/application/commands/StartCrawlCommand.ts @@ -0,0 +1,54 @@ +import { UseCase } from '../../../../shared/application/UseCase'; +import { EventBus } from '../../../../shared/application/EventBus'; +import { Result, Ok, Err } from '../../../../shared/domain/Result'; +import { Url } from '../../domain/value-objects/Url'; +import { CrawlSession } from '../../domain/entities/CrawlSession'; 
+import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository'; + +interface StartCrawlRequest { + url: string; + seed: number; + maxStates: number; + config?: Record; +} + +interface StartCrawlResponse { + sessionId: string; +} + +export class StartCrawlCommand implements UseCase { + constructor( + private readonly repository: ICrawlSessionRepository, + private readonly eventBus: EventBus + ) {} + + async execute(request: StartCrawlRequest): Promise> { + const urlResult = Url.create(request.url); + if (!urlResult.ok) { + return Err(urlResult.error); + } + + const sessionResult = CrawlSession.create({ + url: request.url, + seed: request.seed, + maxStates: request.maxStates, + config: request.config, + }); + + if (!sessionResult.ok) { + return Err(sessionResult.error); + } + + const session = sessionResult.value; + + await this.repository.save(session); + + const events = session.domainEvents; + for (const event of events) { + await this.eventBus.publish(event); + } + session.clearEvents(); + + return Ok({ sessionId: session.id.toString() }); + } +} diff --git a/src/modules/crawling/application/commands/StopCrawlCommand.ts b/src/modules/crawling/application/commands/StopCrawlCommand.ts new file mode 100644 index 0000000..e8cb973 --- /dev/null +++ b/src/modules/crawling/application/commands/StopCrawlCommand.ts @@ -0,0 +1,37 @@ +import { UseCase } from '../../../../shared/application/UseCase'; +import { EventBus } from '../../../../shared/application/EventBus'; +import { Result, Ok, Err } from '../../../../shared/domain/Result'; +import { UniqueId } from '../../../../shared/domain/UniqueId'; +import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository'; + +interface StopCrawlRequest { + sessionId: string; +} + +export class StopCrawlCommand implements UseCase { + constructor( + private readonly repository: ICrawlSessionRepository, + private readonly eventBus: EventBus + ) {} + + async execute(request: StopCrawlRequest): 
Promise> { + const id = UniqueId.from(request.sessionId); + const session = await this.repository.findById(id); + + if (!session) { + return Err('Session not found'); + } + + session.stop(); + + await this.repository.update(session); + + const events = session.domainEvents; + for (const event of events) { + await this.eventBus.publish(event); + } + session.clearEvents(); + + return Ok(undefined); + } +} diff --git a/src/modules/crawling/application/queries/GetSessionQuery.ts b/src/modules/crawling/application/queries/GetSessionQuery.ts new file mode 100644 index 0000000..508c84f --- /dev/null +++ b/src/modules/crawling/application/queries/GetSessionQuery.ts @@ -0,0 +1,43 @@ +import { UseCase } from '../../../../shared/application/UseCase'; +import { Result, Ok, Err } from '../../../../shared/domain/Result'; +import { UniqueId } from '../../../../shared/domain/UniqueId'; +import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository'; + +interface GetSessionRequest { + sessionId: string; +} + +interface SessionDTO { + id: string; + url: string; + status: string; + seed: number; + maxStates: number; + statesVisited: number; + config: Record; +} + +export class GetSessionQuery implements UseCase { + constructor(private readonly repository: ICrawlSessionRepository) {} + + async execute(request: GetSessionRequest): Promise> { + const id = UniqueId.from(request.sessionId); + const session = await this.repository.findById(id); + + if (!session) { + return Err('Session not found'); + } + + const dto: SessionDTO = { + id: session.id.toString(), + url: session.url, + status: session.status, + seed: session.seed, + maxStates: session.maxStates, + statesVisited: session.statesVisited, + config: session.config, + }; + + return Ok(dto); + } +} diff --git a/src/modules/crawling/application/queries/ListSessionsQuery.ts b/src/modules/crawling/application/queries/ListSessionsQuery.ts new file mode 100644 index 0000000..9db2faa --- /dev/null +++ 
b/src/modules/crawling/application/queries/ListSessionsQuery.ts
@@ -0,0 +1,45 @@
+import { UseCase } from '../../../../shared/application/UseCase';
+import { Result, Ok } from '../../../../shared/domain/Result';
+import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
+
+/** The query takes no parameters; callers pass an empty object. */
+type ListSessionsRequest = Record<string, never>;
+
+/** Plain-data snapshot of a crawl session returned to callers. */
+interface SessionDTO {
+  id: string;
+  url: string;
+  status: string;
+  seed: number;
+  maxStates: number;
+  statesVisited: number;
+  config: Record<string, unknown>;
+}
+
+/**
+ * Read-side use case that returns every stored crawl session as a DTO.
+ *
+ * NOTE(review): the type arguments in this file were reconstructed from usage;
+ * confirm against the shared `UseCase`/`Result` declarations.
+ */
+export class ListSessionsQuery implements UseCase<ListSessionsRequest, SessionDTO[]> {
+  constructor(private readonly repository: ICrawlSessionRepository) {}
+
+  /** @returns `Ok` with one DTO per session returned by `repository.findAll()`. */
+  async execute(_request: ListSessionsRequest): Promise<Result<SessionDTO[], string>> {
+    const sessions = await this.repository.findAll();
+
+    const dtos: SessionDTO[] = sessions.map((session) => ({
+      id: session.id.toString(),
+      url: session.url,
+      status: session.status,
+      seed: session.seed,
+      maxStates: session.maxStates,
+      statesVisited: session.statesVisited,
+      // Shallow copy so callers cannot mutate the aggregate's config through the DTO.
+      config: { ...session.config },
+    }));
+
+    return Ok(dtos);
+  }
+}
diff --git a/src/modules/crawling/domain/entities/CrawlAction.ts b/src/modules/crawling/domain/entities/CrawlAction.ts
new file mode 100644
index 0000000..14fec26
--- /dev/null
+++ b/src/modules/crawling/domain/entities/CrawlAction.ts
@@ -0,0 +1,53 @@
+import { Entity } from '../../../../shared/domain/Entity';
+import { UniqueId } from '../../../../shared/domain/UniqueId';
+
+/** Props of a crawl action; `selector` and `value` are optional. */
+interface CrawlActionProps {
+  type: string;
+  selector?: string;
+  value?: string;
+  seed: number;
+  stateId: string;
+  sessionId: string;
+  sequenceOrder: number;
+}
+
+/** Entity exposing the props of one crawl action through read-only getters. */
+export class CrawlAction extends Entity<CrawlActionProps> {
+  private constructor(props: CrawlActionProps, id?: UniqueId) {
+    super(props, id);
+  }
+
+  /** Creates an action from props, passing the optional id through to `Entity`. */
+  static create(props: CrawlActionProps, id?: UniqueId): CrawlAction {
+    return new CrawlAction(props, id);
+  }
+
+  get type(): string {
+    return this.props.type;
+  }
+
+  get selector(): string | undefined {
+    return this.props.selector;
+  }
+
+  get value(): string | undefined {
+    return this.props.value;
+  }
+
+  get seed(): number {
+    return this.props.seed;
+  }
+
+  get stateId(): string {
+    return this.props.stateId;
+  }
+
+  get sessionId(): string {
+    return this.props.sessionId;
+  }
+
+  get sequenceOrder(): number {
+    return this.props.sequenceOrder;
+  }
+}
diff --git a/src/modules/crawling/domain/entities/CrawlSession.ts b/src/modules/crawling/domain/entities/CrawlSession.ts
new file mode 100644
index 0000000..296824a
--- /dev/null
+++ b/src/modules/crawling/domain/entities/CrawlSession.ts
@@ -0,0 +1,153 @@
+import { AggregateRoot } from '../../../../shared/domain/AggregateRoot';
+import { UniqueId } from '../../../../shared/domain/UniqueId';
+import { Result, Ok, Err } from '../../../../shared/domain/Result';
+import { Url } from '../value-objects/Url';
+import { CrawlStarted } from '../events/CrawlStarted';
+import { CrawlCompleted } from '../events/CrawlCompleted';
+import { CrawlFailed } from '../events/CrawlFailed';
+
+/** Lifecycle states of a session; every state except 'running' is treated as final. */
+type SessionStatusValue = 'running' | 'completed' | 'failed' | 'stopped';
+
+interface CrawlSessionProps {
+  url: string;
+  status: SessionStatusValue;
+  seed: number;
+  maxStates: number;
+  statesVisited: number;
+  config: Record<string, unknown>;
+}
+
+/** Input accepted by `CrawlSession.create`. */
+export interface CreateCrawlSessionRequest {
+  url: string;
+  seed: number;
+  maxStates: number;
+  config?: Record<string, unknown>;
+}
+
+/**
+ * Aggregate root for one crawl run: tracks status and the visited-state count
+ * and records a domain event for every lifecycle transition.
+ *
+ * NOTE(review): type arguments were reconstructed from usage; confirm against
+ * the shared `AggregateRoot`/`Result` declarations.
+ */
+export class CrawlSession extends AggregateRoot<CrawlSessionProps> {
+  private constructor(props: CrawlSessionProps, id?: UniqueId) {
+    super(props, id);
+  }
+
+  /**
+   * Validates the URL and creates a session in the 'running' state.
+   *
+   * @returns `Ok` with a session holding one `CrawlStarted` event, or `Err`
+   *   with the message produced by `Url.create` when the URL is rejected.
+   */
+  static create(request: CreateCrawlSessionRequest): Result<CrawlSession, string> {
+    const urlResult = Url.create(request.url);
+    if (!urlResult.ok) {
+      return Err(urlResult.error);
+    }
+
+    // Keep the normalised (trimmed) value that was validated, not the raw input.
+    const url = urlResult.value.toString();
+
+    const props: CrawlSessionProps = {
+      url,
+      status: 'running',
+      seed: request.seed,
+      maxStates: request.maxStates,
+      statesVisited: 0,
+      // Copy so later mutation of the caller's object cannot leak into the aggregate.
+      config: { ...request.config },
+    };
+
+    const session = new CrawlSession(props);
+
+    session.addDomainEvent(
+      new CrawlStarted(session.id.toString(), {
+        url,
+        seed: request.seed,
+        maxStates: request.maxStates,
+      })
+    );
+
+    return Ok(session);
+  }
+
+  get url(): string {
+    return this.props.url;
+  }
+
+  get status(): SessionStatusValue {
+    return this.props.status;
+  }
+
+  get seed(): number {
+    return this.props.seed;
+  }
+
+  get maxStates(): number {
+    return this.props.maxStates;
+  }
+
+  get statesVisited(): number {
+    return this.props.statesVisited;
+  }
+
+  get config(): Record<string, unknown> {
+    return this.props.config;
+  }
+
+  /** Adds one to the visited-state counter. */
+  incrementStatesVisited(): void {
+    this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 };
+  }
+
+  /** Marks a running session 'completed' and records `CrawlCompleted`; no-op otherwise. */
+  complete(): void {
+    if (!this.isRunning()) return;
+    this.props = { ...this.props, status: 'completed' };
+    this.addDomainEvent(
+      new CrawlCompleted(this.id.toString(), {
+        url: this.props.url,
+        statesVisited: this.props.statesVisited,
+      })
+    );
+  }
+
+  /** Marks a running session 'failed' and records `CrawlFailed` with the reason; no-op otherwise. */
+  fail(reason: string): void {
+    if (!this.isRunning()) return;
+    this.props = { ...this.props, status: 'failed' };
+    this.addDomainEvent(
+      new CrawlFailed(this.id.toString(), {
+        url: this.props.url,
+        reason,
+        statesVisited: this.props.statesVisited,
+      })
+    );
+  }
+
+  /**
+   * Marks a running session 'stopped' and records `CrawlCompleted` flagged with
+   * `stopped: true`; no-op otherwise.
+   */
+  stop(): void {
+    if (!this.isRunning()) return;
+    this.props = { ...this.props, status: 'stopped' };
+    this.addDomainEvent(
+      new CrawlCompleted(this.id.toString(), {
+        url: this.props.url,
+        statesVisited: this.props.statesVisited,
+        stopped: true,
+      })
+    );
+  }
+
+  /** Terminal states are final, so transitions are only applied while running. */
+  private isRunning(): boolean {
+    return this.props.status === 'running';
+  }
+}
diff --git a/src/modules/crawling/domain/entities/CrawlState.ts b/src/modules/crawling/domain/entities/CrawlState.ts
new file mode 100644
index 0000000..dc6d471
--- /dev/null
+++ b/src/modules/crawling/domain/entities/CrawlState.ts
@@ -0,0 +1,48 @@
+import { Entity } from '../../../../shared/domain/Entity';
+import { UniqueId } from '../../../../shared/domain/UniqueId';
+
+/** Props of a crawl state; all fields are required. */
+interface CrawlStateProps {
+  url: string;
+  title: string;
+  domSnapshot: string;
+  visitCount: number;
+  stateId: string;
+  sessionId: string;
+}
+
+/** Entity exposing the props of one crawl state through read-only getters. */
+export class CrawlState extends Entity<CrawlStateProps> {
+  private constructor(props: CrawlStateProps, id?: UniqueId) {
+    super(props, id);
+  }
+
+  /** Creates a state from props, passing the optional id through to `Entity`. */
+  static create(props: CrawlStateProps, id?: UniqueId): CrawlState {
+    return new CrawlState(props, id);
+  }
+
+  get url(): string {
+    return this.props.url;
+  }
+
+  get title(): string {
+    return this.props.title;
+  }
+
+  get domSnapshot(): string {
+    return this.props.domSnapshot;
+  }
+
+  get visitCount(): number {
+    return this.props.visitCount;
+  }
+
+  get stateId(): string {
+    return this.props.stateId;
+  }
+
+  get sessionId(): string {
+    return this.props.sessionId;
+  }
+}
diff --git a/src/modules/crawling/domain/events/ActionExecuted.ts b/src/modules/crawling/domain/events/ActionExecuted.ts
new file mode 100644
index 0000000..bf464c4
--- /dev/null
+++ b/src/modules/crawling/domain/events/ActionExecuted.ts
@@ -0,0 +1,14 @@
+import { randomUUID } from 'crypto';
+import { DomainEvent } from '../../../../shared/domain/DomainEvent';
+
+/** Domain event 'crawl.action_executed'; its id and timestamp are generated on construction. */
+export class ActionExecuted implements DomainEvent {
+  readonly eventId = randomUUID();
+  readonly eventName = 'crawl.action_executed';
+  readonly occurredOn = new Date();
+
+  constructor(
+    readonly aggregateId: string,
+    readonly payload: Record<string, unknown>
+  ) {}
+}
diff --git a/src/modules/crawling/domain/events/CrawlCompleted.ts b/src/modules/crawling/domain/events/CrawlCompleted.ts
new file mode 100644
index 0000000..5a8d0c4
--- /dev/null
+++ b/src/modules/crawling/domain/events/CrawlCompleted.ts
@@ -0,0 +1,14 @@
+import { randomUUID } from 'crypto';
+import { DomainEvent } from '../../../../shared/domain/DomainEvent';
+
+/** Domain event 'crawl.completed'; its id and timestamp are generated on construction. */
+export class CrawlCompleted implements DomainEvent {
+  readonly eventId = randomUUID();
+  readonly eventName = 'crawl.completed';
+  readonly occurredOn = new Date();
+
+  constructor(
+    readonly aggregateId: string,
+    readonly payload: Record<string, unknown>
+  ) {}
+}
diff --git a/src/modules/crawling/domain/events/CrawlFailed.ts b/src/modules/crawling/domain/events/CrawlFailed.ts
new file mode 100644
index 0000000..cefe293
--- /dev/null
+++ 
b/src/modules/crawling/domain/events/CrawlFailed.ts
@@ -0,0 +1,14 @@
+import { randomUUID } from 'crypto';
+import { DomainEvent } from '../../../../shared/domain/DomainEvent';
+
+/** Domain event 'crawl.failed'; its id and timestamp are generated on construction. */
+export class CrawlFailed implements DomainEvent {
+  readonly eventId = randomUUID();
+  readonly eventName = 'crawl.failed';
+  readonly occurredOn = new Date();
+
+  constructor(
+    readonly aggregateId: string,
+    readonly payload: Record<string, unknown>
+  ) {}
+}
diff --git a/src/modules/crawling/domain/events/CrawlStarted.ts b/src/modules/crawling/domain/events/CrawlStarted.ts
new file mode 100644
index 0000000..091183a
--- /dev/null
+++ b/src/modules/crawling/domain/events/CrawlStarted.ts
@@ -0,0 +1,14 @@
+import { randomUUID } from 'crypto';
+import { DomainEvent } from '../../../../shared/domain/DomainEvent';
+
+/** Domain event 'crawl.started'; its id and timestamp are generated on construction. */
+export class CrawlStarted implements DomainEvent {
+  readonly eventId = randomUUID();
+  readonly eventName = 'crawl.started';
+  readonly occurredOn = new Date();
+
+  constructor(
+    readonly aggregateId: string,
+    readonly payload: Record<string, unknown>
+  ) {}
+}
diff --git a/src/modules/crawling/domain/events/StateDiscovered.ts b/src/modules/crawling/domain/events/StateDiscovered.ts
new file mode 100644
index 0000000..806f133
--- /dev/null
+++ b/src/modules/crawling/domain/events/StateDiscovered.ts
@@ -0,0 +1,14 @@
+import { randomUUID } from 'crypto';
+import { DomainEvent } from '../../../../shared/domain/DomainEvent';
+
+/** Domain event 'crawl.state_discovered'; its id and timestamp are generated on construction. */
+export class StateDiscovered implements DomainEvent {
+  readonly eventId = randomUUID();
+  readonly eventName = 'crawl.state_discovered';
+  readonly occurredOn = new Date();
+
+  constructor(
+    readonly aggregateId: string,
+    readonly payload: Record<string, unknown>
+  ) {}
+}
diff --git a/src/modules/crawling/domain/ports/ICrawlSessionRepository.ts b/src/modules/crawling/domain/ports/ICrawlSessionRepository.ts
new file mode 100644
index 0000000..50330b4
--- /dev/null
+++ b/src/modules/crawling/domain/ports/ICrawlSessionRepository.ts
@@ -0,0 +1,9 @@
+import { CrawlSession } from '../entities/CrawlSession';
+import { UniqueId } from '../../../../shared/domain/UniqueId';
+
+export interface ICrawlSessionRepository { // NOTE(review): Promise result types reconstructed from usage; confirm
+  save(session: CrawlSession): Promise<void>;
+  findById(id: UniqueId): Promise<CrawlSession | null>; // callers treat a falsy result as "not found"
+  findAll(): Promise<CrawlSession[]>;
+  update(session: CrawlSession): Promise<void>;
+}
diff --git a/src/modules/crawling/domain/ports/ICrawlerEngine.ts b/src/modules/crawling/domain/ports/ICrawlerEngine.ts
new file mode 100644
index 0000000..5e5cb44
--- /dev/null
+++ b/src/modules/crawling/domain/ports/ICrawlerEngine.ts
@@ -0,0 +1,15 @@
+import { IState, IAction, IObservation } from '../../../../core/interfaces';
+
+/**
+ * Port for the engine that launches a URL, discovers and executes actions, and
+ * captures state.
+ *
+ * NOTE(review): the Promise result types were reconstructed from the imported
+ * core interfaces; confirm against the adapters that implement this port.
+ */
+export interface ICrawlerEngine {
+  launch(url: string): Promise<void>;
+  close(): Promise<void>;
+  discoverActions(state: IState): Promise<IAction[]>;
+  executeAction(action: IAction): Promise<IObservation>;
+  captureState(): Promise<IState>;
+}
diff --git a/src/modules/crawling/domain/ports/IStateRepository.ts b/src/modules/crawling/domain/ports/IStateRepository.ts
new file mode 100644
index 0000000..293b6b3
--- /dev/null
+++ b/src/modules/crawling/domain/ports/IStateRepository.ts
@@ -0,0 +1,14 @@
+import { CrawlState } from '../entities/CrawlState';
+import { UniqueId } from '../../../../shared/domain/UniqueId';
+
+/**
+ * Persistence port for crawl states.
+ * NOTE(review): Promise result types reconstructed by analogy with the session repository; confirm.
+ */
+export interface IStateRepository {
+  save(state: CrawlState): Promise<void>;
+  findById(id: UniqueId): Promise<CrawlState | null>;
+  findAll(): Promise<CrawlState[]>;
+  findBySessionId(sessionId: string): Promise<CrawlState[]>;
+  update(state: CrawlState): Promise<void>;
+}
diff --git a/src/modules/crawling/domain/value-objects/Selector.ts b/src/modules/crawling/domain/value-objects/Selector.ts
new file mode 100644
index 0000000..e9397b7
--- /dev/null
+++ b/src/modules/crawling/domain/value-objects/Selector.ts
@@ -0,0 +1,25 @@
+import { ValueObject } from '../../../../shared/domain/ValueObject';
+import { Result, Ok, Err } from '../../../../shared/domain/Result';
+
+interface SelectorProps {
+  value: string;
+}
+
+/** Value object holding a non-empty, trimmed selector string. */
+export class Selector extends ValueObject<SelectorProps> {
+  private constructor(props: SelectorProps) {
+    super(props);
+  }
+
+  /** @returns `Ok` with the trimmed selector, or `Err` when the input is empty or whitespace. */
+  static create(raw: string): Result<Selector, string> {
+    if (!raw || raw.trim().length === 0) {
+      return Err('Selector must not be empty');
+    }
+    return Ok(new Selector({ value: raw.trim() }));
+  }
+
+  toString(): string {
+    return this.props.value;
+  }
+}
diff --git a/src/modules/crawling/domain/value-objects/SessionStatus.ts b/src/modules/crawling/domain/value-objects/SessionStatus.ts
new file mode 100644
index 0000000..a5ef8c0
--- /dev/null
+++ b/src/modules/crawling/domain/value-objects/SessionStatus.ts
@@ -0,0 +1,35 @@
+import { ValueObject } from '../../../../shared/domain/ValueObject';
+import { Result, Ok, Err } from '../../../../shared/domain/Result';
+
+/** Every accepted session status; `StatusValue` is derived from this list. */
+const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped'] as const;
+
+type StatusValue = (typeof VALID_STATUSES)[number];
+
+interface SessionStatusProps {
+  value: StatusValue;
+}
+
+/** Type guard that narrows an arbitrary string to a known status. */
+function isStatusValue(val: string): val is StatusValue {
+  return (VALID_STATUSES as readonly string[]).includes(val);
+}
+
+/** Value object holding a validated session status. */
+export class SessionStatus extends ValueObject<SessionStatusProps> {
+  private constructor(props: SessionStatusProps) {
+    super(props);
+  }
+
+  /** @returns `Ok` for a known status, otherwise `Err` listing the accepted values. */
+  static create(val: string): Result<SessionStatus, string> {
+    if (!isStatusValue(val)) {
+      return Err(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`);
+    }
+    return Ok(new SessionStatus({ value: val }));
+  }
+
+  getValue(): StatusValue {
+    return this.props.value;
+  }
+}
diff --git a/src/modules/crawling/domain/value-objects/Url.ts b/src/modules/crawling/domain/value-objects/Url.ts
new file mode 100644
index 0000000..a98d5b2
--- /dev/null
+++ b/src/modules/crawling/domain/value-objects/Url.ts
@@ -0,0 +1,38 @@
+import { ValueObject } from '../../../../shared/domain/ValueObject';
+import { Result, Ok, Err } from '../../../../shared/domain/Result';
+
+interface UrlProps {
+  value: string;
+}
+
+/** Value object holding a trimmed http(s) URL that parses successfully. */
+export class Url extends ValueObject<UrlProps> {
+  private constructor(props: UrlProps) {
+    super(props);
+  }
+
+  /**
+   * @returns `Ok` with the trimmed URL, or `Err` when the input is empty, does
+   *   not start with http:// or https://, or cannot be parsed as a URL.
+   */
+  static create(raw: string): Result<Url, string> {
+    if (!raw || raw.trim().length === 0) {
+      return Err('URL must not be empty');
+    }
+    const trimmed = raw.trim();
+    if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) {
+      return Err('URL must start with http:// or https://');
+    }
+    // The prefix check alone accepts inputs such as "http://" with no host.
+    try {
+      new URL(trimmed);
+    } catch {
+      return Err('URL is not a valid http(s) address');
+    }
+    return Ok(new Url({ value: trimmed }));
+  }
+
+  toString(): string {
+    return this.props.value;
+  }
+}
diff --git a/src/modules/crawling/index.ts b/src/modules/crawling/index.ts
new file mode 100644
index 0000000..3157777
--- /dev/null
+++ b/src/modules/crawling/index.ts
@@ -0,0 +1,10 @@
+export * from './domain/entities/CrawlSession';
+export * from './domain/entities/CrawlState';
+export * from './domain/entities/CrawlAction';
+export * from './domain/ports/ICrawlerEngine';
+export * from './domain/ports/ICrawlSessionRepository';
+export * from './domain/ports/IStateRepository';
+export * from './application/commands/StartCrawlCommand';
+export * from './application/commands/StopCrawlCommand';
+export * from './application/queries/GetSessionQuery';
+export * from './application/queries/ListSessionsQuery';
diff --git a/tests/modules/crawling.test.ts b/tests/modules/crawling.test.ts
new file mode 100644
index 0000000..5916914
--- /dev/null
+++ b/tests/modules/crawling.test.ts
@@ 
-0,0 +1,125 @@
+import { CrawlSession } from '../../src/modules/crawling/domain/entities/CrawlSession';
+import { StartCrawlCommand } from '../../src/modules/crawling/application/commands/StartCrawlCommand';
+import { ICrawlSessionRepository } from '../../src/modules/crawling/domain/ports/ICrawlSessionRepository';
+import { EventBus } from '../../src/shared/application/EventBus';
+import { DomainEvent } from '../../src/shared/domain/DomainEvent';
+import { EventHandler } from '../../src/shared/application/EventHandler';
+import { UniqueId } from '../../src/shared/domain/UniqueId';
+import { isOk, isErr } from '../../src/shared/domain/Result';
+
+// --- CrawlSession domain ---
+describe('CrawlSession', () => {
+  it('create returns Ok for valid url', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 42, maxStates: 10 });
+    expect(isOk(result)).toBe(true);
+  });
+
+  it('create returns Err for invalid url', () => {
+    const result = CrawlSession.create({ url: 'not-a-url', seed: 42, maxStates: 10 });
+    expect(isErr(result)).toBe(true);
+  });
+
+  it('emits CrawlStarted event on create', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    const session = result.value;
+    expect(session.domainEvents).toHaveLength(1);
+    expect(session.domainEvents[0]?.eventName).toBe('crawl.started');
+  });
+
+  it('status starts as running', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    expect(result.value.status).toBe('running');
+  });
+
+  it('complete changes status and adds event', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    const session = result.value;
+    session.clearEvents();
+    session.complete();
+    expect(session.status).toBe('completed');
+    expect(session.domainEvents[0]?.eventName).toBe('crawl.completed');
+  });
+
+  it('fail changes status and adds CrawlFailed event', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    const session = result.value;
+    session.clearEvents();
+    session.fail('browser crashed');
+    expect(session.status).toBe('failed');
+    expect(session.domainEvents[0]?.eventName).toBe('crawl.failed');
+  });
+
+  it('stop changes status and adds CrawlCompleted event', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    const session = result.value;
+    session.clearEvents();
+    session.stop();
+    expect(session.status).toBe('stopped');
+    expect(session.domainEvents[0]?.eventName).toBe('crawl.completed');
+  });
+
+  it('incrementStatesVisited increments counter', () => {
+    const result = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!result.ok) throw new Error('Expected Ok');
+    const session = result.value;
+    session.incrementStatesVisited();
+    session.incrementStatesVisited();
+    expect(session.statesVisited).toBe(2);
+  });
+
+  it('equals compares by id', () => {
+    const r1 = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    const r2 = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    if (!r1.ok || !r2.ok) throw new Error('Expected Ok');
+    expect(r1.value.equals(r2.value)).toBe(false); // different ids
+  });
+});
+
+// --- StartCrawlCommand ---
+describe('StartCrawlCommand', () => {
+  const makeMockRepo = (): ICrawlSessionRepository => {
+    const store = new Map<string, CrawlSession>(); // in-memory stand-in keyed by session id
+    return {
+      save: async (session) => { store.set(session.id.toString(), session); },
+      findById: async (id: UniqueId) => store.get(id.toString()) ?? null,
+      findAll: async () => [...store.values()],
+      update: async (session) => { store.set(session.id.toString(), session); },
+    };
+  };
+
+  const makeMockBus = (): EventBus & { events: DomainEvent[] } => {
+    const events: DomainEvent[] = [];
+    return {
+      events,
+      publish: async (event: DomainEvent) => { events.push(event); },
+      subscribe: (_name: string, _handler: EventHandler) => {},
+    };
+  };
+
+  it('returns Ok with sessionId for valid url', async () => {
+    const cmd = new StartCrawlCommand(makeMockRepo(), makeMockBus());
+    const result = await cmd.execute({ url: 'https://example.com', seed: 42, maxStates: 10 });
+    expect(isOk(result)).toBe(true);
+    if (result.ok) {
+      expect(typeof result.value.sessionId).toBe('string');
+    }
+  });
+
+  it('returns Err for invalid url', async () => {
+    const cmd = new StartCrawlCommand(makeMockRepo(), makeMockBus());
+    const result = await cmd.execute({ url: 'not-a-url', seed: 42, maxStates: 10 });
+    expect(isErr(result)).toBe(true);
+  });
+
+  it('publishes CrawlStarted event via EventBus', async () => {
+    const bus = makeMockBus();
+    const cmd = new StartCrawlCommand(makeMockRepo(), bus);
+    await cmd.execute({ url: 'https://example.com', seed: 1, maxStates: 5 });
+    expect(bus.events.some(e => e.eventName === 'crawl.started')).toBe(true);
+  });
+});