fase(3): crawling module domain and application
This commit is contained in:
@@ -57,21 +57,21 @@ Spec: `.ralph/specs/phase-02-shared-infrastructure.md`
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Crawling Module — Domain + Application [PENDIENTE]
|
||||
## Phase 3: Crawling Module — Domain + Application [COMPLETO]
|
||||
Spec: `.ralph/specs/phase-03-crawling-domain.md`
|
||||
|
||||
- [ ] 3.1: Crear `src/modules/crawling/domain/entities/CrawlSession.ts` — AggregateRoot con url, status, seed, maxStates, statesVisited, config
|
||||
- [ ] 3.2: Crear `src/modules/crawling/domain/entities/CrawlState.ts` — Entity con url, title, domSnapshot, visitCount
|
||||
- [ ] 3.3: Crear `src/modules/crawling/domain/entities/CrawlAction.ts` — Entity con type, selector, value, seed, stateId, sequenceOrder
|
||||
- [ ] 3.4: Crear value objects: `Url.ts`, `Selector.ts`, `SessionStatus.ts` (running/completed/failed/stopped)
|
||||
- [ ] 3.5: Crear events: `CrawlStarted.ts`, `StateDiscovered.ts`, `ActionExecuted.ts`, `CrawlCompleted.ts`, `CrawlFailed.ts`
|
||||
- [ ] 3.6: Crear ports: `ICrawlerEngine.ts` (launch/close/discoverActions/executeAction/captureState), `ICrawlSessionRepository.ts` (save/findById/findAll/update), `IStateRepository.ts`
|
||||
- [ ] 3.7: Crear `application/commands/StartCrawlCommand.ts` — use case que valida config, crea CrawlSession, emite CrawlStarted
|
||||
- [ ] 3.8: Crear `application/commands/StopCrawlCommand.ts` — use case que para sesión, emite CrawlCompleted
|
||||
- [ ] 3.9: Crear `application/queries/GetSessionQuery.ts` y `ListSessionsQuery.ts`
|
||||
- [ ] 3.10: Crear `modules/crawling/index.ts` — barrel export público
|
||||
- [ ] 3.11: Tests: CrawlSession creation + domain events, StartCrawlCommand con mock repository
|
||||
- [ ] 3.12: Verificar build + commit: `fase(3): crawling module domain and application`
|
||||
- [x] 3.1: Crear `src/modules/crawling/domain/entities/CrawlSession.ts` — AggregateRoot con url, status, seed, maxStates, statesVisited, config
|
||||
- [x] 3.2: Crear `src/modules/crawling/domain/entities/CrawlState.ts` — Entity con url, title, domSnapshot, visitCount
|
||||
- [x] 3.3: Crear `src/modules/crawling/domain/entities/CrawlAction.ts` — Entity con type, selector, value, seed, stateId, sequenceOrder
|
||||
- [x] 3.4: Crear value objects: `Url.ts`, `Selector.ts`, `SessionStatus.ts` (running/completed/failed/stopped)
|
||||
- [x] 3.5: Crear events: `CrawlStarted.ts`, `StateDiscovered.ts`, `ActionExecuted.ts`, `CrawlCompleted.ts`, `CrawlFailed.ts`
|
||||
- [x] 3.6: Crear ports: `ICrawlerEngine.ts` (launch/close/discoverActions/executeAction/captureState), `ICrawlSessionRepository.ts` (save/findById/findAll/update), `IStateRepository.ts`
|
||||
- [x] 3.7: Crear `application/commands/StartCrawlCommand.ts` — use case que valida config, crea CrawlSession, emite CrawlStarted
|
||||
- [x] 3.8: Crear `application/commands/StopCrawlCommand.ts` — use case que para sesión, emite CrawlCompleted
|
||||
- [x] 3.9: Crear `application/queries/GetSessionQuery.ts` y `ListSessionsQuery.ts`
|
||||
- [x] 3.10: Crear `modules/crawling/index.ts` — barrel export público
|
||||
- [x] 3.11: Tests: CrawlSession creation + domain events, StartCrawlCommand con mock repository
|
||||
- [x] 3.12: Verificar build + commit: `fase(3): crawling module domain and application`
|
||||
|
||||
---
|
||||
|
||||
|
||||
36
dist/modules/crawling/application/commands/StartCrawlCommand.js
vendored
Normal file
36
dist/modules/crawling/application/commands/StartCrawlCommand.js
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.StartCrawlCommand = void 0;
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
const Url_1 = require("../../domain/value-objects/Url");
|
||||
const CrawlSession_1 = require("../../domain/entities/CrawlSession");
|
||||
/**
 * Use case that starts a crawl: validates the requested URL, builds a
 * CrawlSession, stores it through the repository and then publishes the
 * session's pending domain events on the event bus.
 */
class StartCrawlCommand {
    /**
     * @param repository Object exposing an async `save(session)`.
     * @param eventBus Object exposing an async `publish(event)`.
     */
    constructor(repository, eventBus) {
        this.repository = repository;
        this.eventBus = eventBus;
    }
    /**
     * @param request Carries `url`, `seed`, `maxStates` and an optional `config`.
     * @returns Ok({ sessionId }) on success; Err with the validation message
     *          when the URL or the session cannot be created.
     */
    async execute(request) {
        const checkedUrl = Url_1.Url.create(request.url);
        if (!checkedUrl.ok) {
            return (0, Result_1.Err)(checkedUrl.error);
        }
        const creation = CrawlSession_1.CrawlSession.create({
            url: request.url,
            seed: request.seed,
            maxStates: request.maxStates,
            config: request.config,
        });
        if (!creation.ok) {
            return (0, Result_1.Err)(creation.error);
        }
        const crawlSession = creation.value;
        // Persist first so an event is never published for an unsaved session.
        await this.repository.save(crawlSession);
        // Events are published one at a time, in the order the session reports them.
        for (const pendingEvent of crawlSession.domainEvents) {
            await this.eventBus.publish(pendingEvent);
        }
        crawlSession.clearEvents();
        const sessionId = crawlSession.id.toString();
        return (0, Result_1.Ok)({ sessionId });
    }
}
|
||||
exports.StartCrawlCommand = StartCrawlCommand;
|
||||
27
dist/modules/crawling/application/commands/StopCrawlCommand.js
vendored
Normal file
27
dist/modules/crawling/application/commands/StopCrawlCommand.js
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.StopCrawlCommand = void 0;
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
|
||||
/**
 * Use case that stops a crawl session: loads it by id, marks it as stopped,
 * persists the change and publishes the resulting domain events.
 */
class StopCrawlCommand {
    /**
     * @param repository Object exposing async `findById(id)` and `update(session)`.
     * @param eventBus Object exposing an async `publish(event)`.
     */
    constructor(repository, eventBus) {
        this.repository = repository;
        this.eventBus = eventBus;
    }
    /**
     * @param request Carries the `sessionId` string of the session to stop.
     * @returns Ok(undefined) when the session was stopped; Err('Session not found')
     *          when no session has that id; Err describing the current status
     *          when the session is no longer running.
     */
    async execute(request) {
        const id = UniqueId_1.UniqueId.from(request.sessionId);
        const session = await this.repository.findById(id);
        if (!session) {
            return (0, Result_1.Err)('Session not found');
        }
        // Only a running session can be stopped. Without this guard a session
        // that already completed or failed would have its final status
        // overwritten with 'stopped' and a second completion event published.
        if (session.status !== 'running') {
            return (0, Result_1.Err)(`Session is not running (current status: ${session.status})`);
        }
        session.stop();
        await this.repository.update(session);
        // Publish sequentially, after the update has been persisted.
        const events = session.domainEvents;
        for (const event of events) {
            await this.eventBus.publish(event);
        }
        session.clearEvents();
        return (0, Result_1.Ok)(undefined);
    }
}
|
||||
exports.StopCrawlCommand = StopCrawlCommand;
|
||||
28
dist/modules/crawling/application/queries/GetSessionQuery.js
vendored
Normal file
28
dist/modules/crawling/application/queries/GetSessionQuery.js
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.GetSessionQuery = void 0;
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
|
||||
/**
 * Read-side use case returning a plain-object view of a single crawl session.
 */
class GetSessionQuery {
    /** @param repository Object exposing an async `findById(id)`. */
    constructor(repository) {
        this.repository = repository;
    }
    /**
     * @param request Carries the `sessionId` string to look up.
     * @returns Ok(dto) with the session's fields, or Err('Session not found').
     */
    async execute(request) {
        const lookupId = UniqueId_1.UniqueId.from(request.sessionId);
        const found = await this.repository.findById(lookupId);
        if (found) {
            // Flatten the aggregate into a plain object; the id becomes a string.
            return (0, Result_1.Ok)({
                id: found.id.toString(),
                url: found.url,
                status: found.status,
                seed: found.seed,
                maxStates: found.maxStates,
                statesVisited: found.statesVisited,
                config: found.config,
            });
        }
        return (0, Result_1.Err)('Session not found');
    }
}
|
||||
exports.GetSessionQuery = GetSessionQuery;
|
||||
23
dist/modules/crawling/application/queries/ListSessionsQuery.js
vendored
Normal file
23
dist/modules/crawling/application/queries/ListSessionsQuery.js
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.ListSessionsQuery = void 0;
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
/**
 * Read-side use case returning a plain-object view of every stored crawl session.
 */
class ListSessionsQuery {
    /** @param repository Object exposing an async `findAll()`. */
    constructor(repository) {
        this.repository = repository;
    }
    /**
     * @param _request Unused; present only to satisfy the use-case signature.
     * @returns Ok(array) with one plain object per session returned by the repository.
     */
    async execute(_request) {
        // Flattens one aggregate into a plain object; the id becomes a string.
        const toDto = (item) => {
            return {
                id: item.id.toString(),
                url: item.url,
                status: item.status,
                seed: item.seed,
                maxStates: item.maxStates,
                statesVisited: item.statesVisited,
                config: item.config,
            };
        };
        const allSessions = await this.repository.findAll();
        return (0, Result_1.Ok)(allSessions.map((item) => toDto(item)));
    }
}
|
||||
exports.ListSessionsQuery = ListSessionsQuery;
|
||||
34
dist/modules/crawling/domain/entities/CrawlAction.js
vendored
Normal file
34
dist/modules/crawling/domain/entities/CrawlAction.js
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlAction = void 0;
|
||||
const Entity_1 = require("../../../../shared/domain/Entity");
|
||||
/**
 * Entity describing one action taken during a crawl. All data lives in
 * `this.props`; the getters below expose it read-only.
 */
class CrawlAction extends Entity_1.Entity {
    constructor(props, id) {
        super(props, id);
    }
    /**
     * Factory wrapping the constructor.
     * @param props Action fields (type, selector, value, seed, stateId, sessionId, sequenceOrder).
     * @param id Optional identity forwarded to the base Entity.
     */
    static create(props, id) {
        const action = new CrawlAction(props, id);
        return action;
    }
    /** Kind of action, as supplied in props. */
    get type() {
        const { type } = this.props;
        return type;
    }
    /** Selector the action targets, if one was supplied. */
    get selector() {
        const { selector } = this.props;
        return selector;
    }
    /** Value associated with the action, if one was supplied. */
    get value() {
        const { value } = this.props;
        return value;
    }
    /** Seed recorded with the action. */
    get seed() {
        const { seed } = this.props;
        return seed;
    }
    /** Id of the state this action belongs to. */
    get stateId() {
        const { stateId } = this.props;
        return stateId;
    }
    /** Id of the session this action belongs to. */
    get sessionId() {
        const { sessionId } = this.props;
        return sessionId;
    }
    /** Position of this action in its sequence. */
    get sequenceOrder() {
        const { sequenceOrder } = this.props;
        return sequenceOrder;
    }
}
|
||||
exports.CrawlAction = CrawlAction;
|
||||
80
dist/modules/crawling/domain/entities/CrawlSession.js
vendored
Normal file
80
dist/modules/crawling/domain/entities/CrawlSession.js
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlSession = void 0;
|
||||
const AggregateRoot_1 = require("../../../../shared/domain/AggregateRoot");
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
const Url_1 = require("../value-objects/Url");
|
||||
const CrawlStarted_1 = require("../events/CrawlStarted");
|
||||
const CrawlCompleted_1 = require("../events/CrawlCompleted");
|
||||
const CrawlFailed_1 = require("../events/CrawlFailed");
|
||||
/**
 * Aggregate root for one crawl run. Holds the target URL, lifecycle status
 * ('running' | 'completed' | 'failed' | 'stopped'), seed, state budget and
 * visit counter, and records a domain event on creation and on each
 * lifecycle transition.
 */
class CrawlSession extends AggregateRoot_1.AggregateRoot {
    constructor(props, id) {
        super(props, id);
    }
    /**
     * Validates the URL and builds a new session in the 'running' status with
     * zero visited states, queuing a CrawlStarted event.
     *
     * @param request Carries `url`, `seed`, `maxStates` and an optional `config`
     *                (defaults to an empty object when null/undefined).
     * @returns Ok(session), or Err with the URL validation message.
     */
    static create(request) {
        const urlResult = Url_1.Url.create(request.url);
        if (!urlResult.ok) {
            return (0, Result_1.Err)(urlResult.error);
        }
        // Store the value the Url object actually validated (it is trimmed),
        // not the raw input, so surrounding whitespace never reaches
        // persistence or event payloads.
        const url = urlResult.value.toString();
        const props = {
            url,
            status: 'running',
            seed: request.seed,
            maxStates: request.maxStates,
            statesVisited: 0,
            config: request.config ?? {},
        };
        const session = new CrawlSession(props);
        session.addDomainEvent(new CrawlStarted_1.CrawlStarted(session.id.toString(), {
            url,
            seed: request.seed,
            maxStates: request.maxStates,
        }));
        return (0, Result_1.Ok)(session);
    }
    get url() {
        return this.props.url;
    }
    get status() {
        return this.props.status;
    }
    get seed() {
        return this.props.seed;
    }
    get maxStates() {
        return this.props.maxStates;
    }
    get statesVisited() {
        return this.props.statesVisited;
    }
    get config() {
        return this.props.config;
    }
    /** Adds one to the visited-state counter (props are replaced, not mutated). */
    incrementStatesVisited() {
        this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 };
    }
    /**
     * Marks a running session as completed and queues a CrawlCompleted event.
     * Does nothing when the session already left the 'running' status, so a
     * terminal status is never overwritten and no duplicate event is queued.
     */
    complete() {
        if (this.props.status !== 'running') {
            return;
        }
        this.props = { ...this.props, status: 'completed' };
        this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), {
            url: this.props.url,
            statesVisited: this.props.statesVisited,
        }));
    }
    /**
     * Marks a running session as failed and queues a CrawlFailed event.
     * Does nothing when the session already left the 'running' status.
     *
     * @param reason Failure description copied into the event payload.
     */
    fail(reason) {
        if (this.props.status !== 'running') {
            return;
        }
        this.props = { ...this.props, status: 'failed' };
        this.addDomainEvent(new CrawlFailed_1.CrawlFailed(this.id.toString(), {
            url: this.props.url,
            reason,
            statesVisited: this.props.statesVisited,
        }));
    }
    /**
     * Marks a running session as stopped and queues a CrawlCompleted event
     * whose payload carries `stopped: true`.
     * Does nothing when the session already left the 'running' status.
     */
    stop() {
        if (this.props.status !== 'running') {
            return;
        }
        this.props = { ...this.props, status: 'stopped' };
        this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), {
            url: this.props.url,
            statesVisited: this.props.statesVisited,
            stopped: true,
        }));
    }
}
|
||||
exports.CrawlSession = CrawlSession;
|
||||
31
dist/modules/crawling/domain/entities/CrawlState.js
vendored
Normal file
31
dist/modules/crawling/domain/entities/CrawlState.js
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlState = void 0;
|
||||
const Entity_1 = require("../../../../shared/domain/Entity");
|
||||
/**
 * Entity describing one state captured during a crawl. All data lives in
 * `this.props`; the getters below expose it read-only.
 */
class CrawlState extends Entity_1.Entity {
    constructor(props, id) {
        super(props, id);
    }
    /**
     * Factory wrapping the constructor.
     * @param props State fields (url, title, domSnapshot, visitCount, stateId, sessionId).
     * @param id Optional identity forwarded to the base Entity.
     */
    static create(props, id) {
        const state = new CrawlState(props, id);
        return state;
    }
    /** URL recorded for this state. */
    get url() {
        const { url } = this.props;
        return url;
    }
    /** Title recorded for this state. */
    get title() {
        const { title } = this.props;
        return title;
    }
    /** DOM snapshot recorded for this state. */
    get domSnapshot() {
        const { domSnapshot } = this.props;
        return domSnapshot;
    }
    /** Visit counter recorded for this state. */
    get visitCount() {
        const { visitCount } = this.props;
        return visitCount;
    }
    /** State identifier supplied in props. */
    get stateId() {
        const { stateId } = this.props;
        return stateId;
    }
    /** Id of the session this state belongs to. */
    get sessionId() {
        const { sessionId } = this.props;
        return sessionId;
    }
}
|
||||
exports.CrawlState = CrawlState;
|
||||
14
dist/modules/crawling/domain/events/ActionExecuted.js
vendored
Normal file
14
dist/modules/crawling/domain/events/ActionExecuted.js
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.ActionExecuted = void 0;
|
||||
const crypto_1 = require("crypto");
|
||||
class ActionExecuted {
|
||||
constructor(aggregateId, payload) {
|
||||
this.aggregateId = aggregateId;
|
||||
this.payload = payload;
|
||||
this.eventId = (0, crypto_1.randomUUID)();
|
||||
this.eventName = 'crawl.action_executed';
|
||||
this.occurredOn = new Date();
|
||||
}
|
||||
}
|
||||
exports.ActionExecuted = ActionExecuted;
|
||||
14
dist/modules/crawling/domain/events/CrawlCompleted.js
vendored
Normal file
14
dist/modules/crawling/domain/events/CrawlCompleted.js
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlCompleted = void 0;
|
||||
const crypto_1 = require("crypto");
|
||||
class CrawlCompleted {
|
||||
constructor(aggregateId, payload) {
|
||||
this.aggregateId = aggregateId;
|
||||
this.payload = payload;
|
||||
this.eventId = (0, crypto_1.randomUUID)();
|
||||
this.eventName = 'crawl.completed';
|
||||
this.occurredOn = new Date();
|
||||
}
|
||||
}
|
||||
exports.CrawlCompleted = CrawlCompleted;
|
||||
14
dist/modules/crawling/domain/events/CrawlFailed.js
vendored
Normal file
14
dist/modules/crawling/domain/events/CrawlFailed.js
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlFailed = void 0;
|
||||
const crypto_1 = require("crypto");
|
||||
class CrawlFailed {
|
||||
constructor(aggregateId, payload) {
|
||||
this.aggregateId = aggregateId;
|
||||
this.payload = payload;
|
||||
this.eventId = (0, crypto_1.randomUUID)();
|
||||
this.eventName = 'crawl.failed';
|
||||
this.occurredOn = new Date();
|
||||
}
|
||||
}
|
||||
exports.CrawlFailed = CrawlFailed;
|
||||
14
dist/modules/crawling/domain/events/CrawlStarted.js
vendored
Normal file
14
dist/modules/crawling/domain/events/CrawlStarted.js
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlStarted = void 0;
|
||||
const crypto_1 = require("crypto");
|
||||
class CrawlStarted {
|
||||
constructor(aggregateId, payload) {
|
||||
this.aggregateId = aggregateId;
|
||||
this.payload = payload;
|
||||
this.eventId = (0, crypto_1.randomUUID)();
|
||||
this.eventName = 'crawl.started';
|
||||
this.occurredOn = new Date();
|
||||
}
|
||||
}
|
||||
exports.CrawlStarted = CrawlStarted;
|
||||
14
dist/modules/crawling/domain/events/StateDiscovered.js
vendored
Normal file
14
dist/modules/crawling/domain/events/StateDiscovered.js
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.StateDiscovered = void 0;
|
||||
const crypto_1 = require("crypto");
|
||||
class StateDiscovered {
|
||||
constructor(aggregateId, payload) {
|
||||
this.aggregateId = aggregateId;
|
||||
this.payload = payload;
|
||||
this.eventId = (0, crypto_1.randomUUID)();
|
||||
this.eventName = 'crawl.state_discovered';
|
||||
this.occurredOn = new Date();
|
||||
}
|
||||
}
|
||||
exports.StateDiscovered = StateDiscovered;
|
||||
2
dist/modules/crawling/domain/ports/ICrawlSessionRepository.js
vendored
Normal file
2
dist/modules/crawling/domain/ports/ICrawlSessionRepository.js
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
2
dist/modules/crawling/domain/ports/ICrawlerEngine.js
vendored
Normal file
2
dist/modules/crawling/domain/ports/ICrawlerEngine.js
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
2
dist/modules/crawling/domain/ports/IStateRepository.js
vendored
Normal file
2
dist/modules/crawling/domain/ports/IStateRepository.js
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
20
dist/modules/crawling/domain/value-objects/Selector.js
vendored
Normal file
20
dist/modules/crawling/domain/value-objects/Selector.js
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.Selector = void 0;
|
||||
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
/**
 * Value object wrapping a non-empty selector string, stored trimmed.
 */
class Selector extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }
    /**
     * @param raw Candidate selector text.
     * @returns Ok(Selector) holding the trimmed text, or
     *          Err('Selector must not be empty') for missing or blank input.
     */
    static create(raw) {
        // Falsy input counts as empty; otherwise judge the trimmed text.
        const cleaned = raw ? raw.trim() : '';
        if (cleaned.length > 0) {
            return (0, Result_1.Ok)(new Selector({ value: cleaned }));
        }
        return (0, Result_1.Err)('Selector must not be empty');
    }
    /** @returns The stored selector text. */
    toString() {
        const { value } = this.props;
        return value;
    }
}
|
||||
exports.Selector = Selector;
|
||||
21
dist/modules/crawling/domain/value-objects/SessionStatus.js
vendored
Normal file
21
dist/modules/crawling/domain/value-objects/SessionStatus.js
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.SessionStatus = void 0;
|
||||
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
// Every status a crawl session may hold; also listed in the validation error.
const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped'];
/**
 * Value object wrapping one of the statuses listed in VALID_STATUSES.
 */
class SessionStatus extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }
    /**
     * @param val Candidate status.
     * @returns Ok(SessionStatus) when `val` is one of VALID_STATUSES,
     *          otherwise Err naming the rejected value and the allowed ones.
     */
    static create(val) {
        const isKnown = VALID_STATUSES.includes(val);
        return isKnown
            ? (0, Result_1.Ok)(new SessionStatus({ value: val }))
            : (0, Result_1.Err)(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`);
    }
    /** @returns The stored status. */
    getValue() {
        const { value } = this.props;
        return value;
    }
}
|
||||
exports.SessionStatus = SessionStatus;
|
||||
24
dist/modules/crawling/domain/value-objects/Url.js
vendored
Normal file
24
dist/modules/crawling/domain/value-objects/Url.js
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.Url = void 0;
|
||||
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
|
||||
const Result_1 = require("../../../../shared/domain/Result");
|
||||
/**
 * Value object wrapping an absolute http(s) URL, stored trimmed.
 */
class Url extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }
    /**
     * Validates and wraps a URL string.
     *
     * @param raw Candidate URL text.
     * @returns Ok(Url) holding the trimmed text, or Err with one of:
     *          'URL must not be empty',
     *          'URL must start with http:// or https://',
     *          'URL is not a valid absolute URL'.
     */
    static create(raw) {
        if (!raw || raw.trim().length === 0) {
            return (0, Result_1.Err)('URL must not be empty');
        }
        const trimmed = raw.trim();
        if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) {
            return (0, Result_1.Err)('URL must start with http:// or https://');
        }
        // The prefix check alone accepts strings such as "http://" or
        // "https:// not a url". Parsing with the WHATWG URL class rejects
        // anything that is not a well-formed absolute URL with a host.
        try {
            new URL(trimmed);
        }
        catch {
            return (0, Result_1.Err)('URL is not a valid absolute URL');
        }
        // Keep the caller's (trimmed) spelling rather than the parser's
        // normalized href, so stored values look as they did before.
        return (0, Result_1.Ok)(new Url({ value: trimmed }));
    }
    /** @returns The stored URL text. */
    toString() {
        return this.props.value;
    }
}
|
||||
exports.Url = Url;
|
||||
26
dist/modules/crawling/index.js
vendored
Normal file
26
dist/modules/crawling/index.js
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
"use strict";
|
||||
// Compiler-emitted re-export helpers (they look like TypeScript's standard
// CommonJS shims). Left untouched; the comments only describe what the code does.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2
// defaults to k). An existing this.__createBinding is reused when present.
// When Object.create exists, the property is defined from m's own descriptor,
// replaced by an enumerable getter that reads m[k] on each access if the
// descriptor is missing, is an accessor on a module without __esModule, or is
// a writable/configurable data property. Otherwise the value is copied once.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
var desc = Object.getOwnPropertyDescriptor(m, k);
|
||||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
||||
desc = { enumerable: true, get: function() { return m[k]; } };
|
||||
}
|
||||
Object.defineProperty(o, k2, desc);
|
||||
}) : (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
o[k2] = m[k];
|
||||
}));
|
||||
// __exportStar(m, exports): binds every enumerable key of m onto exports,
// skipping "default" and any key exports already owns (first export wins).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
||||
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
__exportStar(require("./domain/entities/CrawlSession"), exports);
|
||||
__exportStar(require("./domain/entities/CrawlState"), exports);
|
||||
__exportStar(require("./domain/entities/CrawlAction"), exports);
|
||||
__exportStar(require("./domain/ports/ICrawlerEngine"), exports);
|
||||
__exportStar(require("./domain/ports/ICrawlSessionRepository"), exports);
|
||||
__exportStar(require("./domain/ports/IStateRepository"), exports);
|
||||
__exportStar(require("./application/commands/StartCrawlCommand"), exports);
|
||||
__exportStar(require("./application/commands/StopCrawlCommand"), exports);
|
||||
__exportStar(require("./application/queries/GetSessionQuery"), exports);
|
||||
__exportStar(require("./application/queries/ListSessionsQuery"), exports);
|
||||
@@ -0,0 +1,54 @@
|
||||
import { UseCase } from '../../../../shared/application/UseCase';
|
||||
import { EventBus } from '../../../../shared/application/EventBus';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
import { Url } from '../../domain/value-objects/Url';
|
||||
import { CrawlSession } from '../../domain/entities/CrawlSession';
|
||||
import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
|
||||
|
||||
/** Input accepted by {@link StartCrawlCommand}. */
interface StartCrawlRequest {
  url: string;
  seed: number;
  maxStates: number;
  config?: Record<string, unknown>;
}

/** Output produced by {@link StartCrawlCommand} on success. */
interface StartCrawlResponse {
  sessionId: string;
}

/**
 * Use case that starts a crawl: validates the requested URL, builds a
 * CrawlSession, stores it through the repository and then publishes the
 * session's pending domain events on the event bus.
 */
export class StartCrawlCommand implements UseCase<StartCrawlRequest, StartCrawlResponse, string> {
  constructor(
    private readonly repository: ICrawlSessionRepository,
    private readonly eventBus: EventBus
  ) {}

  /**
   * @param request URL, seed, state budget and optional config for the crawl.
   * @returns Ok with the new session id; Err with the validation message when
   *          the URL or the session cannot be created.
   */
  async execute(request: StartCrawlRequest): Promise<Result<StartCrawlResponse, string>> {
    const checkedUrl = Url.create(request.url);
    if (!checkedUrl.ok) {
      return Err(checkedUrl.error);
    }

    const creation = CrawlSession.create({
      url: request.url,
      seed: request.seed,
      maxStates: request.maxStates,
      config: request.config,
    });
    if (!creation.ok) {
      return Err(creation.error);
    }

    const crawlSession = creation.value;

    // Persist first so an event is never published for an unsaved session.
    await this.repository.save(crawlSession);

    // Events are published one at a time, in the order the session reports them.
    for (const pendingEvent of crawlSession.domainEvents) {
      await this.eventBus.publish(pendingEvent);
    }
    crawlSession.clearEvents();

    const sessionId = crawlSession.id.toString();
    return Ok({ sessionId });
  }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
import { UseCase } from '../../../../shared/application/UseCase';
|
||||
import { EventBus } from '../../../../shared/application/EventBus';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
|
||||
|
||||
/** Input accepted by {@link StopCrawlCommand}. */
interface StopCrawlRequest {
  sessionId: string;
}

/**
 * Use case that stops a crawl session: loads it by id, marks it as stopped,
 * persists the change and publishes the resulting domain events.
 */
export class StopCrawlCommand implements UseCase<StopCrawlRequest, void, string> {
  constructor(
    private readonly repository: ICrawlSessionRepository,
    private readonly eventBus: EventBus
  ) {}

  /**
   * @param request Carries the id of the session to stop.
   * @returns Ok when the session was stopped; Err('Session not found') when no
   *          session has that id; Err describing the current status when the
   *          session is no longer running.
   */
  async execute(request: StopCrawlRequest): Promise<Result<void, string>> {
    const id = UniqueId.from(request.sessionId);
    const session = await this.repository.findById(id);

    if (!session) {
      return Err('Session not found');
    }

    // Only a running session can be stopped. Without this guard a session that
    // already completed or failed would have its final status overwritten with
    // 'stopped' and a second completion event published.
    if (session.status !== 'running') {
      return Err(`Session is not running (current status: ${session.status})`);
    }

    session.stop();

    await this.repository.update(session);

    // Publish sequentially, after the update has been persisted.
    const events = session.domainEvents;
    for (const event of events) {
      await this.eventBus.publish(event);
    }
    session.clearEvents();

    return Ok(undefined);
  }
}
|
||||
43
src/modules/crawling/application/queries/GetSessionQuery.ts
Normal file
43
src/modules/crawling/application/queries/GetSessionQuery.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { UseCase } from '../../../../shared/application/UseCase';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
|
||||
|
||||
/** Input accepted by {@link GetSessionQuery}. */
interface GetSessionRequest {
  sessionId: string;
}

/** Plain-object view of a crawl session returned to callers. */
interface SessionDTO {
  id: string;
  url: string;
  status: string;
  seed: number;
  maxStates: number;
  statesVisited: number;
  config: Record<string, unknown>;
}

/**
 * Read-side use case returning a plain-object view of a single crawl session.
 */
export class GetSessionQuery implements UseCase<GetSessionRequest, SessionDTO, string> {
  constructor(private readonly repository: ICrawlSessionRepository) {}

  /**
   * @param request Carries the id of the session to look up.
   * @returns Ok with the session's fields, or Err('Session not found').
   */
  async execute(request: GetSessionRequest): Promise<Result<SessionDTO, string>> {
    const lookupId = UniqueId.from(request.sessionId);
    const found = await this.repository.findById(lookupId);

    if (found) {
      // Flatten the aggregate into a plain object; the id becomes a string.
      const view: SessionDTO = {
        id: found.id.toString(),
        url: found.url,
        status: found.status,
        seed: found.seed,
        maxStates: found.maxStates,
        statesVisited: found.statesVisited,
        config: found.config,
      };
      return Ok(view);
    }

    return Err('Session not found');
  }
}
|
||||
@@ -0,0 +1,35 @@
|
||||
import { UseCase } from '../../../../shared/application/UseCase';
|
||||
import { Result, Ok } from '../../../../shared/domain/Result';
|
||||
import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
|
||||
|
||||
/** The query takes no input; callers pass an empty object. */
type ListSessionsRequest = Record<string, never>;

/** Plain-object view of a crawl session returned to callers. */
interface SessionDTO {
  id: string;
  url: string;
  status: string;
  seed: number;
  maxStates: number;
  statesVisited: number;
  config: Record<string, unknown>;
}

/**
 * Read-side use case returning a plain-object view of every stored crawl session.
 */
export class ListSessionsQuery implements UseCase<ListSessionsRequest, SessionDTO[], string> {
  constructor(private readonly repository: ICrawlSessionRepository) {}

  /**
   * @param _request Unused; present only to satisfy the use-case signature.
   * @returns Ok with one DTO per session returned by the repository.
   */
  async execute(_request: ListSessionsRequest): Promise<Result<SessionDTO[], string>> {
    const allSessions = await this.repository.findAll();

    const views: SessionDTO[] = allSessions.map((item) => {
      // Flatten the aggregate into a plain object; the id becomes a string.
      const view: SessionDTO = {
        id: item.id.toString(),
        url: item.url,
        status: item.status,
        seed: item.seed,
        maxStates: item.maxStates,
        statesVisited: item.statesVisited,
        config: item.config,
      };
      return view;
    });

    return Ok(views);
  }
}
|
||||
50
src/modules/crawling/domain/entities/CrawlAction.ts
Normal file
50
src/modules/crawling/domain/entities/CrawlAction.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { Entity } from '../../../../shared/domain/Entity';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
|
||||
/** Data carried by a {@link CrawlAction}. */
interface CrawlActionProps {
  type: string;
  selector?: string;
  value?: string;
  seed: number;
  stateId: string;
  sessionId: string;
  sequenceOrder: number;
}

/**
 * Entity describing one action taken during a crawl. All data lives in
 * `this.props`; the getters below expose it read-only.
 */
export class CrawlAction extends Entity<CrawlActionProps> {
  private constructor(props: CrawlActionProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Factory wrapping the private constructor.
   * @param props Fields of the action.
   * @param id Optional identity forwarded to the base Entity.
   */
  static create(props: CrawlActionProps, id?: UniqueId): CrawlAction {
    const action = new CrawlAction(props, id);
    return action;
  }

  /** Kind of action, as supplied in props. */
  get type(): string {
    const { type } = this.props;
    return type;
  }

  /** Selector the action targets, if one was supplied. */
  get selector(): string | undefined {
    const { selector } = this.props;
    return selector;
  }

  /** Value associated with the action, if one was supplied. */
  get value(): string | undefined {
    const { value } = this.props;
    return value;
  }

  /** Seed recorded with the action. */
  get seed(): number {
    const { seed } = this.props;
    return seed;
  }

  /** Id of the state this action belongs to. */
  get stateId(): string {
    const { stateId } = this.props;
    return stateId;
  }

  /** Id of the session this action belongs to. */
  get sessionId(): string {
    const { sessionId } = this.props;
    return sessionId;
  }

  /** Position of this action in its sequence. */
  get sequenceOrder(): number {
    const { sequenceOrder } = this.props;
    return sequenceOrder;
  }
}
|
||||
119
src/modules/crawling/domain/entities/CrawlSession.ts
Normal file
119
src/modules/crawling/domain/entities/CrawlSession.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
import { AggregateRoot } from '../../../../shared/domain/AggregateRoot';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
import { Url } from '../value-objects/Url';
|
||||
import { CrawlStarted } from '../events/CrawlStarted';
|
||||
import { CrawlCompleted } from '../events/CrawlCompleted';
|
||||
import { CrawlFailed } from '../events/CrawlFailed';
|
||||
|
||||
/** Lifecycle statuses a crawl session can hold. */
type SessionStatusValue = 'running' | 'completed' | 'failed' | 'stopped';

/** Data carried by a {@link CrawlSession}. */
interface CrawlSessionProps {
  url: string;
  status: SessionStatusValue;
  seed: number;
  maxStates: number;
  statesVisited: number;
  config: Record<string, unknown>;
}

/** Input accepted by {@link CrawlSession.create}. */
export interface CreateCrawlSessionRequest {
  url: string;
  seed: number;
  maxStates: number;
  config?: Record<string, unknown>;
}

/**
 * Aggregate root for one crawl run. Holds the target URL, lifecycle status,
 * seed, state budget and visit counter, and records a domain event on
 * creation and on each lifecycle transition.
 */
export class CrawlSession extends AggregateRoot<CrawlSessionProps> {
  private constructor(props: CrawlSessionProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Validates the URL and builds a new session in the 'running' status with
   * zero visited states, queuing a CrawlStarted event.
   *
   * @param request URL, seed, state budget and optional config (defaults to
   *                an empty object when null/undefined).
   * @returns Ok with the session, or Err with the URL validation message.
   */
  static create(request: CreateCrawlSessionRequest): Result<CrawlSession, string> {
    const urlResult = Url.create(request.url);
    if (!urlResult.ok) {
      return Err(urlResult.error);
    }

    // Store the value the Url object actually validated (it is trimmed), not
    // the raw input, so surrounding whitespace never reaches persistence or
    // event payloads.
    const url = urlResult.value.toString();

    const props: CrawlSessionProps = {
      url,
      status: 'running',
      seed: request.seed,
      maxStates: request.maxStates,
      statesVisited: 0,
      config: request.config ?? {},
    };

    const session = new CrawlSession(props);

    session.addDomainEvent(
      new CrawlStarted(session.id.toString(), {
        url,
        seed: request.seed,
        maxStates: request.maxStates,
      })
    );

    return Ok(session);
  }

  get url(): string {
    return this.props.url;
  }

  get status(): SessionStatusValue {
    return this.props.status;
  }

  get seed(): number {
    return this.props.seed;
  }

  get maxStates(): number {
    return this.props.maxStates;
  }

  get statesVisited(): number {
    return this.props.statesVisited;
  }

  get config(): Record<string, unknown> {
    return this.props.config;
  }

  /** Adds one to the visited-state counter (props are replaced, not mutated). */
  incrementStatesVisited(): void {
    this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 };
  }

  /**
   * Marks a running session as completed and queues a CrawlCompleted event.
   * Does nothing when the session already left the 'running' status, so a
   * terminal status is never overwritten and no duplicate event is queued.
   */
  complete(): void {
    if (this.props.status !== 'running') {
      return;
    }
    this.props = { ...this.props, status: 'completed' };
    this.addDomainEvent(
      new CrawlCompleted(this.id.toString(), {
        url: this.props.url,
        statesVisited: this.props.statesVisited,
      })
    );
  }

  /**
   * Marks a running session as failed and queues a CrawlFailed event.
   * Does nothing when the session already left the 'running' status.
   *
   * @param reason Failure description copied into the event payload.
   */
  fail(reason: string): void {
    if (this.props.status !== 'running') {
      return;
    }
    this.props = { ...this.props, status: 'failed' };
    this.addDomainEvent(
      new CrawlFailed(this.id.toString(), {
        url: this.props.url,
        reason,
        statesVisited: this.props.statesVisited,
      })
    );
  }

  /**
   * Marks a running session as stopped and queues a CrawlCompleted event
   * whose payload carries `stopped: true`.
   * Does nothing when the session already left the 'running' status.
   */
  stop(): void {
    if (this.props.status !== 'running') {
      return;
    }
    this.props = { ...this.props, status: 'stopped' };
    this.addDomainEvent(
      new CrawlCompleted(this.id.toString(), {
        url: this.props.url,
        statesVisited: this.props.statesVisited,
        stopped: true,
      })
    );
  }
}
|
||||
45
src/modules/crawling/domain/entities/CrawlState.ts
Normal file
45
src/modules/crawling/domain/entities/CrawlState.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
import { Entity } from '../../../../shared/domain/Entity';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
|
||||
/** Data carried by a CrawlState entity. */
interface CrawlStateProps {
  // --- identity / ownership ---
  /** State identifier kept alongside the entity id — NOTE(review): relationship to the entity's own id is not visible here. */
  stateId: string;
  /** Identifier of the session this state belongs to (string form). */
  sessionId: string;

  // --- captured page ---
  /** URL associated with the state. */
  url: string;
  /** Title associated with the state. */
  title: string;
  /** Serialized DOM snapshot — format is not defined in this file. */
  domSnapshot: string;
  /** Visit counter supplied by the creator of the state. */
  visitCount: number;
}
|
||||
|
||||
/**
 * Entity wrapping the data captured for one crawl state.
 * It is a read-only view: every accessor returns the stored prop unchanged.
 */
export class CrawlState extends Entity<CrawlStateProps> {
  private constructor(props: CrawlStateProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Builds a state from already-prepared props; no validation is performed.
   *
   * @param props - data to store on the entity.
   * @param id - optional entity id, forwarded to the base class.
   */
  static create(props: CrawlStateProps, id?: UniqueId): CrawlState {
    const state = new CrawlState(props, id);
    return state;
  }

  /** State identifier stored in the props. */
  get stateId(): string {
    return this.props.stateId;
  }

  /** Identifier of the owning session. */
  get sessionId(): string {
    return this.props.sessionId;
  }

  /** URL associated with the state. */
  get url(): string {
    return this.props.url;
  }

  /** Title associated with the state. */
  get title(): string {
    return this.props.title;
  }

  /** Serialized DOM snapshot. */
  get domSnapshot(): string {
    return this.props.domSnapshot;
  }

  /** Stored visit counter. */
  get visitCount(): number {
    return this.props.visitCount;
  }
}
|
||||
13
src/modules/crawling/domain/events/ActionExecuted.ts
Normal file
13
src/modules/crawling/domain/events/ActionExecuted.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { randomUUID } from 'crypto';
|
||||
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
|
||||
|
||||
/**
 * Domain event published under the name 'crawl.action_executed'.
 * Every instance receives its own random id and a creation timestamp.
 */
export class ActionExecuted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Fixed event name used for routing. */
  public readonly eventName = 'crawl.action_executed' as const;
  /** Moment the event object was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - id of the aggregate the event refers to.
   * @param payload - free-form event data.
   */
  constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}
|
||||
13
src/modules/crawling/domain/events/CrawlCompleted.ts
Normal file
13
src/modules/crawling/domain/events/CrawlCompleted.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { randomUUID } from 'crypto';
|
||||
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
|
||||
|
||||
/**
 * Domain event published under the name 'crawl.completed'.
 * `CrawlSession` records it both on completion and on stop (the latter with
 * `stopped: true` in the payload).
 */
export class CrawlCompleted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Fixed event name used for routing. */
  public readonly eventName = 'crawl.completed' as const;
  /** Moment the event object was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - id of the aggregate the event refers to.
   * @param payload - free-form event data.
   */
  constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}
|
||||
13
src/modules/crawling/domain/events/CrawlFailed.ts
Normal file
13
src/modules/crawling/domain/events/CrawlFailed.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { randomUUID } from 'crypto';
|
||||
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
|
||||
|
||||
/**
 * Domain event published under the name 'crawl.failed'.
 * `CrawlSession.fail` records it with the failure reason in the payload.
 */
export class CrawlFailed implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Fixed event name used for routing. */
  public readonly eventName = 'crawl.failed' as const;
  /** Moment the event object was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - id of the aggregate the event refers to.
   * @param payload - free-form event data.
   */
  constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}
|
||||
13
src/modules/crawling/domain/events/CrawlStarted.ts
Normal file
13
src/modules/crawling/domain/events/CrawlStarted.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { randomUUID } from 'crypto';
|
||||
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
|
||||
|
||||
/**
 * Domain event published under the name 'crawl.started'.
 * `CrawlSession.create` records it for every newly created session.
 */
export class CrawlStarted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Fixed event name used for routing. */
  public readonly eventName = 'crawl.started' as const;
  /** Moment the event object was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - id of the aggregate the event refers to.
   * @param payload - free-form event data.
   */
  constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}
|
||||
13
src/modules/crawling/domain/events/StateDiscovered.ts
Normal file
13
src/modules/crawling/domain/events/StateDiscovered.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { randomUUID } from 'crypto';
|
||||
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
|
||||
|
||||
/**
 * Domain event published under the name 'crawl.state_discovered'.
 * Every instance receives its own random id and a creation timestamp.
 */
export class StateDiscovered implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Fixed event name used for routing. */
  public readonly eventName = 'crawl.state_discovered' as const;
  /** Moment the event object was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - id of the aggregate the event refers to.
   * @param payload - free-form event data.
   */
  constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}
|
||||
@@ -0,0 +1,9 @@
|
||||
import { CrawlSession } from '../entities/CrawlSession';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
|
||||
/**
 * Persistence port for CrawlSession aggregates.
 * NOTE(review): the contract does not state how `save` and `update` differ
 * (e.g. insert vs. overwrite) — confirm against the concrete adapter.
 */
export interface ICrawlSessionRepository {
  // --- reads ---
  /** Resolves to the session with the given id, or `null` when none exists. */
  findById(id: UniqueId): Promise<CrawlSession | null>;
  /** Resolves to every stored session. */
  findAll(): Promise<CrawlSession[]>;

  // --- writes ---
  /** Stores the given session. */
  save(session: CrawlSession): Promise<void>;
  /** Persists changes made to the given session. */
  update(session: CrawlSession): Promise<void>;
}
|
||||
9
src/modules/crawling/domain/ports/ICrawlerEngine.ts
Normal file
9
src/modules/crawling/domain/ports/ICrawlerEngine.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
import { IState, IAction, IObservation } from '../../../../core/interfaces';
|
||||
|
||||
/**
 * Port through which the crawling domain drives a crawler implementation.
 * NOTE(review): call-order requirements (e.g. `launch` before the other
 * methods) are presumed but not expressed by these signatures.
 */
export interface ICrawlerEngine {
  // --- lifecycle ---
  /** Starts the engine for the given URL. */
  launch(url: string): Promise<void>;
  /** Shuts the engine down. */
  close(): Promise<void>;

  // --- exploration ---
  /** Resolves to the engine's current state. */
  captureState(): Promise<IState>;
  /** Resolves to the actions available for the given state. */
  discoverActions(state: IState): Promise<IAction[]>;
  /** Executes one action and resolves to the resulting observation. */
  executeAction(action: IAction): Promise<IObservation>;
}
|
||||
10
src/modules/crawling/domain/ports/IStateRepository.ts
Normal file
10
src/modules/crawling/domain/ports/IStateRepository.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { CrawlState } from '../entities/CrawlState';
|
||||
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||
|
||||
/**
 * Persistence port for CrawlState entities.
 * NOTE(review): the contract does not state how `save` and `update` differ —
 * confirm against the concrete adapter.
 */
export interface IStateRepository {
  // --- reads ---
  /** Resolves to the state with the given id, or `null` when none exists. */
  findById(id: UniqueId): Promise<CrawlState | null>;
  /** Resolves to every stored state. */
  findAll(): Promise<CrawlState[]>;
  /** Resolves to the states belonging to the given session id. */
  findBySessionId(sessionId: string): Promise<CrawlState[]>;

  // --- writes ---
  /** Stores the given state. */
  save(state: CrawlState): Promise<void>;
  /** Persists changes made to the given state. */
  update(state: CrawlState): Promise<void>;
}
|
||||
23
src/modules/crawling/domain/value-objects/Selector.ts
Normal file
23
src/modules/crawling/domain/value-objects/Selector.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { ValueObject } from '../../../../shared/domain/ValueObject';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
|
||||
/** Backing data of the Selector value object. */
interface SelectorProps {
  /** Selector text, stored trimmed and never empty. */
  value: string;
}
|
||||
|
||||
/**
 * Value object holding a non-empty selector string.
 * Only emptiness is checked; selector syntax is not validated here.
 */
export class Selector extends ValueObject<SelectorProps> {
  private constructor(props: SelectorProps) {
    super(props);
  }

  /**
   * Builds a selector from raw text.
   *
   * @param raw - selector text; surrounding whitespace is discarded.
   * @returns Ok with the selector, or Err when the text is missing or blank.
   */
  static create(raw: string): Result<Selector, string> {
    // Falsy input (empty string, or null/undefined at runtime) counts as blank.
    const normalised = raw ? raw.trim() : '';
    if (normalised.length === 0) {
      return Err('Selector must not be empty');
    }
    return Ok(new Selector({ value: normalised }));
  }

  /** Returns the stored selector text. */
  toString(): string {
    return this.props.value;
  }
}
|
||||
27
src/modules/crawling/domain/value-objects/SessionStatus.ts
Normal file
27
src/modules/crawling/domain/value-objects/SessionStatus.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { ValueObject } from '../../../../shared/domain/ValueObject';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
|
||||
/**
 * Every status a crawl session may have. This tuple is the single source of
 * truth: the `StatusValue` union below is derived from it, so the runtime
 * list and the compile-time type cannot drift apart.
 */
const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped'] as const;

/** Union of the allowed status literals. */
type StatusValue = (typeof VALID_STATUSES)[number];

/** Backing data of the SessionStatus value object. */
interface SessionStatusProps {
  value: StatusValue;
}

/** Type guard: narrows an arbitrary string to `StatusValue` via a runtime membership check. */
function isStatusValue(candidate: string): candidate is StatusValue {
  // Widening the tuple to readonly string[] is always safe and lets
  // `includes` accept any string without an unchecked narrowing cast.
  return (VALID_STATUSES as readonly string[]).includes(candidate);
}

/** Value object wrapping a validated crawl-session status. */
export class SessionStatus extends ValueObject<SessionStatusProps> {
  private constructor(props: SessionStatusProps) {
    super(props);
  }

  /**
   * Builds a status from raw text.
   *
   * @param val - candidate status; must match one of the allowed literals exactly.
   * @returns Ok with the status, or Err listing the allowed values.
   */
  static create(val: string): Result<SessionStatus, string> {
    if (!isStatusValue(val)) {
      return Err(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`);
    }
    return Ok(new SessionStatus({ value: val }));
  }

  /** Returns the wrapped status literal. */
  getValue(): StatusValue {
    return this.props.value;
  }
}
|
||||
27
src/modules/crawling/domain/value-objects/Url.ts
Normal file
27
src/modules/crawling/domain/value-objects/Url.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { ValueObject } from '../../../../shared/domain/ValueObject';
|
||||
import { Result, Ok, Err } from '../../../../shared/domain/Result';
|
||||
|
||||
/** Backing data of the Url value object. */
interface UrlProps {
  /** URL text, stored trimmed and beginning with http:// or https://. */
  value: string;
}
|
||||
|
||||
/**
 * Value object holding an absolute http(s) URL.
 * The stored text is the trimmed input; it is not otherwise rewritten.
 */
export class Url extends ValueObject<UrlProps> {
  private constructor(props: UrlProps) {
    super(props);
  }

  /**
   * Builds a URL from raw text.
   *
   * @param raw - candidate URL; surrounding whitespace is discarded.
   * @returns Ok with the URL, or Err when the text is blank, does not start
   *          with http:// or https://, or cannot be parsed as a URL.
   */
  static create(raw: string): Result<Url, string> {
    if (!raw || raw.trim().length === 0) {
      return Err('URL must not be empty');
    }

    const trimmed = raw.trim();
    if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) {
      return Err('URL must start with http:// or https://');
    }

    // A bare prefix check lets through inputs such as "http://" or
    // "https://exa mple.com"; require the text to parse as well.
    if (!Url.isParsable(trimmed)) {
      return Err('URL is not a valid absolute URL');
    }

    return Ok(new Url({ value: trimmed }));
  }

  /** Returns the stored URL text. */
  toString(): string {
    return this.props.value;
  }

  /** True when the WHATWG URL parser accepts the candidate. */
  private static isParsable(candidate: string): boolean {
    try {
      // The constructor throws a TypeError on input it cannot parse.
      return new URL(candidate).host.length > 0;
    } catch {
      return false;
    }
  }
}
|
||||
10
src/modules/crawling/index.ts
Normal file
10
src/modules/crawling/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
export * from './domain/entities/CrawlSession';
|
||||
export * from './domain/entities/CrawlState';
|
||||
export * from './domain/entities/CrawlAction';
|
||||
export * from './domain/ports/ICrawlerEngine';
|
||||
export * from './domain/ports/ICrawlSessionRepository';
|
||||
export * from './domain/ports/IStateRepository';
|
||||
export * from './application/commands/StartCrawlCommand';
|
||||
export * from './application/commands/StopCrawlCommand';
|
||||
export * from './application/queries/GetSessionQuery';
|
||||
export * from './application/queries/ListSessionsQuery';
|
||||
115
tests/modules/crawling.test.ts
Normal file
115
tests/modules/crawling.test.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { CrawlSession } from '../../src/modules/crawling/domain/entities/CrawlSession';
|
||||
import { StartCrawlCommand } from '../../src/modules/crawling/application/commands/StartCrawlCommand';
|
||||
import { ICrawlSessionRepository } from '../../src/modules/crawling/domain/ports/ICrawlSessionRepository';
|
||||
import { EventBus } from '../../src/shared/application/EventBus';
|
||||
import { DomainEvent } from '../../src/shared/domain/DomainEvent';
|
||||
import { EventHandler } from '../../src/shared/application/EventHandler';
|
||||
import { UniqueId } from '../../src/shared/domain/UniqueId';
|
||||
import { isOk, isErr } from '../../src/shared/domain/Result';
|
||||
|
||||
// --- CrawlSession domain ---
|
||||
describe('CrawlSession', () => {
  /** Creates a session for https://example.com (seed 1, maxStates 5) or fails the test. */
  const newSession = (): CrawlSession => {
    const created = CrawlSession.create({ url: 'https://example.com', seed: 1, maxStates: 5 });
    if (!created.ok) throw new Error('Expected Ok');
    return created.value;
  };

  it('create returns Ok for valid url', () => {
    const outcome = CrawlSession.create({ url: 'https://example.com', seed: 42, maxStates: 10 });
    expect(isOk(outcome)).toBe(true);
  });

  it('create returns Err for invalid url', () => {
    const outcome = CrawlSession.create({ url: 'not-a-url', seed: 42, maxStates: 10 });
    expect(isErr(outcome)).toBe(true);
  });

  it('emits CrawlStarted event on create', () => {
    const session = newSession();
    expect(session.domainEvents).toHaveLength(1);
    expect(session.domainEvents[0]?.eventName).toBe('crawl.started');
  });

  it('status starts as running', () => {
    expect(newSession().status).toBe('running');
  });

  it('complete changes status and adds event', () => {
    const session = newSession();
    // Drop the creation event so index 0 is the event under test.
    session.clearEvents();
    session.complete();
    expect(session.status).toBe('completed');
    expect(session.domainEvents[0]?.eventName).toBe('crawl.completed');
  });

  it('fail changes status and adds CrawlFailed event', () => {
    const session = newSession();
    // Drop the creation event so index 0 is the event under test.
    session.clearEvents();
    session.fail('browser crashed');
    expect(session.status).toBe('failed');
    expect(session.domainEvents[0]?.eventName).toBe('crawl.failed');
  });

  it('incrementStatesVisited increments counter', () => {
    const session = newSession();
    session.incrementStatesVisited();
    session.incrementStatesVisited();
    expect(session.statesVisited).toBe(2);
  });

  it('equals compares by id', () => {
    const first = newSession();
    const second = newSession();
    // Identical inputs still yield distinct ids, hence distinct entities.
    expect(first.equals(second)).toBe(false);
  });
});
|
||||
|
||||
// --- StartCrawlCommand ---
|
||||
describe('StartCrawlCommand', () => {
  /** In-memory repository keyed by session id; save and update both overwrite. */
  function makeMockRepo(): ICrawlSessionRepository {
    const sessionsById = new Map<string, CrawlSession>();
    const put = async (session: CrawlSession): Promise<void> => {
      sessionsById.set(session.id.toString(), session);
    };
    return {
      save: put,
      findById: async (id: UniqueId) => sessionsById.get(id.toString()) ?? null,
      findAll: async () => Array.from(sessionsById.values()),
      update: put,
    };
  }

  /** Event bus double that records every published event in `events`. */
  function makeMockBus(): EventBus & { events: DomainEvent[] } {
    const published: DomainEvent[] = [];
    return {
      events: published,
      publish: async (event: DomainEvent) => {
        published.push(event);
      },
      // Subscriptions are irrelevant to these tests.
      subscribe: (_name: string, _handler: EventHandler) => {},
    };
  }

  it('returns Ok with sessionId for valid url', async () => {
    const command = new StartCrawlCommand(makeMockRepo(), makeMockBus());
    const outcome = await command.execute({ url: 'https://example.com', seed: 42, maxStates: 10 });
    expect(isOk(outcome)).toBe(true);
    if (outcome.ok) {
      expect(typeof outcome.value.sessionId).toBe('string');
    }
  });

  it('returns Err for invalid url', async () => {
    const command = new StartCrawlCommand(makeMockRepo(), makeMockBus());
    const outcome = await command.execute({ url: 'not-a-url', seed: 42, maxStates: 10 });
    expect(isErr(outcome)).toBe(true);
  });

  it('publishes CrawlStarted event via EventBus', async () => {
    const bus = makeMockBus();
    const command = new StartCrawlCommand(makeMockRepo(), bus);
    await command.execute({ url: 'https://example.com', seed: 1, maxStates: 5 });
    const sawStarted = bus.events.some((event) => event.eventName === 'crawl.started');
    expect(sawStarted).toBe(true);
  });
});
|
||||
Reference in New Issue
Block a user