fase(3): crawling module domain and application

This commit is contained in:
debian
2026-03-04 16:32:09 -05:00
parent 4a58749048
commit 39c5313ba5
40 changed files with 1117 additions and 13 deletions

View File

@@ -0,0 +1,36 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StartCrawlCommand = void 0;
const Result_1 = require("../../../../shared/domain/Result");
const Url_1 = require("../../domain/value-objects/Url");
const CrawlSession_1 = require("../../domain/entities/CrawlSession");
/**
 * Application command that creates a crawl session, persists it and then
 * publishes the domain events the new session exposes.
 */
class StartCrawlCommand {
    /**
     * @param repository - object exposing an awaitable `save(session)`.
     * @param eventBus - object exposing an awaitable `publish(event)`.
     */
    constructor(repository, eventBus) {
        Object.assign(this, { repository, eventBus });
    }

    /**
     * Validates the requested URL, builds the session, saves it and publishes
     * its events one at a time in the order the session lists them.
     *
     * @param request - carries `url`, `seed`, `maxStates` and `config`.
     * @returns a Result: `Ok({ sessionId })` on success, or `Err(error)` carrying
     *   the error reported by URL validation or by session creation.
     */
    async execute(request) {
        const { Ok, Err } = Result_1;
        const { url, seed, maxStates, config } = request;

        const validatedUrl = Url_1.Url.create(url);
        if (!validatedUrl.ok) {
            return Err(validatedUrl.error);
        }

        const created = CrawlSession_1.CrawlSession.create({ url, seed, maxStates, config });
        if (!created.ok) {
            return Err(created.error);
        }

        const { value: session } = created;
        await this.repository.save(session);

        // Events go out only after the save resolved, strictly sequentially.
        for (const recorded of session.domainEvents) {
            await this.eventBus.publish(recorded);
        }
        session.clearEvents();

        return Ok({ sessionId: session.id.toString() });
    }
}
exports.StartCrawlCommand = StartCrawlCommand;

View File

@@ -0,0 +1,27 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StopCrawlCommand = void 0;
const Result_1 = require("../../../../shared/domain/Result");
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
/**
 * Application command that stops an existing crawl session, persists the
 * change and publishes the events the session exposes afterwards.
 */
class StopCrawlCommand {
    /**
     * @param repository - object exposing awaitable `findById(id)` and `update(session)`.
     * @param eventBus - object exposing an awaitable `publish(event)`.
     */
    constructor(repository, eventBus) {
        Object.assign(this, { repository, eventBus });
    }

    /**
     * @param request - carries the `sessionId` of the session to stop.
     * @returns a Result: `Ok(undefined)` once the session was stopped, updated and
     *   its events published; `Err('Session not found')` when the lookup is empty.
     */
    async execute(request) {
        const { Ok, Err } = Result_1;

        const sessionId = UniqueId_1.UniqueId.from(request.sessionId);
        const target = await this.repository.findById(sessionId);
        if (!target) {
            return Err('Session not found');
        }

        target.stop();
        await this.repository.update(target);

        // Publish one event at a time, in the order the session lists them.
        for (const recorded of target.domainEvents) {
            await this.eventBus.publish(recorded);
        }
        target.clearEvents();

        return Ok(undefined);
    }
}
exports.StopCrawlCommand = StopCrawlCommand;

View File

@@ -0,0 +1,28 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.GetSessionQuery = void 0;
const Result_1 = require("../../../../shared/domain/Result");
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
/**
 * Read-side query that looks up one crawl session and returns it as a plain DTO.
 */
class GetSessionQuery {
    /**
     * @param repository - object exposing an awaitable `findById(id)`.
     */
    constructor(repository) {
        this.repository = repository;
    }

    /**
     * @param request - carries the `sessionId` to look up.
     * @returns a Result: `Ok(dto)` with `id` (stringified), `url`, `status`, `seed`,
     *   `maxStates`, `statesVisited` and `config`; `Err('Session not found')`
     *   when the repository yields nothing.
     */
    async execute(request) {
        const { Ok, Err } = Result_1;

        const lookupId = UniqueId_1.UniqueId.from(request.sessionId);
        const found = await this.repository.findById(lookupId);
        if (!found) {
            return Err('Session not found');
        }

        const { id, url, status, seed, maxStates, statesVisited, config } = found;
        return Ok({
            id: id.toString(),
            url,
            status,
            seed,
            maxStates,
            statesVisited,
            config,
        });
    }
}
exports.GetSessionQuery = GetSessionQuery;

View File

@@ -0,0 +1,23 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ListSessionsQuery = void 0;
const Result_1 = require("../../../../shared/domain/Result");
/**
 * Read-side query that returns every crawl session as a plain DTO.
 */
class ListSessionsQuery {
    /**
     * @param repository - object exposing an awaitable `findAll()`.
     */
    constructor(repository) {
        this.repository = repository;
    }

    /**
     * @param _request - unused; the query takes no filters.
     * @returns `Ok(dtos)`, one DTO per session returned by `findAll()`, each with
     *   `id` (stringified), `url`, `status`, `seed`, `maxStates`,
     *   `statesVisited` and `config`.
     */
    async execute(_request) {
        const toDto = ({ id, url, status, seed, maxStates, statesVisited, config }) => ({
            id: id.toString(),
            url,
            status,
            seed,
            maxStates,
            statesVisited,
            config,
        });

        const allSessions = await this.repository.findAll();
        const { Ok } = Result_1;
        return Ok(allSessions.map((entry) => toDto(entry)));
    }
}
exports.ListSessionsQuery = ListSessionsQuery;

View File

@@ -0,0 +1,34 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlAction = void 0;
const Entity_1 = require("../../../../shared/domain/Entity");
/**
 * Crawl action entity. It adds no behaviour of its own: each accessor returns
 * the same-named field of `this.props` (presumably populated by the `Entity`
 * base constructor — confirm in shared/domain/Entity).
 */
class CrawlAction extends Entity_1.Entity {
    constructor(props, id) {
        super(props, id);
    }

    /** Factory wrapper around the constructor; performs no validation. */
    static create(props, id) {
        const action = new CrawlAction(props, id);
        return action;
    }

    /** @returns `props.type` */
    get type() {
        const { type } = this.props;
        return type;
    }

    /** @returns `props.selector` */
    get selector() {
        const { selector } = this.props;
        return selector;
    }

    /** @returns `props.value` */
    get value() {
        const { value } = this.props;
        return value;
    }

    /** @returns `props.seed` */
    get seed() {
        const { seed } = this.props;
        return seed;
    }

    /** @returns `props.stateId` */
    get stateId() {
        const { stateId } = this.props;
        return stateId;
    }

    /** @returns `props.sessionId` */
    get sessionId() {
        const { sessionId } = this.props;
        return sessionId;
    }

    /** @returns `props.sequenceOrder` */
    get sequenceOrder() {
        const { sequenceOrder } = this.props;
        return sequenceOrder;
    }
}
exports.CrawlAction = CrawlAction;

View File

@@ -0,0 +1,80 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlSession = void 0;
const AggregateRoot_1 = require("../../../../shared/domain/AggregateRoot");
const Result_1 = require("../../../../shared/domain/Result");
const Url_1 = require("../value-objects/Url");
const CrawlStarted_1 = require("../events/CrawlStarted");
const CrawlCompleted_1 = require("../events/CrawlCompleted");
const CrawlFailed_1 = require("../events/CrawlFailed");
/**
 * Aggregate root for one crawl run. Holds the target URL, status, seed,
 * state budget, visited-state counter and config, and records a domain event
 * on creation and on each terminal transition.
 *
 * State changes replace `this.props` with a fresh object instead of mutating
 * the existing one.
 */
class CrawlSession extends AggregateRoot_1.AggregateRoot {
    constructor(props, id) {
        super(props, id);
    }

    /**
     * Builds a new session in the `'running'` status and records a
     * `CrawlStarted` event for it.
     *
     * @param request - carries `url`, `seed`, `maxStates` and optional `config`
     *   (a nullish `config` becomes `{}`).
     * @returns `Ok(session)`, or `Err(error)` with the URL validation error.
     */
    static create(request) {
        const urlResult = Url_1.Url.create(request.url);
        if (!urlResult.ok) {
            return (0, Result_1.Err)(urlResult.error);
        }
        // Keep the normalized (trimmed) form that was actually validated, so the
        // stored URL and the event payload never carry surrounding whitespace
        // that validation silently ignored.
        const url = urlResult.value.toString();
        const props = {
            url,
            status: 'running',
            seed: request.seed,
            maxStates: request.maxStates,
            statesVisited: 0,
            config: request.config ?? {},
        };
        const session = new CrawlSession(props);
        session.addDomainEvent(new CrawlStarted_1.CrawlStarted(session.id.toString(), {
            url,
            seed: request.seed,
            maxStates: request.maxStates,
        }));
        return (0, Result_1.Ok)(session);
    }

    /** @returns `props.url` */
    get url() {
        return this.props.url;
    }

    /** @returns `props.status`: 'running', 'completed', 'failed' or 'stopped'. */
    get status() {
        return this.props.status;
    }

    /** @returns `props.seed` */
    get seed() {
        return this.props.seed;
    }

    /** @returns `props.maxStates` */
    get maxStates() {
        return this.props.maxStates;
    }

    /** @returns `props.statesVisited` */
    get statesVisited() {
        return this.props.statesVisited;
    }

    /** @returns `props.config` */
    get config() {
        return this.props.config;
    }

    /** Adds one to the visited-state counter; `maxStates` is not checked here. */
    incrementStatesVisited() {
        this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 };
    }

    /** Sets the status to `'completed'` and records a `CrawlCompleted` event. */
    complete() {
        this.props = { ...this.props, status: 'completed' };
        this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), {
            url: this.props.url,
            statesVisited: this.props.statesVisited,
        }));
    }

    /**
     * Sets the status to `'failed'` and records a `CrawlFailed` event.
     *
     * @param reason - forwarded unchanged in the event payload.
     */
    fail(reason) {
        this.props = { ...this.props, status: 'failed' };
        this.addDomainEvent(new CrawlFailed_1.CrawlFailed(this.id.toString(), {
            url: this.props.url,
            reason,
            statesVisited: this.props.statesVisited,
        }));
    }

    /**
     * Sets the status to `'stopped'`. The recorded event is a `CrawlCompleted`
     * whose payload carries `stopped: true`; no dedicated "stopped" event type
     * is used here.
     */
    stop() {
        this.props = { ...this.props, status: 'stopped' };
        this.addDomainEvent(new CrawlCompleted_1.CrawlCompleted(this.id.toString(), {
            url: this.props.url,
            statesVisited: this.props.statesVisited,
            stopped: true,
        }));
    }
}
exports.CrawlSession = CrawlSession;

View File

@@ -0,0 +1,31 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlState = void 0;
const Entity_1 = require("../../../../shared/domain/Entity");
/**
 * Crawl state entity. It adds no behaviour of its own: each accessor returns
 * the same-named field of `this.props` (presumably populated by the `Entity`
 * base constructor — confirm in shared/domain/Entity).
 */
class CrawlState extends Entity_1.Entity {
    constructor(props, id) {
        super(props, id);
    }

    /** Factory wrapper around the constructor; performs no validation. */
    static create(props, id) {
        const state = new CrawlState(props, id);
        return state;
    }

    /** @returns `props.url` */
    get url() {
        const { url } = this.props;
        return url;
    }

    /** @returns `props.title` */
    get title() {
        const { title } = this.props;
        return title;
    }

    /** @returns `props.domSnapshot` */
    get domSnapshot() {
        const { domSnapshot } = this.props;
        return domSnapshot;
    }

    /** @returns `props.visitCount` */
    get visitCount() {
        const { visitCount } = this.props;
        return visitCount;
    }

    /** @returns `props.stateId` */
    get stateId() {
        const { stateId } = this.props;
        return stateId;
    }

    /** @returns `props.sessionId` */
    get sessionId() {
        const { sessionId } = this.props;
        return sessionId;
    }
}
exports.CrawlState = CrawlState;

View File

@@ -0,0 +1,14 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ActionExecuted = void 0;
const crypto_1 = require("crypto");
/**
 * Domain event named 'crawl.action_executed'.
 * Each instance gets a fresh random UUID and the creation timestamp.
 */
class ActionExecuted {
    /**
     * @param aggregateId - identifier stored as-is on the event.
     * @param payload - caller-supplied data stored by reference.
     */
    constructor(aggregateId, payload) {
        const { randomUUID } = crypto_1;
        Object.assign(this, {
            aggregateId,
            payload,
            eventId: randomUUID(),
            eventName: 'crawl.action_executed',
            occurredOn: new Date(),
        });
    }
}
exports.ActionExecuted = ActionExecuted;

View File

@@ -0,0 +1,14 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlCompleted = void 0;
const crypto_1 = require("crypto");
/**
 * Domain event named 'crawl.completed'.
 * Each instance gets a fresh random UUID and the creation timestamp.
 */
class CrawlCompleted {
    /**
     * @param aggregateId - identifier stored as-is on the event.
     * @param payload - caller-supplied data stored by reference.
     */
    constructor(aggregateId, payload) {
        const { randomUUID } = crypto_1;
        Object.assign(this, {
            aggregateId,
            payload,
            eventId: randomUUID(),
            eventName: 'crawl.completed',
            occurredOn: new Date(),
        });
    }
}
exports.CrawlCompleted = CrawlCompleted;

View File

@@ -0,0 +1,14 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlFailed = void 0;
const crypto_1 = require("crypto");
/**
 * Domain event named 'crawl.failed'.
 * Each instance gets a fresh random UUID and the creation timestamp.
 */
class CrawlFailed {
    /**
     * @param aggregateId - identifier stored as-is on the event.
     * @param payload - caller-supplied data stored by reference.
     */
    constructor(aggregateId, payload) {
        const { randomUUID } = crypto_1;
        Object.assign(this, {
            aggregateId,
            payload,
            eventId: randomUUID(),
            eventName: 'crawl.failed',
            occurredOn: new Date(),
        });
    }
}
exports.CrawlFailed = CrawlFailed;

View File

@@ -0,0 +1,14 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlStarted = void 0;
const crypto_1 = require("crypto");
/**
 * Domain event named 'crawl.started'.
 * Each instance gets a fresh random UUID and the creation timestamp.
 */
class CrawlStarted {
    /**
     * @param aggregateId - identifier stored as-is on the event.
     * @param payload - caller-supplied data stored by reference.
     */
    constructor(aggregateId, payload) {
        const { randomUUID } = crypto_1;
        Object.assign(this, {
            aggregateId,
            payload,
            eventId: randomUUID(),
            eventName: 'crawl.started',
            occurredOn: new Date(),
        });
    }
}
exports.CrawlStarted = CrawlStarted;

View File

@@ -0,0 +1,14 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StateDiscovered = void 0;
const crypto_1 = require("crypto");
/**
 * Domain event named 'crawl.state_discovered'.
 * Each instance gets a fresh random UUID and the creation timestamp.
 */
class StateDiscovered {
    /**
     * @param aggregateId - identifier stored as-is on the event.
     * @param payload - caller-supplied data stored by reference.
     */
    constructor(aggregateId, payload) {
        const { randomUUID } = crypto_1;
        Object.assign(this, {
            aggregateId,
            payload,
            eventId: randomUUID(),
            eventName: 'crawl.state_discovered',
            occurredOn: new Date(),
        });
    }
}
exports.StateDiscovered = StateDiscovered;

View File

@@ -0,0 +1,2 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });

View File

@@ -0,0 +1,2 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });

View File

@@ -0,0 +1,2 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });

View File

@@ -0,0 +1,20 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Selector = void 0;
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
const Result_1 = require("../../../../shared/domain/Result");
/**
 * Value object wrapping a non-empty, whitespace-trimmed selector string.
 */
class Selector extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }

    /**
     * @param raw - candidate selector text.
     * @returns `Ok(Selector)` holding the trimmed text, or
     *   `Err('Selector must not be empty')` when `raw` is falsy or only whitespace.
     */
    static create(raw) {
        const { Ok, Err } = Result_1;
        const trimmed = raw ? raw.trim() : '';
        if (trimmed.length === 0) {
            return Err('Selector must not be empty');
        }
        return Ok(new Selector({ value: trimmed }));
    }

    /** @returns the stored selector text. */
    toString() {
        const { value } = this.props;
        return value;
    }
}
exports.Selector = Selector;

View File

@@ -0,0 +1,21 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SessionStatus = void 0;
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
const Result_1 = require("../../../../shared/domain/Result");
// The only status strings SessionStatus.create accepts; also listed in its error message.
const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped'];

/**
 * Value object wrapping one of the accepted session status strings.
 */
class SessionStatus extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }

    /**
     * @param val - candidate status.
     * @returns `Ok(SessionStatus)` when `val` is one of VALID_STATUSES, otherwise
     *   `Err` with a message naming the rejected value and the accepted ones.
     */
    static create(val) {
        const { Ok, Err } = Result_1;
        const isKnown = VALID_STATUSES.includes(val);
        if (isKnown) {
            return Ok(new SessionStatus({ value: val }));
        }
        return Err(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`);
    }

    /** @returns the stored status string. */
    getValue() {
        const { value } = this.props;
        return value;
    }
}
exports.SessionStatus = SessionStatus;

View File

@@ -0,0 +1,24 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Url = void 0;
const ValueObject_1 = require("../../../../shared/domain/ValueObject");
const Result_1 = require("../../../../shared/domain/Result");
/**
 * Value object wrapping a trimmed http(s) URL string.
 *
 * Validation is deliberately shallow: it checks only that the text is
 * non-empty and starts with an `http://` or `https://` scheme prefix.
 */
class Url extends ValueObject_1.ValueObject {
    constructor(props) {
        super(props);
    }

    /**
     * @param raw - candidate URL text.
     * @returns `Ok(Url)` holding the trimmed text, or `Err(message)` when the
     *   input is empty, is not a string, or lacks an http/https scheme prefix.
     */
    static create(raw) {
        const { Ok, Err } = Result_1;
        if (!raw) {
            return Err('URL must not be empty');
        }
        // A truthy non-string (number, object, ...) has no usable `trim()`;
        // report it through the Result channel instead of throwing a TypeError.
        if (typeof raw !== 'string') {
            return Err('URL must be a string');
        }
        const trimmed = raw.trim();
        if (trimmed.length === 0) {
            return Err('URL must not be empty');
        }
        // URI schemes are case-insensitive (RFC 3986 section 3.1), so
        // "HTTP://host" must be accepted just like "http://host".
        if (!/^https?:\/\//i.test(trimmed)) {
            return Err('URL must start with http:// or https://');
        }
        return Ok(new Url({ value: trimmed }));
    }

    /** @returns the stored (trimmed) URL text. */
    toString() {
        return this.props.value;
    }
}
exports.Url = Url;

26
dist/modules/crawling/index.js vendored Normal file
View File

@@ -0,0 +1,26 @@
"use strict";
// Compiler-emitted re-export helpers. Each reuses an implementation already
// present on `this` when there is one, and otherwise defines its own.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2
// defaults to k). When Object.create exists, the property is defined from m's
// own descriptor for k, except that a getter reading m[k] at access time is
// substituted when there is no own descriptor, when the descriptor is an
// accessor and m is not flagged __esModule, or when it is a writable or
// configurable data property. Without Object.create it copies the current
// value by plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// __exportStar(m, exports): binds every key enumerated on m onto `exports`,
// skipping "default" and any name `exports` already owns (so the first module
// to export a given name wins).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
Object.defineProperty(exports, "__esModule", { value: true });
// Names re-exported by the crawling module: entities, ports, commands and queries.
__exportStar(require("./domain/entities/CrawlSession"), exports);
__exportStar(require("./domain/entities/CrawlState"), exports);
__exportStar(require("./domain/entities/CrawlAction"), exports);
__exportStar(require("./domain/ports/ICrawlerEngine"), exports);
__exportStar(require("./domain/ports/ICrawlSessionRepository"), exports);
__exportStar(require("./domain/ports/IStateRepository"), exports);
__exportStar(require("./application/commands/StartCrawlCommand"), exports);
__exportStar(require("./application/commands/StopCrawlCommand"), exports);
__exportStar(require("./application/queries/GetSessionQuery"), exports);
__exportStar(require("./application/queries/ListSessionsQuery"), exports);