fase(3): crawling module domain and application

This commit is contained in:
debian
2026-03-04 16:32:09 -05:00
parent 4a58749048
commit 39c5313ba5
40 changed files with 1117 additions and 13 deletions

View File

@@ -0,0 +1,50 @@
import { Entity } from '../../../../shared/domain/Entity';
import { UniqueId } from '../../../../shared/domain/UniqueId';
/**
 * Data carried by a CrawlAction entity.
 *
 * NOTE(review): nothing in this file constrains these fields beyond their
 * types; the per-field notes marked "presumably" are inferred from the names
 * and should be confirmed against the code that builds actions.
 */
interface CrawlActionProps {
  /** Identifier of the owning session — presumably a CrawlSession id. */
  sessionId: string;
  /** Identifier of the related state — presumably the state the action was taken from. */
  stateId: string;
  /** Position of the action in a sequence — presumably within its session. */
  sequenceOrder: number;

  /** Action kind. Typed as a free-form string; no union restricts it here. */
  type: string;
  /** Optional target selector. */
  selector?: string;
  /** Optional value associated with the action. */
  value?: string;

  /** Numeric seed recorded with the action. */
  seed: number;
}
/**
 * Entity exposing read-only accessors over a set of crawl-action properties.
 *
 * The constructor is private; instances are obtained through
 * `CrawlAction.create`.
 */
export class CrawlAction extends Entity<CrawlActionProps> {
  private constructor(props: CrawlActionProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Wraps already-assembled props in an entity. No validation is performed.
   *
   * @param props - Values returned by the accessors below.
   * @param id - Optional identity, forwarded unchanged to the `Entity` base constructor.
   * @returns The new action.
   */
  static create(props: CrawlActionProps, id?: UniqueId): CrawlAction {
    const action = new CrawlAction(props, id);
    return action;
  }

  // --- references -----------------------------------------------------------

  /** The `sessionId` given at creation. */
  get sessionId(): string {
    return this.props.sessionId;
  }

  /** The `stateId` given at creation. */
  get stateId(): string {
    return this.props.stateId;
  }

  /** The `sequenceOrder` given at creation. */
  get sequenceOrder(): number {
    return this.props.sequenceOrder;
  }

  // --- action description ---------------------------------------------------

  /** The action kind given at creation. */
  get type(): string {
    return this.props.type;
  }

  /** The selector given at creation, or `undefined` when none was supplied. */
  get selector(): string | undefined {
    return this.props.selector;
  }

  /** The value given at creation, or `undefined` when none was supplied. */
  get value(): string | undefined {
    return this.props.value;
  }

  /** The seed given at creation. */
  get seed(): number {
    return this.props.seed;
  }
}

View File

@@ -0,0 +1,119 @@
import { AggregateRoot } from '../../../../shared/domain/AggregateRoot';
import { UniqueId } from '../../../../shared/domain/UniqueId';
import { Result, Ok, Err } from '../../../../shared/domain/Result';
import { Url } from '../value-objects/Url';
import { CrawlStarted } from '../events/CrawlStarted';
import { CrawlCompleted } from '../events/CrawlCompleted';
import { CrawlFailed } from '../events/CrawlFailed';
/**
 * Lifecycle status of a crawl session.
 *
 * `CrawlSession.create` starts every session as `'running'`; the other three
 * values are set by `complete()`, `fail()` and `stop()` respectively.
 */
type SessionStatusValue =
  | 'running'
  | 'completed'
  | 'failed'
  | 'stopped';

/** Internal state held by the `CrawlSession` aggregate. */
interface CrawlSessionProps {
  /** Target URL of the session. */
  url: string;
  /** Current lifecycle status. */
  status: SessionStatusValue;
  /** Numeric seed supplied by the creator of the session. */
  seed: number;
  /** State limit supplied by the creator; not enforced anywhere in this file. */
  maxStates: number;
  /** Counter that starts at 0 and grows by one per `incrementStatesVisited()` call. */
  statesVisited: number;
  /** Free-form configuration; `{}` when the request carried none. */
  config: Record<string, unknown>;
}

/** Input accepted by `CrawlSession.create`. */
export interface CreateCrawlSessionRequest {
  /** Target URL; checked with `Url.create` before the session is built. */
  url: string;
  /** Numeric seed copied onto the session. */
  seed: number;
  /** State limit copied onto the session. */
  maxStates: number;
  /** Optional free-form configuration; defaults to `{}` when omitted. */
  config?: Record<string, unknown>;
}
/**
 * Aggregate root describing one crawl run: target URL, seed, state limit,
 * number of states visited so far and lifecycle status.
 *
 * A session is created as `running` and can move to exactly one terminal
 * status (`completed`, `failed` or `stopped`). Creation and every terminal
 * transition record a domain event through `addDomainEvent`.
 */
export class CrawlSession extends AggregateRoot<CrawlSessionProps> {
  private constructor(props: CrawlSessionProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Validates the request and starts a new session.
   *
   * @param request - URL, seed, state limit and optional configuration.
   * @returns `Ok` with a `running` session that already holds a `CrawlStarted`
   *   event, or `Err` with the message produced by `Url.create` when the URL
   *   is rejected.
   */
  static create(request: CreateCrawlSessionRequest): Result<CrawlSession, string> {
    const urlResult = Url.create(request.url);
    if (!urlResult.ok) {
      return Err(urlResult.error);
    }

    // Url.create validates (and keeps) the trimmed form of the input, so store
    // and publish that same form instead of the raw string with its padding.
    const url = request.url.trim();
    const { seed, maxStates } = request;

    const props: CrawlSessionProps = {
      url,
      status: 'running',
      seed,
      maxStates,
      statesVisited: 0,
      config: request.config ?? {},
    };
    const session = new CrawlSession(props);
    session.addDomainEvent(
      new CrawlStarted(session.id.toString(), { url, seed, maxStates })
    );
    return Ok(session);
  }

  /** Target URL of the session. */
  get url(): string {
    return this.props.url;
  }

  /** Current lifecycle status. */
  get status(): SessionStatusValue {
    return this.props.status;
  }

  /** Seed supplied at creation. */
  get seed(): number {
    return this.props.seed;
  }

  /** State limit supplied at creation. */
  get maxStates(): number {
    return this.props.maxStates;
  }

  /** Number of times `incrementStatesVisited` has been called. */
  get statesVisited(): number {
    return this.props.statesVisited;
  }

  /** Configuration supplied at creation (`{}` when none was given). */
  get config(): Record<string, unknown> {
    return this.props.config;
  }

  /** Adds one to the visited-state counter. The counter is not compared with `maxStates` here. */
  incrementStatesVisited(): void {
    this.props = { ...this.props, statesVisited: this.props.statesVisited + 1 };
  }

  /**
   * Marks the session `completed` and records a `CrawlCompleted` event.
   * Does nothing when the session has already left the `running` status.
   */
  complete(): void {
    if (!this.isRunning()) {
      return;
    }
    this.props = { ...this.props, status: 'completed' };
    this.addDomainEvent(
      new CrawlCompleted(this.id.toString(), {
        url: this.props.url,
        statesVisited: this.props.statesVisited,
      })
    );
  }

  /**
   * Marks the session `failed` and records a `CrawlFailed` event.
   * Does nothing when the session has already left the `running` status.
   *
   * @param reason - Description of the failure, copied into the event payload.
   */
  fail(reason: string): void {
    if (!this.isRunning()) {
      return;
    }
    this.props = { ...this.props, status: 'failed' };
    this.addDomainEvent(
      new CrawlFailed(this.id.toString(), {
        url: this.props.url,
        reason,
        statesVisited: this.props.statesVisited,
      })
    );
  }

  /**
   * Marks the session `stopped` and records a `CrawlCompleted` event whose
   * payload carries `stopped: true`.
   * Does nothing when the session has already left the `running` status.
   */
  stop(): void {
    if (!this.isRunning()) {
      return;
    }
    this.props = { ...this.props, status: 'stopped' };
    this.addDomainEvent(
      new CrawlCompleted(this.id.toString(), {
        url: this.props.url,
        statesVisited: this.props.statesVisited,
        stopped: true,
      })
    );
  }

  /**
   * True while no terminal transition has happened. Guarding on this keeps a
   * terminal status from being overwritten and prevents a second terminal
   * event from being recorded for the same session.
   */
  private isRunning(): boolean {
    return this.props.status === 'running';
  }
}

View File

@@ -0,0 +1,45 @@
import { Entity } from '../../../../shared/domain/Entity';
import { UniqueId } from '../../../../shared/domain/UniqueId';
/**
 * Data carried by a CrawlState entity.
 *
 * NOTE(review): nothing in this file constrains these fields beyond their
 * types; the notes marked "presumably" are inferred from the names and should
 * be confirmed against the code that captures states.
 */
interface CrawlStateProps {
  /** Identifier of the owning session — presumably a CrawlSession id. */
  sessionId: string;
  /** State identifier stored alongside the entity's own identity — TODO confirm how the two relate. */
  stateId: string;

  /** URL recorded for the state. */
  url: string;
  /** Title recorded for the state — presumably the page title. */
  title: string;
  /** DOM snapshot as a string — presumably serialized markup; format not visible here. */
  domSnapshot: string;

  /** Visit counter recorded for the state. */
  visitCount: number;
}
/**
 * Entity exposing read-only accessors over a set of crawl-state properties.
 *
 * The constructor is private; instances are obtained through
 * `CrawlState.create`.
 */
export class CrawlState extends Entity<CrawlStateProps> {
  private constructor(props: CrawlStateProps, id?: UniqueId) {
    super(props, id);
  }

  /**
   * Wraps already-assembled props in an entity. No validation is performed.
   *
   * @param props - Values returned by the accessors below.
   * @param id - Optional identity, forwarded unchanged to the `Entity` base constructor.
   * @returns The new state.
   */
  static create(props: CrawlStateProps, id?: UniqueId): CrawlState {
    const state = new CrawlState(props, id);
    return state;
  }

  // --- references -----------------------------------------------------------

  /** The `sessionId` given at creation. */
  get sessionId(): string {
    return this.props.sessionId;
  }

  /** The `stateId` given at creation. */
  get stateId(): string {
    return this.props.stateId;
  }

  // --- captured content -----------------------------------------------------

  /** The URL given at creation. */
  get url(): string {
    return this.props.url;
  }

  /** The title given at creation. */
  get title(): string {
    return this.props.title;
  }

  /** The DOM snapshot string given at creation. */
  get domSnapshot(): string {
    return this.props.domSnapshot;
  }

  /** The visit counter given at creation. */
  get visitCount(): number {
    return this.props.visitCount;
  }
}

View File

@@ -0,0 +1,13 @@
import { randomUUID } from 'crypto';
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
/**
 * Domain event named `crawl.action_executed`.
 *
 * Each instance gets a fresh random UUID and is stamped with its construction
 * time. No emitter of this event is visible in this file.
 */
export class ActionExecuted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Constant event name. */
  public readonly eventName = 'crawl.action_executed' as const;
  /** Moment the instance was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - Identifier of the aggregate the event refers to.
   * @param payload - Free-form event data supplied by the emitter.
   */
  public constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}

View File

@@ -0,0 +1,13 @@
import { randomUUID } from 'crypto';
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
/**
 * Domain event named `crawl.completed`.
 *
 * `CrawlSession.complete()` records it with `url` and `statesVisited` in the
 * payload; `CrawlSession.stop()` records it with an additional
 * `stopped: true`. Each instance gets a fresh random UUID and is stamped with
 * its construction time.
 */
export class CrawlCompleted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Constant event name. */
  public readonly eventName = 'crawl.completed' as const;
  /** Moment the instance was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - Identifier of the aggregate the event refers to.
   * @param payload - Free-form event data supplied by the emitter.
   */
  public constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}

View File

@@ -0,0 +1,13 @@
import { randomUUID } from 'crypto';
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
/**
 * Domain event named `crawl.failed`.
 *
 * `CrawlSession.fail()` records it with `url`, `reason` and `statesVisited`
 * in the payload. Each instance gets a fresh random UUID and is stamped with
 * its construction time.
 */
export class CrawlFailed implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Constant event name. */
  public readonly eventName = 'crawl.failed' as const;
  /** Moment the instance was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - Identifier of the aggregate the event refers to.
   * @param payload - Free-form event data supplied by the emitter.
   */
  public constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}

View File

@@ -0,0 +1,13 @@
import { randomUUID } from 'crypto';
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
/**
 * Domain event named `crawl.started`.
 *
 * `CrawlSession.create()` records it with `url`, `seed` and `maxStates` in
 * the payload. Each instance gets a fresh random UUID and is stamped with its
 * construction time.
 */
export class CrawlStarted implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Constant event name. */
  public readonly eventName = 'crawl.started' as const;
  /** Moment the instance was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - Identifier of the aggregate the event refers to.
   * @param payload - Free-form event data supplied by the emitter.
   */
  public constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}

View File

@@ -0,0 +1,13 @@
import { randomUUID } from 'crypto';
import { DomainEvent } from '../../../../shared/domain/DomainEvent';
/**
 * Domain event named `crawl.state_discovered`.
 *
 * Each instance gets a fresh random UUID and is stamped with its construction
 * time. No emitter of this event is visible in this file.
 */
export class StateDiscovered implements DomainEvent {
  /** Random UUID generated per instance. */
  public readonly eventId = randomUUID();
  /** Constant event name. */
  public readonly eventName = 'crawl.state_discovered' as const;
  /** Moment the instance was constructed. */
  public readonly occurredOn: Date = new Date();

  /**
   * @param aggregateId - Identifier of the aggregate the event refers to.
   * @param payload - Free-form event data supplied by the emitter.
   */
  public constructor(
    public readonly aggregateId: string,
    public readonly payload: Record<string, unknown>,
  ) {}
}

View File

@@ -0,0 +1,9 @@
import { CrawlSession } from '../entities/CrawlSession';
import { UniqueId } from '../../../../shared/domain/UniqueId';
/**
 * Storage contract for `CrawlSession` aggregates.
 *
 * Only signatures are declared here. The member notes describe the presumed
 * intent and should be confirmed against the concrete implementation.
 */
export interface ICrawlSessionRepository {
  // --- queries ---

  /** Resolves to the session with the given identity, or `null` (per the return type) when there is none. */
  findById(id: UniqueId): Promise<CrawlSession | null>;

  /** Resolves to a list of sessions — presumably every stored one. */
  findAll(): Promise<CrawlSession[]>;

  // --- commands ---

  /** Presumably stores a new session. NOTE(review): how this differs from `update` is not visible here. */
  save(session: CrawlSession): Promise<void>;

  /** Presumably writes changes to an already stored session. */
  update(session: CrawlSession): Promise<void>;
}

View File

@@ -0,0 +1,9 @@
import { IState, IAction, IObservation } from '../../../../core/interfaces';
/**
 * Contract for the component that drives the crawled target.
 *
 * Only signatures are declared here. The member notes describe the presumed
 * intent (inferred from names and types) and should be confirmed against the
 * concrete engine.
 */
export interface ICrawlerEngine {
  // --- lifecycle ---

  /** Presumably opens the target at `url` and prepares the engine for use. */
  launch(url: string): Promise<void>;

  /** Presumably releases whatever `launch` acquired. */
  close(): Promise<void>;

  // --- exploration ---

  /** Resolves to an `IState` — presumably a capture of the target's current state. */
  captureState(): Promise<IState>;

  /** Resolves to the `IAction`s found for the given state. */
  discoverActions(state: IState): Promise<IAction[]>;

  /** Runs one action and resolves to the resulting `IObservation`. */
  executeAction(action: IAction): Promise<IObservation>;
}

View File

@@ -0,0 +1,10 @@
import { CrawlState } from '../entities/CrawlState';
import { UniqueId } from '../../../../shared/domain/UniqueId';
/**
 * Storage contract for `CrawlState` entities.
 *
 * Only signatures are declared here. The member notes describe the presumed
 * intent and should be confirmed against the concrete implementation.
 */
export interface IStateRepository {
  // --- queries ---

  /** Resolves to the state with the given identity, or `null` (per the return type) when there is none. */
  findById(id: UniqueId): Promise<CrawlState | null>;

  /** Resolves to a list of states — presumably every stored one. */
  findAll(): Promise<CrawlState[]>;

  /** Resolves to the states associated with `sessionId` — presumably matched on `CrawlState.sessionId`. */
  findBySessionId(sessionId: string): Promise<CrawlState[]>;

  // --- commands ---

  /** Presumably stores a new state. NOTE(review): how this differs from `update` is not visible here. */
  save(state: CrawlState): Promise<void>;

  /** Presumably writes changes to an already stored state. */
  update(state: CrawlState): Promise<void>;
}

View File

@@ -0,0 +1,23 @@
import { ValueObject } from '../../../../shared/domain/ValueObject';
import { Result, Ok, Err } from '../../../../shared/domain/Result';
/** Backing data of a Selector: the trimmed text accepted by `Selector.create`. */
interface SelectorProps {
  value: string;
}

/**
 * Value object wrapping a non-empty selector string.
 *
 * Only emptiness is checked; the selector's syntax is not validated.
 */
export class Selector extends ValueObject<SelectorProps> {
  private constructor(props: SelectorProps) {
    super(props);
  }

  /**
   * Builds a selector from raw text.
   *
   * @param raw - Candidate selector; surrounding whitespace is ignored.
   * @returns `Ok` holding the trimmed selector, or `Err` with
   *   `'Selector must not be empty'` when `raw` is empty or whitespace only.
   */
  static create(raw: string): Result<Selector, string> {
    // A falsy input never reaches trim(), exactly like an empty string.
    const trimmed = raw ? raw.trim() : '';
    if (trimmed === '') {
      return Err('Selector must not be empty');
    }

    const props: SelectorProps = { value: trimmed };
    return Ok(new Selector(props));
  }

  /** Returns the trimmed selector text. */
  toString(): string {
    return this.props.value;
  }
}

View File

@@ -0,0 +1,27 @@
import { ValueObject } from '../../../../shared/domain/ValueObject';
import { Result, Ok, Err } from '../../../../shared/domain/Result';
/** Every status a session may hold, in the order used by the error message. */
const VALID_STATUSES = ['running', 'completed', 'failed', 'stopped'] as const;

/** Union of the literals listed in `VALID_STATUSES`. */
type StatusValue = (typeof VALID_STATUSES)[number];

/** Backing data of a SessionStatus. */
interface SessionStatusProps {
  value: StatusValue;
}

/** Narrows an arbitrary string to `StatusValue` by membership in `VALID_STATUSES`. */
function isStatusValue(candidate: string): candidate is StatusValue {
  const allowed: readonly string[] = VALID_STATUSES;
  return allowed.includes(candidate);
}

/** Value object holding one of the allowed session statuses. */
export class SessionStatus extends ValueObject<SessionStatusProps> {
  private constructor(props: SessionStatusProps) {
    super(props);
  }

  /**
   * Builds a status from a raw string.
   *
   * @param val - Candidate status; must match an allowed value exactly.
   * @returns `Ok` holding the status, or `Err` with a message naming the
   *   rejected input and listing the allowed values.
   */
  static create(val: string): Result<SessionStatus, string> {
    if (isStatusValue(val)) {
      return Ok(new SessionStatus({ value: val }));
    }
    return Err(`Invalid session status: "${val}". Must be one of: ${VALID_STATUSES.join(', ')}`);
  }

  /** Returns the wrapped status literal. */
  getValue(): StatusValue {
    return this.props.value;
  }
}

View File

@@ -0,0 +1,27 @@
import { ValueObject } from '../../../../shared/domain/ValueObject';
import { Result, Ok, Err } from '../../../../shared/domain/Result';
/** Backing data of a Url: the trimmed text accepted by `Url.create`. */
interface UrlProps {
  value: string;
}

/**
 * Value object wrapping an absolute `http://` or `https://` URL.
 *
 * The stored text is the trimmed input exactly as given; it is not rewritten
 * into a normalized form.
 */
export class Url extends ValueObject<UrlProps> {
  private constructor(props: UrlProps) {
    super(props);
  }

  /**
   * Builds a URL value object from raw text.
   *
   * @param raw - Candidate URL; surrounding whitespace is ignored.
   * @returns `Ok` holding the trimmed URL, or `Err` with a message when the
   *   input is empty, does not start with `http://` / `https://`, or cannot
   *   be parsed as a URL (for example `"http://"` with no host).
   */
  static create(raw: string): Result<Url, string> {
    if (!raw || raw.trim().length === 0) {
      return Err('URL must not be empty');
    }
    const trimmed = raw.trim();
    if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) {
      return Err('URL must start with http:// or https://');
    }
    // The prefix test alone lets through strings such as "http://" or
    // "https://exa mple.com"; reject anything the URL parser cannot accept.
    if (!Url.isWellFormed(trimmed)) {
      return Err('URL is not a well-formed http(s) URL');
    }
    return Ok(new Url({ value: trimmed }));
  }

  /** Returns the trimmed URL text. */
  toString(): string {
    return this.props.value;
  }

  /** True when the global WHATWG `URL` parser accepts the text and yields a host. */
  private static isWellFormed(candidate: string): boolean {
    try {
      return new URL(candidate).hostname.length > 0;
    } catch {
      // `new URL` throws a TypeError for input it cannot parse.
      return false;
    }
  }
}