fase(4): crawling infrastructure with migrated code
This commit is contained in:
@@ -1 +1 @@
|
|||||||
f8191133c83b74200c5f851d012051527da899d6
|
39c5313ba581cfeb5d793df7c3cef21923473127
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
{"status": "completed", "timestamp": "2026-03-04 04:32:44"}
|
{"status": "failed", "timestamp": "2026-03-04 16:32:12"}
|
||||||
|
|||||||
@@ -11,6 +11,10 @@ class CrawlSession extends AggregateRoot_1.AggregateRoot {
|
|||||||
constructor(props, id) {
|
constructor(props, id) {
|
||||||
super(props, id);
|
super(props, id);
|
||||||
}
|
}
|
||||||
|
/** Reconstruct from persistence without emitting domain events */
|
||||||
|
static reconstitute(props, id) {
|
||||||
|
return new CrawlSession(props, id);
|
||||||
|
}
|
||||||
static create(request) {
|
static create(request) {
|
||||||
const urlResult = Url_1.Url.create(request.url);
|
const urlResult = Url_1.Url.create(request.url);
|
||||||
if (!urlResult.ok) {
|
if (!urlResult.ok) {
|
||||||
|
|||||||
72
dist/modules/crawling/infrastructure/adapters/CrawlingStateGraph.js
vendored
Normal file
72
dist/modules/crawling/infrastructure/adapters/CrawlingStateGraph.js
vendored
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.CrawlingStateGraph = void 0;
|
||||||
|
class CrawlingStateGraph {
|
||||||
|
constructor() {
|
||||||
|
this.states = new Map();
|
||||||
|
this.transitions = [];
|
||||||
|
/** Insertion order for BFS */
|
||||||
|
this.insertionOrder = [];
|
||||||
|
}
|
||||||
|
addState(state) {
|
||||||
|
if (!this.states.has(state.id)) {
|
||||||
|
this.states.set(state.id, state);
|
||||||
|
this.insertionOrder.push(state.id);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
const existing = this.states.get(state.id);
|
||||||
|
this.states.set(state.id, { ...existing, visitCount: existing.visitCount + 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hasState(stateId) {
|
||||||
|
return this.states.has(stateId);
|
||||||
|
}
|
||||||
|
getState(stateId) {
|
||||||
|
return this.states.get(stateId);
|
||||||
|
}
|
||||||
|
incrementVisit(stateId) {
|
||||||
|
const state = this.states.get(stateId);
|
||||||
|
if (state) {
|
||||||
|
this.states.set(stateId, { ...state, visitCount: state.visitCount + 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
recordTransition(fromId, action, toId) {
|
||||||
|
this.transitions.push({ fromId, action, toId, timestamp: Date.now() });
|
||||||
|
}
|
||||||
|
getUnvisited() {
|
||||||
|
return this.insertionOrder
|
||||||
|
.map((id) => this.states.get(id))
|
||||||
|
.filter((s) => s.visitCount === 0);
|
||||||
|
}
|
||||||
|
/** BFS heuristic: returns the oldest unvisited state, or null if none */
|
||||||
|
getNextToExplore() {
|
||||||
|
const unvisited = this.getUnvisited();
|
||||||
|
return unvisited.length > 0 ? unvisited[0] : null;
|
||||||
|
}
|
||||||
|
getAllStates() {
|
||||||
|
return this.insertionOrder.map((id) => this.states.get(id));
|
||||||
|
}
|
||||||
|
getTransitions() {
|
||||||
|
return [...this.transitions];
|
||||||
|
}
|
||||||
|
toJSON() {
|
||||||
|
return {
|
||||||
|
stateCount: this.states.size,
|
||||||
|
transitionCount: this.transitions.length,
|
||||||
|
states: this.getAllStates().map((s) => ({
|
||||||
|
id: s.id,
|
||||||
|
url: s.url,
|
||||||
|
title: s.title,
|
||||||
|
visitCount: s.visitCount,
|
||||||
|
})),
|
||||||
|
transitions: this.transitions.map((t) => ({
|
||||||
|
fromId: t.fromId,
|
||||||
|
toId: t.toId,
|
||||||
|
actionId: t.action.id,
|
||||||
|
actionType: t.action.type,
|
||||||
|
timestamp: t.timestamp,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.CrawlingStateGraph = CrawlingStateGraph;
|
||||||
180
dist/modules/crawling/infrastructure/adapters/ExplorationOrchestrator.js
vendored
Normal file
180
dist/modules/crawling/infrastructure/adapters/ExplorationOrchestrator.js
vendored
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.ExplorationOrchestrator = void 0;
|
||||||
|
const AnomalyDetector_1 = require("../../../../core/AnomalyDetector");
|
||||||
|
const Logger_1 = require("../../../../core/Logger");
|
||||||
|
class ExplorationOrchestrator {
|
||||||
|
constructor(config) {
|
||||||
|
this.actionTrace = [];
|
||||||
|
this.aborted = false;
|
||||||
|
this.graph = config.graph;
|
||||||
|
this.engine = config.engine;
|
||||||
|
this.detector = config.detector ?? new AnomalyDetector_1.AnomalyDetector();
|
||||||
|
this.collectors = config.collectors ?? [];
|
||||||
|
this.exporters = config.exporters ?? [];
|
||||||
|
this.reproducer = config.reproducer;
|
||||||
|
this.logger = config.logger ?? new Logger_1.NullLogger();
|
||||||
|
this.seed = config.seed;
|
||||||
|
this.url = config.url;
|
||||||
|
this.maxSteps = config.maxSteps ?? 100;
|
||||||
|
this.outputDir = config.outputDir ?? './reports';
|
||||||
|
this.events = config.events ?? {};
|
||||||
|
this.sessionId = config.sessionId ?? `${Date.now()}_${config.seed}`;
|
||||||
|
this.explorationConfig = config.explorationConfig ?? {};
|
||||||
|
this.fuzzingPlugin = config.fuzzingPlugin;
|
||||||
|
this.stateHooks = config.stateHooks ?? [];
|
||||||
|
}
|
||||||
|
stop() {
|
||||||
|
this.aborted = true;
|
||||||
|
}
|
||||||
|
async run() {
|
||||||
|
const anomalies = [];
|
||||||
|
let stepsExecuted = 0;
|
||||||
|
let depth = 0;
|
||||||
|
const sessionTimeoutMs = this.explorationConfig.sessionTimeoutMs ?? 0;
|
||||||
|
const maxDepth = this.explorationConfig.maxDepth ?? Infinity;
|
||||||
|
const sessionStart = Date.now();
|
||||||
|
this.logger.log({
|
||||||
|
event: 'session_start',
|
||||||
|
timestamp: sessionStart,
|
||||||
|
seed: this.seed,
|
||||||
|
target: this.url,
|
||||||
|
});
|
||||||
|
this.events.onSessionStarted?.(this.sessionId, this.url);
|
||||||
|
const isTimedOut = () => sessionTimeoutMs > 0 && Date.now() - sessionStart >= sessionTimeoutMs;
|
||||||
|
try {
|
||||||
|
await this.engine.launch(this.url);
|
||||||
|
const initialState = await this.engine.captureState();
|
||||||
|
this.graph.addState(initialState);
|
||||||
|
this.logger.log({
|
||||||
|
event: 'state_discovered',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: initialState.id,
|
||||||
|
url: initialState.url,
|
||||||
|
title: initialState.title,
|
||||||
|
});
|
||||||
|
this.events.onStateDiscovered?.(this.sessionId, initialState.id, initialState.url, initialState.title);
|
||||||
|
while (stepsExecuted < this.maxSteps && !this.aborted && !isTimedOut() && depth <= maxDepth) {
|
||||||
|
const currentState = this.graph.getNextToExplore();
|
||||||
|
if (!currentState)
|
||||||
|
break;
|
||||||
|
this.graph.incrementVisit(currentState.id);
|
||||||
|
const actions = await this.engine.discoverActions(currentState);
|
||||||
|
if (actions.length === 0)
|
||||||
|
continue;
|
||||||
|
const actionIndex = (this.seed + stepsExecuted) % actions.length;
|
||||||
|
const action = actions[actionIndex];
|
||||||
|
this.logger.log({
|
||||||
|
event: 'action_executed',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
actionId: action.id,
|
||||||
|
type: action.type,
|
||||||
|
selector: action.selector,
|
||||||
|
value: action.value,
|
||||||
|
url: action.url,
|
||||||
|
});
|
||||||
|
this.events.onActionExecuted?.(this.sessionId, action.type, action.selector, Date.now());
|
||||||
|
const observation = await this.engine.executeAction(action);
|
||||||
|
this.actionTrace.push(action);
|
||||||
|
if (!this.graph.hasState(observation.newStateId)) {
|
||||||
|
const newState = await this.engine.captureState();
|
||||||
|
this.graph.addState(newState);
|
||||||
|
depth += 1;
|
||||||
|
this.logger.log({
|
||||||
|
event: 'state_discovered',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: newState.id,
|
||||||
|
url: newState.url,
|
||||||
|
title: newState.title,
|
||||||
|
});
|
||||||
|
this.events.onStateDiscovered?.(this.sessionId, newState.id, newState.url, newState.title);
|
||||||
|
for (const hook of this.stateHooks) {
|
||||||
|
const hookAnomalies = await hook(newState, this.engine, this.sessionId, [...this.actionTrace]).catch(() => []);
|
||||||
|
for (const anomaly of hookAnomalies) {
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
this.logger.log({
|
||||||
|
event: 'anomaly_detected',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
anomalyId: anomaly.id,
|
||||||
|
type: anomaly.type,
|
||||||
|
severity: anomaly.severity,
|
||||||
|
});
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
await exporter.export(anomaly, `${this.outputDir}/${anomaly.id}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.graph.recordTransition(currentState.id, action, observation.newStateId);
|
||||||
|
this.logger.log({
|
||||||
|
event: 'exploration_step',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: currentState.id,
|
||||||
|
actionId: action.id,
|
||||||
|
});
|
||||||
|
const detected = this.detector.detect(observation, [...this.actionTrace]);
|
||||||
|
for (const anomaly of detected) {
|
||||||
|
for (const collector of this.collectors) {
|
||||||
|
const evidence = await collector.collect(anomaly, this.engine);
|
||||||
|
Object.assign(anomaly.evidence, evidence);
|
||||||
|
}
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
this.logger.log({
|
||||||
|
event: 'anomaly_detected',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
anomalyId: anomaly.id,
|
||||||
|
type: anomaly.type,
|
||||||
|
severity: anomaly.severity,
|
||||||
|
});
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
const reportDir = `${this.outputDir}/${anomaly.id}`;
|
||||||
|
await exporter.export(anomaly, reportDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stepsExecuted += 1;
|
||||||
|
if (this.fuzzingPlugin &&
|
||||||
|
this.explorationConfig.fuzzingEnabled !== false &&
|
||||||
|
currentState.domSnapshot) {
|
||||||
|
const fuzzActions = this.fuzzingPlugin.generateFuzzActions(currentState.domSnapshot, currentState);
|
||||||
|
for (const fuzzAction of fuzzActions) {
|
||||||
|
if (this.aborted || isTimedOut())
|
||||||
|
break;
|
||||||
|
const fuzzObs = await this.engine.executeAction(fuzzAction);
|
||||||
|
this.actionTrace.push(fuzzAction);
|
||||||
|
const fuzzAnomalies = this.detector.detect(fuzzObs, [...this.actionTrace]);
|
||||||
|
for (const anomaly of fuzzAnomalies) {
|
||||||
|
for (const collector of this.collectors) {
|
||||||
|
const evidence = await collector.collect(anomaly, this.engine);
|
||||||
|
Object.assign(anomaly.evidence, evidence);
|
||||||
|
}
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
await exporter.export(anomaly, `${this.outputDir}/${anomaly.id}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
this.events.onSessionError?.(this.sessionId, msg);
|
||||||
|
await this.engine.close().catch(() => undefined);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
await this.engine.close();
|
||||||
|
const statesVisited = this.graph.getAllStates().filter((s) => s.visitCount > 0).length;
|
||||||
|
this.logger.log({
|
||||||
|
event: 'session_end',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
statesVisited,
|
||||||
|
anomaliesFound: anomalies.length,
|
||||||
|
});
|
||||||
|
this.events.onSessionCompleted?.(this.sessionId, statesVisited, anomalies.length);
|
||||||
|
return { statesVisited, anomaliesFound: anomalies.length, anomalies };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.ExplorationOrchestrator = ExplorationOrchestrator;
|
||||||
13
dist/modules/crawling/infrastructure/adapters/PlaywrightCrawlerEngine.js
vendored
Normal file
13
dist/modules/crawling/infrastructure/adapters/PlaywrightCrawlerEngine.js
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.PlaywrightCrawlerEngine = void 0;
|
||||||
|
/**
|
||||||
|
* PlaywrightCrawlerEngine — adapts PlaywrightAgent to implement the ICrawlerEngine port.
|
||||||
|
*/
|
||||||
|
const PlaywrightAgent_1 = require("../../../../plugins/agents/PlaywrightAgent");
|
||||||
|
class PlaywrightCrawlerEngine extends PlaywrightAgent_1.PlaywrightAgent {
|
||||||
|
constructor(config = {}) {
|
||||||
|
super(config);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.PlaywrightCrawlerEngine = PlaywrightCrawlerEngine;
|
||||||
56
dist/modules/crawling/infrastructure/http/CrawlingController.js
vendored
Normal file
56
dist/modules/crawling/infrastructure/http/CrawlingController.js
vendored
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.createCrawlingRouter = createCrawlingRouter;
|
||||||
|
/**
|
||||||
|
* CrawlingController — thin Express controller for crawling routes.
|
||||||
|
* Delegates to use cases; returns Result-based responses.
|
||||||
|
*/
|
||||||
|
const express_1 = require("express");
|
||||||
|
function createCrawlingRouter(deps) {
|
||||||
|
const router = (0, express_1.Router)();
|
||||||
|
// POST /api/sessions — start a new crawl session
|
||||||
|
router.post('/', async (req, res) => {
|
||||||
|
const body = req.body;
|
||||||
|
const { url, seed = 42, maxStates = 50, config } = body;
|
||||||
|
if (!url || typeof url !== 'string') {
|
||||||
|
res.status(400).json({ error: 'url is required' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const result = await deps.startCrawl.execute({ url, seed, maxStates, config });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(422).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.status(201).json(result.value);
|
||||||
|
});
|
||||||
|
// GET /api/sessions — list all sessions
|
||||||
|
router.get('/', async (_req, res) => {
|
||||||
|
const result = await deps.listSessions.execute({});
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(500).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json(result.value);
|
||||||
|
});
|
||||||
|
// GET /api/sessions/:id — session detail
|
||||||
|
router.get('/:id', async (req, res) => {
|
||||||
|
const sessionId = req.params['id'];
|
||||||
|
const result = await deps.getSession.execute({ sessionId });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(404).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json(result.value);
|
||||||
|
});
|
||||||
|
// DELETE /api/sessions/:id — stop a session
|
||||||
|
router.delete('/:id', async (req, res) => {
|
||||||
|
const sessionId = req.params['id'];
|
||||||
|
const result = await deps.stopCrawl.execute({ sessionId });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(404).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json({ stopped: true });
|
||||||
|
});
|
||||||
|
return router;
|
||||||
|
}
|
||||||
79
dist/modules/crawling/infrastructure/repositories/KyselyCrawlSessionRepository.js
vendored
Normal file
79
dist/modules/crawling/infrastructure/repositories/KyselyCrawlSessionRepository.js
vendored
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.KyselyCrawlSessionRepository = void 0;
|
||||||
|
const CrawlSession_1 = require("../../domain/entities/CrawlSession");
|
||||||
|
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
|
||||||
|
class KyselyCrawlSessionRepository {
|
||||||
|
constructor(db) {
|
||||||
|
this.db = db;
|
||||||
|
}
|
||||||
|
async save(session) {
|
||||||
|
const row = {
|
||||||
|
id: session.id.toString(),
|
||||||
|
url: session.url,
|
||||||
|
status: session.status,
|
||||||
|
seed: session.seed,
|
||||||
|
max_states: session.maxStates,
|
||||||
|
states_visited: session.statesVisited,
|
||||||
|
anomalies_found: 0,
|
||||||
|
started_at: Date.now(),
|
||||||
|
finished_at: null,
|
||||||
|
config_json: JSON.stringify(session.config),
|
||||||
|
};
|
||||||
|
await this.db
|
||||||
|
.insertInto('sessions')
|
||||||
|
.values(row)
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
async findById(id) {
|
||||||
|
const row = await this.db
|
||||||
|
.selectFrom('sessions')
|
||||||
|
.selectAll()
|
||||||
|
.where('id', '=', id.toString())
|
||||||
|
.executeTakeFirst();
|
||||||
|
if (!row)
|
||||||
|
return null;
|
||||||
|
return this.toDomain(row);
|
||||||
|
}
|
||||||
|
async findAll() {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('sessions')
|
||||||
|
.selectAll()
|
||||||
|
.orderBy('started_at', 'desc')
|
||||||
|
.execute();
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
async update(session) {
|
||||||
|
const isTerminal = session.status === 'completed' || session.status === 'failed' || session.status === 'stopped';
|
||||||
|
await this.db
|
||||||
|
.updateTable('sessions')
|
||||||
|
.set({
|
||||||
|
status: session.status,
|
||||||
|
states_visited: session.statesVisited,
|
||||||
|
finished_at: isTerminal ? Date.now() : null,
|
||||||
|
config_json: JSON.stringify(session.config),
|
||||||
|
})
|
||||||
|
.where('id', '=', session.id.toString())
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
toDomain(row) {
|
||||||
|
const props = {
|
||||||
|
url: row.url,
|
||||||
|
status: row.status,
|
||||||
|
seed: row.seed,
|
||||||
|
maxStates: row.max_states,
|
||||||
|
statesVisited: row.states_visited,
|
||||||
|
config: this.parseJson(row.config_json),
|
||||||
|
};
|
||||||
|
return CrawlSession_1.CrawlSession.reconstitute(props, UniqueId_1.UniqueId.from(row.id));
|
||||||
|
}
|
||||||
|
parseJson(json) {
|
||||||
|
try {
|
||||||
|
return JSON.parse(json);
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.KyselyCrawlSessionRepository = KyselyCrawlSessionRepository;
|
||||||
72
dist/modules/crawling/infrastructure/repositories/KyselyStateRepository.js
vendored
Normal file
72
dist/modules/crawling/infrastructure/repositories/KyselyStateRepository.js
vendored
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"use strict";
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.KyselyStateRepository = void 0;
|
||||||
|
const CrawlState_1 = require("../../domain/entities/CrawlState");
|
||||||
|
const UniqueId_1 = require("../../../../shared/domain/UniqueId");
|
||||||
|
class KyselyStateRepository {
|
||||||
|
constructor(db) {
|
||||||
|
this.db = db;
|
||||||
|
}
|
||||||
|
async save(state) {
|
||||||
|
const row = {
|
||||||
|
id: state.id.toString(),
|
||||||
|
session_id: state.sessionId,
|
||||||
|
url: state.url,
|
||||||
|
title: state.title,
|
||||||
|
dom_snapshot_path: null,
|
||||||
|
visit_count: state.visitCount,
|
||||||
|
discovered_at: Date.now(),
|
||||||
|
};
|
||||||
|
await this.db
|
||||||
|
.insertInto('states')
|
||||||
|
.values(row)
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
async findById(id) {
|
||||||
|
const row = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.where('id', '=', id.toString())
|
||||||
|
.executeTakeFirst();
|
||||||
|
if (!row)
|
||||||
|
return null;
|
||||||
|
return this.toDomain(row);
|
||||||
|
}
|
||||||
|
async findAll() {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.execute();
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
async findBySessionId(sessionId) {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.where('session_id', '=', sessionId)
|
||||||
|
.execute();
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
async update(state) {
|
||||||
|
await this.db
|
||||||
|
.updateTable('states')
|
||||||
|
.set({
|
||||||
|
visit_count: state.visitCount,
|
||||||
|
url: state.url,
|
||||||
|
title: state.title,
|
||||||
|
})
|
||||||
|
.where('id', '=', state.id.toString())
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
toDomain(row) {
|
||||||
|
return CrawlState_1.CrawlState.create({
|
||||||
|
url: row.url,
|
||||||
|
title: row.title,
|
||||||
|
domSnapshot: '',
|
||||||
|
visitCount: row.visit_count,
|
||||||
|
stateId: row.id,
|
||||||
|
sessionId: row.session_id,
|
||||||
|
}, UniqueId_1.UniqueId.from(row.id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.KyselyStateRepository = KyselyStateRepository;
|
||||||
@@ -8,7 +8,7 @@ import { CrawlFailed } from '../events/CrawlFailed';
|
|||||||
|
|
||||||
type SessionStatusValue = 'running' | 'completed' | 'failed' | 'stopped';
|
type SessionStatusValue = 'running' | 'completed' | 'failed' | 'stopped';
|
||||||
|
|
||||||
interface CrawlSessionProps {
|
export interface CrawlSessionProps {
|
||||||
url: string;
|
url: string;
|
||||||
status: SessionStatusValue;
|
status: SessionStatusValue;
|
||||||
seed: number;
|
seed: number;
|
||||||
@@ -29,6 +29,11 @@ export class CrawlSession extends AggregateRoot<CrawlSessionProps> {
|
|||||||
super(props, id);
|
super(props, id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Reconstruct from persistence without emitting domain events */
|
||||||
|
static reconstitute(props: CrawlSessionProps, id: UniqueId): CrawlSession {
|
||||||
|
return new CrawlSession(props, id);
|
||||||
|
}
|
||||||
|
|
||||||
static create(request: CreateCrawlSessionRequest): Result<CrawlSession, string> {
|
static create(request: CreateCrawlSessionRequest): Result<CrawlSession, string> {
|
||||||
const urlResult = Url.create(request.url);
|
const urlResult = Url.create(request.url);
|
||||||
if (!urlResult.ok) {
|
if (!urlResult.ok) {
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
/**
|
||||||
|
* CrawlingStateGraph — copy of StateGraph for the crawling module.
|
||||||
|
* Uses BFS ordering by default for exploration scheduling.
|
||||||
|
*/
|
||||||
|
import { IState, IAction } from '../../../../core/interfaces';
|
||||||
|
|
||||||
|
export interface ITransition {
|
||||||
|
fromId: string;
|
||||||
|
action: IAction;
|
||||||
|
toId: string;
|
||||||
|
timestamp: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export class CrawlingStateGraph {
|
||||||
|
private states: Map<string, IState> = new Map();
|
||||||
|
private transitions: ITransition[] = [];
|
||||||
|
/** Insertion order for BFS */
|
||||||
|
private insertionOrder: string[] = [];
|
||||||
|
|
||||||
|
addState(state: IState): void {
|
||||||
|
if (!this.states.has(state.id)) {
|
||||||
|
this.states.set(state.id, state);
|
||||||
|
this.insertionOrder.push(state.id);
|
||||||
|
} else {
|
||||||
|
const existing = this.states.get(state.id)!;
|
||||||
|
this.states.set(state.id, { ...existing, visitCount: existing.visitCount + 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hasState(stateId: string): boolean {
|
||||||
|
return this.states.has(stateId);
|
||||||
|
}
|
||||||
|
|
||||||
|
getState(stateId: string): IState | undefined {
|
||||||
|
return this.states.get(stateId);
|
||||||
|
}
|
||||||
|
|
||||||
|
incrementVisit(stateId: string): void {
|
||||||
|
const state = this.states.get(stateId);
|
||||||
|
if (state) {
|
||||||
|
this.states.set(stateId, { ...state, visitCount: state.visitCount + 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
recordTransition(fromId: string, action: IAction, toId: string): void {
|
||||||
|
this.transitions.push({ fromId, action, toId, timestamp: Date.now() });
|
||||||
|
}
|
||||||
|
|
||||||
|
getUnvisited(): IState[] {
|
||||||
|
return this.insertionOrder
|
||||||
|
.map((id) => this.states.get(id)!)
|
||||||
|
.filter((s) => s.visitCount === 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** BFS heuristic: returns the oldest unvisited state, or null if none */
|
||||||
|
getNextToExplore(): IState | null {
|
||||||
|
const unvisited = this.getUnvisited();
|
||||||
|
return unvisited.length > 0 ? unvisited[0]! : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
getAllStates(): IState[] {
|
||||||
|
return this.insertionOrder.map((id) => this.states.get(id)!);
|
||||||
|
}
|
||||||
|
|
||||||
|
getTransitions(): ITransition[] {
|
||||||
|
return [...this.transitions];
|
||||||
|
}
|
||||||
|
|
||||||
|
toJSON(): object {
|
||||||
|
return {
|
||||||
|
stateCount: this.states.size,
|
||||||
|
transitionCount: this.transitions.length,
|
||||||
|
states: this.getAllStates().map((s) => ({
|
||||||
|
id: s.id,
|
||||||
|
url: s.url,
|
||||||
|
title: s.title,
|
||||||
|
visitCount: s.visitCount,
|
||||||
|
})),
|
||||||
|
transitions: this.transitions.map((t) => ({
|
||||||
|
fromId: t.fromId,
|
||||||
|
toId: t.toId,
|
||||||
|
actionId: t.action.id,
|
||||||
|
actionType: t.action.type,
|
||||||
|
timestamp: t.timestamp,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,273 @@
|
|||||||
|
/**
|
||||||
|
* ExplorationOrchestrator — adapts ExplorationEngine to use ICrawlerEngine port.
|
||||||
|
* All dependencies are injected via constructor; no direct plugin imports.
|
||||||
|
*/
|
||||||
|
import { IState, IAction, IAnomaly, ILogger, ICollector, IExporter, IReproducer, IFuzzingPlugin } from '../../../../core/interfaces';
|
||||||
|
import { ICrawlerEngine } from '../../domain/ports/ICrawlerEngine';
|
||||||
|
import { CrawlingStateGraph } from './CrawlingStateGraph';
|
||||||
|
import { AnomalyDetector } from '../../../../core/AnomalyDetector';
|
||||||
|
import { NullLogger } from '../../../../core/Logger';
|
||||||
|
import { ExplorationConfig } from '../../../../core/ExplorationConfig';
|
||||||
|
|
||||||
|
export interface OrchestratorEventCallbacks {
|
||||||
|
onSessionStarted?: (sessionId: string, url: string) => void;
|
||||||
|
onStateDiscovered?: (sessionId: string, stateId: string, url: string, title: string) => void;
|
||||||
|
onActionExecuted?: (sessionId: string, actionType: string, selector: string | undefined, timestamp: number) => void;
|
||||||
|
onAnomalyDetected?: (sessionId: string, anomaly: IAnomaly) => void;
|
||||||
|
onSessionCompleted?: (sessionId: string, statesVisited: number, anomaliesFound: number) => void;
|
||||||
|
onSessionError?: (sessionId: string, error: string) => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type StateHook = (
|
||||||
|
state: IState,
|
||||||
|
engine: ICrawlerEngine,
|
||||||
|
sessionId: string,
|
||||||
|
actionTrace: IAction[]
|
||||||
|
) => Promise<IAnomaly[]>;
|
||||||
|
|
||||||
|
export interface OrchestratorConfig {
|
||||||
|
graph: CrawlingStateGraph;
|
||||||
|
engine: ICrawlerEngine;
|
||||||
|
detector?: AnomalyDetector;
|
||||||
|
collectors?: ICollector[];
|
||||||
|
exporters?: IExporter[];
|
||||||
|
reproducer?: IReproducer;
|
||||||
|
logger?: ILogger;
|
||||||
|
seed: number;
|
||||||
|
url: string;
|
||||||
|
maxSteps?: number;
|
||||||
|
outputDir?: string;
|
||||||
|
events?: OrchestratorEventCallbacks;
|
||||||
|
sessionId?: string;
|
||||||
|
explorationConfig?: Partial<ExplorationConfig>;
|
||||||
|
fuzzingPlugin?: IFuzzingPlugin;
|
||||||
|
stateHooks?: StateHook[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface OrchestratorResult {
|
||||||
|
statesVisited: number;
|
||||||
|
anomaliesFound: number;
|
||||||
|
anomalies: IAnomaly[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export class ExplorationOrchestrator {
|
||||||
|
private graph: CrawlingStateGraph;
|
||||||
|
private engine: ICrawlerEngine;
|
||||||
|
private detector: AnomalyDetector;
|
||||||
|
private collectors: ICollector[];
|
||||||
|
private exporters: IExporter[];
|
||||||
|
private reproducer?: IReproducer;
|
||||||
|
private logger: ILogger;
|
||||||
|
private seed: number;
|
||||||
|
private url: string;
|
||||||
|
private maxSteps: number;
|
||||||
|
private outputDir: string;
|
||||||
|
private events: OrchestratorEventCallbacks;
|
||||||
|
private sessionId: string;
|
||||||
|
private explorationConfig: Partial<ExplorationConfig>;
|
||||||
|
private fuzzingPlugin?: IFuzzingPlugin;
|
||||||
|
private stateHooks: StateHook[];
|
||||||
|
private actionTrace: IAction[] = [];
|
||||||
|
private aborted = false;
|
||||||
|
|
||||||
|
constructor(config: OrchestratorConfig) {
|
||||||
|
this.graph = config.graph;
|
||||||
|
this.engine = config.engine;
|
||||||
|
this.detector = config.detector ?? new AnomalyDetector();
|
||||||
|
this.collectors = config.collectors ?? [];
|
||||||
|
this.exporters = config.exporters ?? [];
|
||||||
|
this.reproducer = config.reproducer;
|
||||||
|
this.logger = config.logger ?? new NullLogger();
|
||||||
|
this.seed = config.seed;
|
||||||
|
this.url = config.url;
|
||||||
|
this.maxSteps = config.maxSteps ?? 100;
|
||||||
|
this.outputDir = config.outputDir ?? './reports';
|
||||||
|
this.events = config.events ?? {};
|
||||||
|
this.sessionId = config.sessionId ?? `${Date.now()}_${config.seed}`;
|
||||||
|
this.explorationConfig = config.explorationConfig ?? {};
|
||||||
|
this.fuzzingPlugin = config.fuzzingPlugin;
|
||||||
|
this.stateHooks = config.stateHooks ?? [];
|
||||||
|
}
|
||||||
|
|
||||||
|
stop(): void {
|
||||||
|
this.aborted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async run(): Promise<OrchestratorResult> {
|
||||||
|
const anomalies: IAnomaly[] = [];
|
||||||
|
let stepsExecuted = 0;
|
||||||
|
let depth = 0;
|
||||||
|
const sessionTimeoutMs = this.explorationConfig.sessionTimeoutMs ?? 0;
|
||||||
|
const maxDepth = this.explorationConfig.maxDepth ?? Infinity;
|
||||||
|
const sessionStart = Date.now();
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'session_start',
|
||||||
|
timestamp: sessionStart,
|
||||||
|
seed: this.seed,
|
||||||
|
target: this.url,
|
||||||
|
});
|
||||||
|
this.events.onSessionStarted?.(this.sessionId, this.url);
|
||||||
|
|
||||||
|
const isTimedOut = (): boolean =>
|
||||||
|
sessionTimeoutMs > 0 && Date.now() - sessionStart >= sessionTimeoutMs;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.engine.launch(this.url);
|
||||||
|
|
||||||
|
const initialState = await this.engine.captureState();
|
||||||
|
this.graph.addState(initialState);
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'state_discovered',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: initialState.id,
|
||||||
|
url: initialState.url,
|
||||||
|
title: initialState.title,
|
||||||
|
});
|
||||||
|
this.events.onStateDiscovered?.(this.sessionId, initialState.id, initialState.url, initialState.title);
|
||||||
|
|
||||||
|
while (stepsExecuted < this.maxSteps && !this.aborted && !isTimedOut() && depth <= maxDepth) {
|
||||||
|
const currentState = this.graph.getNextToExplore();
|
||||||
|
if (!currentState) break;
|
||||||
|
|
||||||
|
this.graph.incrementVisit(currentState.id);
|
||||||
|
|
||||||
|
const actions = await this.engine.discoverActions(currentState);
|
||||||
|
if (actions.length === 0) continue;
|
||||||
|
|
||||||
|
const actionIndex = (this.seed + stepsExecuted) % actions.length;
|
||||||
|
const action = actions[actionIndex]!;
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'action_executed',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
actionId: action.id,
|
||||||
|
type: action.type,
|
||||||
|
selector: action.selector,
|
||||||
|
value: action.value,
|
||||||
|
url: action.url,
|
||||||
|
});
|
||||||
|
this.events.onActionExecuted?.(this.sessionId, action.type, action.selector, Date.now());
|
||||||
|
|
||||||
|
const observation = await this.engine.executeAction(action);
|
||||||
|
this.actionTrace.push(action);
|
||||||
|
|
||||||
|
if (!this.graph.hasState(observation.newStateId)) {
|
||||||
|
const newState = await this.engine.captureState();
|
||||||
|
this.graph.addState(newState);
|
||||||
|
depth += 1;
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'state_discovered',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: newState.id,
|
||||||
|
url: newState.url,
|
||||||
|
title: newState.title,
|
||||||
|
});
|
||||||
|
this.events.onStateDiscovered?.(this.sessionId, newState.id, newState.url, newState.title);
|
||||||
|
|
||||||
|
for (const hook of this.stateHooks) {
|
||||||
|
const hookAnomalies = await hook(newState, this.engine, this.sessionId, [...this.actionTrace]).catch(() => []);
|
||||||
|
for (const anomaly of hookAnomalies) {
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
this.logger.log({
|
||||||
|
event: 'anomaly_detected',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
anomalyId: anomaly.id,
|
||||||
|
type: anomaly.type,
|
||||||
|
severity: anomaly.severity,
|
||||||
|
});
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
await exporter.export(anomaly, `${this.outputDir}/${anomaly.id}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this.graph.recordTransition(currentState.id, action, observation.newStateId);
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'exploration_step',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
stateId: currentState.id,
|
||||||
|
actionId: action.id,
|
||||||
|
});
|
||||||
|
|
||||||
|
const detected = this.detector.detect(observation, [...this.actionTrace]);
|
||||||
|
for (const anomaly of detected) {
|
||||||
|
for (const collector of this.collectors) {
|
||||||
|
const evidence = await collector.collect(anomaly, this.engine as unknown as import('../../../../core/interfaces').IInteractionAgent);
|
||||||
|
Object.assign(anomaly.evidence, evidence);
|
||||||
|
}
|
||||||
|
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'anomaly_detected',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
anomalyId: anomaly.id,
|
||||||
|
type: anomaly.type,
|
||||||
|
severity: anomaly.severity,
|
||||||
|
});
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
const reportDir = `${this.outputDir}/${anomaly.id}`;
|
||||||
|
await exporter.export(anomaly, reportDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stepsExecuted += 1;
|
||||||
|
|
||||||
|
if (
|
||||||
|
this.fuzzingPlugin &&
|
||||||
|
this.explorationConfig.fuzzingEnabled !== false &&
|
||||||
|
currentState.domSnapshot
|
||||||
|
) {
|
||||||
|
const fuzzActions = this.fuzzingPlugin.generateFuzzActions(
|
||||||
|
currentState.domSnapshot,
|
||||||
|
currentState
|
||||||
|
);
|
||||||
|
for (const fuzzAction of fuzzActions) {
|
||||||
|
if (this.aborted || isTimedOut()) break;
|
||||||
|
const fuzzObs = await this.engine.executeAction(fuzzAction);
|
||||||
|
this.actionTrace.push(fuzzAction);
|
||||||
|
const fuzzAnomalies = this.detector.detect(fuzzObs, [...this.actionTrace]);
|
||||||
|
for (const anomaly of fuzzAnomalies) {
|
||||||
|
for (const collector of this.collectors) {
|
||||||
|
const evidence = await collector.collect(anomaly, this.engine as unknown as import('../../../../core/interfaces').IInteractionAgent);
|
||||||
|
Object.assign(anomaly.evidence, evidence);
|
||||||
|
}
|
||||||
|
anomalies.push(anomaly);
|
||||||
|
this.events.onAnomalyDetected?.(this.sessionId, anomaly);
|
||||||
|
for (const exporter of this.exporters) {
|
||||||
|
await exporter.export(anomaly, `${this.outputDir}/${anomaly.id}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err: unknown) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
this.events.onSessionError?.(this.sessionId, msg);
|
||||||
|
await this.engine.close().catch(() => undefined);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.engine.close();
|
||||||
|
|
||||||
|
const statesVisited = this.graph.getAllStates().filter((s) => s.visitCount > 0).length;
|
||||||
|
|
||||||
|
this.logger.log({
|
||||||
|
event: 'session_end',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
statesVisited,
|
||||||
|
anomaliesFound: anomalies.length,
|
||||||
|
});
|
||||||
|
this.events.onSessionCompleted?.(this.sessionId, statesVisited, anomalies.length);
|
||||||
|
|
||||||
|
return { statesVisited, anomaliesFound: anomalies.length, anomalies };
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
/**
|
||||||
|
* PlaywrightCrawlerEngine — adapts PlaywrightAgent to implement the ICrawlerEngine port.
|
||||||
|
*/
|
||||||
|
import { PlaywrightAgent, PlaywrightAgentConfig } from '../../../../plugins/agents/PlaywrightAgent';
|
||||||
|
import { ICrawlerEngine } from '../../domain/ports/ICrawlerEngine';
|
||||||
|
|
||||||
|
export class PlaywrightCrawlerEngine extends PlaywrightAgent implements ICrawlerEngine {
|
||||||
|
constructor(config: PlaywrightAgentConfig = {}) {
|
||||||
|
super(config);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
/**
|
||||||
|
* CrawlingController — thin Express controller for crawling routes.
|
||||||
|
* Delegates to use cases; returns Result-based responses.
|
||||||
|
*/
|
||||||
|
import { Router, Request, Response } from 'express';
|
||||||
|
import { StartCrawlCommand } from '../../application/commands/StartCrawlCommand';
|
||||||
|
import { StopCrawlCommand } from '../../application/commands/StopCrawlCommand';
|
||||||
|
import { GetSessionQuery } from '../../application/queries/GetSessionQuery';
|
||||||
|
import { ListSessionsQuery } from '../../application/queries/ListSessionsQuery';
|
||||||
|
|
||||||
|
export interface CrawlingControllerDeps {
|
||||||
|
startCrawl: StartCrawlCommand;
|
||||||
|
stopCrawl: StopCrawlCommand;
|
||||||
|
getSession: GetSessionQuery;
|
||||||
|
listSessions: ListSessionsQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createCrawlingRouter(deps: CrawlingControllerDeps): Router {
|
||||||
|
const router = Router();
|
||||||
|
|
||||||
|
// POST /api/sessions — start a new crawl session
|
||||||
|
router.post('/', async (req: Request, res: Response) => {
|
||||||
|
const body = req.body as { url?: string; seed?: number; maxStates?: number; config?: Record<string, unknown> };
|
||||||
|
const { url, seed = 42, maxStates = 50, config } = body;
|
||||||
|
|
||||||
|
if (!url || typeof url !== 'string') {
|
||||||
|
res.status(400).json({ error: 'url is required' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await deps.startCrawl.execute({ url, seed, maxStates, config });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(422).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
res.status(201).json(result.value);
|
||||||
|
});
|
||||||
|
|
||||||
|
// GET /api/sessions — list all sessions
|
||||||
|
router.get('/', async (_req: Request, res: Response) => {
|
||||||
|
const result = await deps.listSessions.execute({});
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(500).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json(result.value);
|
||||||
|
});
|
||||||
|
|
||||||
|
// GET /api/sessions/:id — session detail
|
||||||
|
router.get('/:id', async (req: Request, res: Response) => {
|
||||||
|
const sessionId = req.params['id'] as string;
|
||||||
|
const result = await deps.getSession.execute({ sessionId });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(404).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json(result.value);
|
||||||
|
});
|
||||||
|
|
||||||
|
// DELETE /api/sessions/:id — stop a session
|
||||||
|
router.delete('/:id', async (req: Request, res: Response) => {
|
||||||
|
const sessionId = req.params['id'] as string;
|
||||||
|
const result = await deps.stopCrawl.execute({ sessionId });
|
||||||
|
if (!result.ok) {
|
||||||
|
res.status(404).json({ error: result.error });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
res.json({ stopped: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
return router;
|
||||||
|
}
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
/**
|
||||||
|
* KyselyCrawlSessionRepository — implements ICrawlSessionRepository using Kysely.
|
||||||
|
*/
|
||||||
|
import { Kysely } from 'kysely';
|
||||||
|
import { Database, SessionTable } from '../../../../shared/infrastructure/DatabaseConnection';
|
||||||
|
import { ICrawlSessionRepository } from '../../domain/ports/ICrawlSessionRepository';
|
||||||
|
import { CrawlSession, CrawlSessionProps } from '../../domain/entities/CrawlSession';
|
||||||
|
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||||
|
|
||||||
|
export class KyselyCrawlSessionRepository implements ICrawlSessionRepository {
|
||||||
|
constructor(private readonly db: Kysely<Database>) {}
|
||||||
|
|
||||||
|
async save(session: CrawlSession): Promise<void> {
|
||||||
|
const row: SessionTable = {
|
||||||
|
id: session.id.toString(),
|
||||||
|
url: session.url,
|
||||||
|
status: session.status,
|
||||||
|
seed: session.seed,
|
||||||
|
max_states: session.maxStates,
|
||||||
|
states_visited: session.statesVisited,
|
||||||
|
anomalies_found: 0,
|
||||||
|
started_at: Date.now(),
|
||||||
|
finished_at: null,
|
||||||
|
config_json: JSON.stringify(session.config),
|
||||||
|
};
|
||||||
|
|
||||||
|
await this.db
|
||||||
|
.insertInto('sessions')
|
||||||
|
.values(row)
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
|
||||||
|
async findById(id: UniqueId): Promise<CrawlSession | null> {
|
||||||
|
const row = await this.db
|
||||||
|
.selectFrom('sessions')
|
||||||
|
.selectAll()
|
||||||
|
.where('id', '=', id.toString())
|
||||||
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
if (!row) return null;
|
||||||
|
|
||||||
|
return this.toDomain(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
async findAll(): Promise<CrawlSession[]> {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('sessions')
|
||||||
|
.selectAll()
|
||||||
|
.orderBy('started_at', 'desc')
|
||||||
|
.execute();
|
||||||
|
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
|
||||||
|
async update(session: CrawlSession): Promise<void> {
|
||||||
|
const isTerminal = session.status === 'completed' || session.status === 'failed' || session.status === 'stopped';
|
||||||
|
|
||||||
|
await this.db
|
||||||
|
.updateTable('sessions')
|
||||||
|
.set({
|
||||||
|
status: session.status,
|
||||||
|
states_visited: session.statesVisited,
|
||||||
|
finished_at: isTerminal ? Date.now() : null,
|
||||||
|
config_json: JSON.stringify(session.config),
|
||||||
|
})
|
||||||
|
.where('id', '=', session.id.toString())
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
|
||||||
|
private toDomain(row: SessionTable): CrawlSession {
|
||||||
|
const props: CrawlSessionProps = {
|
||||||
|
url: row.url,
|
||||||
|
status: row.status as CrawlSessionProps['status'],
|
||||||
|
seed: row.seed,
|
||||||
|
maxStates: row.max_states,
|
||||||
|
statesVisited: row.states_visited,
|
||||||
|
config: this.parseJson(row.config_json),
|
||||||
|
};
|
||||||
|
|
||||||
|
return CrawlSession.reconstitute(props, UniqueId.from(row.id));
|
||||||
|
}
|
||||||
|
|
||||||
|
private parseJson(json: string): Record<string, unknown> {
|
||||||
|
try {
|
||||||
|
return JSON.parse(json) as Record<string, unknown>;
|
||||||
|
} catch {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
/**
|
||||||
|
* KyselyStateRepository — implements IStateRepository using Kysely.
|
||||||
|
*/
|
||||||
|
import { Kysely } from 'kysely';
|
||||||
|
import { Database, StateTable } from '../../../../shared/infrastructure/DatabaseConnection';
|
||||||
|
import { IStateRepository } from '../../domain/ports/IStateRepository';
|
||||||
|
import { CrawlState } from '../../domain/entities/CrawlState';
|
||||||
|
import { UniqueId } from '../../../../shared/domain/UniqueId';
|
||||||
|
|
||||||
|
export class KyselyStateRepository implements IStateRepository {
|
||||||
|
constructor(private readonly db: Kysely<Database>) {}
|
||||||
|
|
||||||
|
async save(state: CrawlState): Promise<void> {
|
||||||
|
const row: StateTable = {
|
||||||
|
id: state.id.toString(),
|
||||||
|
session_id: state.sessionId,
|
||||||
|
url: state.url,
|
||||||
|
title: state.title,
|
||||||
|
dom_snapshot_path: null,
|
||||||
|
visit_count: state.visitCount,
|
||||||
|
discovered_at: Date.now(),
|
||||||
|
};
|
||||||
|
|
||||||
|
await this.db
|
||||||
|
.insertInto('states')
|
||||||
|
.values(row)
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
|
||||||
|
async findById(id: UniqueId): Promise<CrawlState | null> {
|
||||||
|
const row = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.where('id', '=', id.toString())
|
||||||
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
if (!row) return null;
|
||||||
|
|
||||||
|
return this.toDomain(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
async findAll(): Promise<CrawlState[]> {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.execute();
|
||||||
|
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
|
||||||
|
async findBySessionId(sessionId: string): Promise<CrawlState[]> {
|
||||||
|
const rows = await this.db
|
||||||
|
.selectFrom('states')
|
||||||
|
.selectAll()
|
||||||
|
.where('session_id', '=', sessionId)
|
||||||
|
.execute();
|
||||||
|
|
||||||
|
return rows.map((row) => this.toDomain(row));
|
||||||
|
}
|
||||||
|
|
||||||
|
async update(state: CrawlState): Promise<void> {
|
||||||
|
await this.db
|
||||||
|
.updateTable('states')
|
||||||
|
.set({
|
||||||
|
visit_count: state.visitCount,
|
||||||
|
url: state.url,
|
||||||
|
title: state.title,
|
||||||
|
})
|
||||||
|
.where('id', '=', state.id.toString())
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
|
||||||
|
private toDomain(row: StateTable): CrawlState {
|
||||||
|
return CrawlState.create(
|
||||||
|
{
|
||||||
|
url: row.url,
|
||||||
|
title: row.title,
|
||||||
|
domSnapshot: '',
|
||||||
|
visitCount: row.visit_count,
|
||||||
|
stateId: row.id,
|
||||||
|
sessionId: row.session_id,
|
||||||
|
},
|
||||||
|
UniqueId.from(row.id)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user