From 222979574a3ee6af3f07f2662f33deb10f3de85e Mon Sep 17 00:00:00 2001 From: Kitos Date: Tue, 17 Feb 2026 17:47:47 +0100 Subject: [PATCH] =?UTF-8?q?feat(phase-38):=20automatic=20intelligence=20?= =?UTF-8?q?=E2=80=94=20OSINT=20enrichment=20+=20stale=20coverage=20detecti?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tarea 4.1 — OSINT Enrichment: - Add OsintItem model with source_type, severity, CVSS metadata, review flag - Add Alembic migration b022 with osint_items table and optimized indexes - Add osint_enrichment_service with NVD API integration, deduplication, rate limiting - Add OSINT router: GET /osint/items, /osint/summary, /osint/technique/{id} - Add POST /osint/items/{id}/review to mark items as reviewed - Add POST /osint/enrich/{technique_id} for manual single-technique enrichment - Techniques with new CVEs are automatically flagged review_required=True - Register weekly enrichment job in APScheduler - Add NVD_API_KEY config setting for optional increased rate limits Tarea 4.2 — Stale Coverage Detection: - Add stale_detection_service that flags techniques with no validated test in the last N days, or never-validated but with a coverage status - Configurable threshold via STALE_THRESHOLD_DAYS setting (default 365) - Register daily stale detection job in APScheduler - Only flags techniques not already marked review_required --- .../alembic/versions/b022_add_osint_items.py | 47 +++++ backend/app/config.py | 4 + backend/app/jobs/mitre_sync_job.py | 47 ++++- backend/app/main.py | 2 + backend/app/models/__init__.py | 3 +- backend/app/models/osint_item.py | 40 ++++ backend/app/routers/osint.py | 197 ++++++++++++++++++ .../app/services/osint_enrichment_service.py | 191 +++++++++++++++++ .../app/services/stale_detection_service.py | 78 +++++++ 9 files changed, 607 insertions(+), 2 deletions(-) create mode 100644 backend/alembic/versions/b022_add_osint_items.py create mode 100644 backend/app/models/osint_item.py create mode 100644 backend/app/routers/osint.py create mode 100644 backend/app/services/osint_enrichment_service.py create mode 100644 backend/app/services/stale_detection_service.py diff --git a/backend/alembic/versions/b022_add_osint_items.py b/backend/alembic/versions/b022_add_osint_items.py new file mode 100644 index 0000000..d5ca642 --- /dev/null +++ b/backend/alembic/versions/b022_add_osint_items.py @@ -0,0 +1,47 @@ +"""add_osint_items + +Revision ID: b022osintitems +Revises: b021phasetiming +Create Date: 2026-02-17 22:00:00.000000 + +Add osint_items table for OSINT enrichment data linked to techniques. +""" + +from alembic import op + +revision = "b022osintitems" +down_revision = "b021phasetiming" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute(""" + CREATE TABLE IF NOT EXISTS osint_items ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + technique_id UUID NOT NULL REFERENCES techniques(id), + source_type VARCHAR(50) NOT NULL, + source_url TEXT NOT NULL, + title VARCHAR(500) NOT NULL, + description TEXT, + severity VARCHAR(20), + discovered_at TIMESTAMP NOT NULL DEFAULT now(), + reviewed BOOLEAN NOT NULL DEFAULT false, + metadata JSONB DEFAULT '{}'::jsonb + ); + + CREATE INDEX IF NOT EXISTS ix_osint_items_technique_id + ON osint_items (technique_id); + + CREATE INDEX IF NOT EXISTS ix_osint_items_source_type + ON osint_items (source_type); + + CREATE INDEX IF NOT EXISTS ix_osint_items_reviewed + ON osint_items (reviewed) WHERE NOT reviewed; + """) + + +def downgrade() -> None: + op.execute(""" + DROP TABLE IF EXISTS osint_items CASCADE; + """) diff --git a/backend/app/config.py b/backend/app/config.py index 9f93577..4f61508 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -58,6 +58,10 @@ class Settings(BaseSettings): TEMPO_API_TOKEN: str = "" TEMPO_DEFAULT_WORK_TYPE: str = "Red Team" + # ── OSINT / Intelligence ──────────────────────────────────────── + NVD_API_KEY: str = "" # optional; increases NVD rate limit from 5/30s to 50/30s + STALE_THRESHOLD_DAYS: int = 365 # days before coverage is considered stale + # ── Reporting ───────────────────────────────────────────────────── REPORT_TEMPLATES_DIR: str = "app/templates/reports" REPORT_OUTPUT_DIR: str = "/tmp/aegis_reports" diff --git a/backend/app/jobs/mitre_sync_job.py b/backend/app/jobs/mitre_sync_job.py index e83a1ef..133e37a 100644 --- a/backend/app/jobs/mitre_sync_job.py +++ b/backend/app/jobs/mitre_sync_job.py @@ -21,6 +21,8 @@ from app.services.notification_service import cleanup_old_notifications from app.services.snapshot_service import create_snapshot, cleanup_old_snapshots from app.services.campaign_scheduler_service import check_and_run_recurring_campaigns from app.jobs.jira_sync_job import sync_all_jira_links +from app.services.osint_enrichment_service import enrich_all_techniques +from app.services.stale_detection_service import detect_stale_coverage logger = logging.getLogger(__name__) @@ -108,6 +110,32 @@ def _run_intel_scan() -> None: db.close() +def _run_osint_enrichment() -> None: + """Execute weekly OSINT enrichment inside its own DB session.""" + logger.info("Scheduled OSINT enrichment job starting...") + db = SessionLocal() + try: + total = enrich_all_techniques(db) + logger.info("OSINT enrichment finished — %d new items", total) + except Exception: + logger.exception("OSINT enrichment job failed") + finally: + db.close() + + +def _run_stale_detection() -> None: + """Execute daily stale coverage detection inside its own DB session.""" + logger.info("Scheduled stale coverage detection starting...") + db = SessionLocal() + try: + count = detect_stale_coverage(db) + logger.info("Stale detection finished — %d techniques flagged", count) + except Exception: + logger.exception("Stale coverage detection job failed") + finally: + db.close() + + # --------------------------------------------------------------------------- # Scheduler bootstrap # --------------------------------------------------------------------------- @@ -173,9 +201,26 @@ def start_scheduler() -> None: name="Jira link sync (hourly)", replace_existing=True, ) + scheduler.add_job( + _run_osint_enrichment, + trigger="interval", + weeks=1, + id="osint_enrichment", + name="OSINT enrichment (weekly)", + replace_existing=True, + ) + scheduler.add_job( + _run_stale_detection, + trigger="interval", + hours=24, + id="stale_detection", + name="Stale coverage detection (daily)", + replace_existing=True, + ) scheduler.start() logger.info( "Background scheduler started — mitre_sync (24h), intel_scan (7d), " "notification_cleanup (24h), weekly_snapshot (Sundays 00:00), " - "recurring_campaigns (daily), jira_sync (1h)" + "recurring_campaigns (daily), jira_sync (1h), " + "osint_enrichment (weekly), stale_detection (daily)" ) diff --git a/backend/app/main.py b/backend/app/main.py index d5f9e97..81685ed 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -37,6 +37,7 @@ from app.routers import worklogs as worklogs_router from app.routers import professional_reports as professional_reports_router from app.routers import analytics as analytics_router from app.routers import advanced_metrics as advanced_metrics_router +from app.routers import osint as osint_router from app.domain.exceptions import DomainException from app.middleware.error_handler import domain_exception_handler from app.storage import ensure_bucket_exists @@ -120,6 +121,7 @@ app.include_router(worklogs_router.router, prefix="/api/v1") app.include_router(professional_reports_router.router, prefix="/api/v1") app.include_router(analytics_router.router, prefix="/api/v1") app.include_router(advanced_metrics_router.router, prefix="/api/v1") +app.include_router(osint_router.router, prefix="/api/v1") @app.get("/health", include_in_schema=False) diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index fbb7a9b..6634c9b 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -18,6 +18,7 @@ from app.models.compliance import ComplianceFramework, ComplianceControl, Compli from app.models.coverage_snapshot import CoverageSnapshot, SnapshotTechniqueState from app.models.jira_link import JiraLink, JiraLinkEntityType, JiraSyncDirection from app.models.worklog import Worklog +from app.models.osint_item import OsintItem from app.models.enums import TechniqueStatus, TestState, TestResult, TeamSide __all__ = [ @@ -30,6 +31,6 @@ __all__ = [ "ComplianceFramework", "ComplianceControl", "ComplianceControlMapping", "CoverageSnapshot", "SnapshotTechniqueState", "JiraLink", "JiraLinkEntityType", "JiraSyncDirection", - "Worklog", + "Worklog", "OsintItem", "TechniqueStatus", "TestState", "TestResult", "TeamSide", ] diff --git a/backend/app/models/osint_item.py b/backend/app/models/osint_item.py new file mode 100644 index 0000000..4ef0d87 --- /dev/null +++ b/backend/app/models/osint_item.py @@ -0,0 +1,40 @@ +"""OSINT enrichment items — CVEs, blogs, PoCs, and advisories linked to techniques.""" + +import uuid +from datetime import datetime + +from sqlalchemy import Column, String, Text, Boolean, DateTime, ForeignKey +from sqlalchemy.dialects.postgresql import UUID, JSONB +from sqlalchemy.orm import relationship + +from app.database import Base + + +class OsintItem(Base): + """Represents an OSINT data point (CVE, blog, PoC, advisory) associated + with a MITRE ATT&CK technique. + + Used by the enrichment pipeline to surface relevant threat intelligence + for each technique, flagging those that need review. + """ + + __tablename__ = "osint_items" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + technique_id = Column( + UUID(as_uuid=True), + ForeignKey("techniques.id"), + nullable=False, + index=True, + ) + source_type = Column(String(50), nullable=False) # "cve", "blog", "poc", "advisory" + source_url = Column(Text, nullable=False) + title = Column(String(500), nullable=False) + description = Column(Text, nullable=True) + severity = Column(String(20), nullable=True) # CRITICAL, HIGH, MEDIUM, LOW, UNKNOWN + discovered_at = Column(DateTime, default=datetime.utcnow, nullable=False) + reviewed = Column(Boolean, default=False) + metadata_ = Column("metadata", JSONB, default={}) + + # ── Relationships ───────────────────────────────────────────────── + technique = relationship("Technique", backref="osint_items") diff --git a/backend/app/routers/osint.py b/backend/app/routers/osint.py new file mode 100644 index 0000000..13bec68 --- /dev/null +++ b/backend/app/routers/osint.py @@ -0,0 +1,197 @@ +"""OSINT enrichment endpoints — view, review, and trigger enrichment of +OSINT items (CVEs, advisories, etc.) linked to techniques. +""" + +from uuid import UUID + +from fastapi import APIRouter, Depends, Query, HTTPException, status +from pydantic import BaseModel +from sqlalchemy.orm import Session + +from app.database import get_db +from app.dependencies.auth import get_current_user, require_any_role +from app.models.osint_item import OsintItem +from app.models.technique import Technique +from app.models.user import User +from app.services.osint_enrichment_service import ( + enrich_technique_with_cves, + get_osint_items_for_technique, + mark_osint_reviewed, + get_unreviewed_count, +) + +router = APIRouter(prefix="/osint", tags=["osint"]) + + +# ── Schemas ────────────────────────────────────────────────────────── + + +class OsintItemOut(BaseModel): + id: str + technique_id: str + source_type: str + source_url: str + title: str + description: str | None + severity: str | None + discovered_at: str | None + reviewed: bool + metadata_: dict | None = None + + class Config: + from_attributes = True + + +# ── Endpoints ──────────────────────────────────────────────────────── + + +@router.get("/items") +def list_osint_items( + technique_id: UUID | None = Query(None), + source_type: str | None = Query(None), + reviewed: bool | None = Query(None), + offset: int = Query(0, ge=0), + limit: int = Query(50, ge=1, le=200), + db: Session = Depends(get_db), + user: User = Depends(get_current_user), +): + """List OSINT items with optional filters.""" + query = db.query(OsintItem) + if technique_id: + query = query.filter(OsintItem.technique_id == technique_id) + if source_type: + query = query.filter(OsintItem.source_type == source_type) + if reviewed is not None: + query = query.filter(OsintItem.reviewed == reviewed) + + total = query.count() + items = ( + query.order_by(OsintItem.discovered_at.desc()) + .offset(offset) + .limit(limit) + .all() + ) + + return { + "total": total, + "items": [ + { + "id": str(item.id), + "technique_id": str(item.technique_id), + "source_type": item.source_type, + "source_url": item.source_url, + "title": item.title, + "description": item.description, + "severity": item.severity, + "discovered_at": item.discovered_at.isoformat() if item.discovered_at else None, + "reviewed": item.reviewed, + "metadata": item.metadata_, + } + for item in items + ], + } + + +@router.get("/summary") +def osint_summary( + db: Session = Depends(get_db), + user: User = Depends(get_current_user), +): + """Summary statistics for OSINT items.""" + from sqlalchemy import func + + total = db.query(func.count(OsintItem.id)).scalar() or 0 + unreviewed = get_unreviewed_count(db) + + by_severity = dict( + db.query(OsintItem.severity, func.count(OsintItem.id)) + .group_by(OsintItem.severity) + .all() + ) + + by_type = dict( + db.query(OsintItem.source_type, func.count(OsintItem.id)) + .group_by(OsintItem.source_type) + .all() + ) + + techniques_with_items = ( + db.query(func.count(func.distinct(OsintItem.technique_id))).scalar() or 0 + ) + + return { + "total_items": total, + "unreviewed": unreviewed, + "techniques_with_items": techniques_with_items, + "by_severity": by_severity, + "by_type": by_type, + } + + +@router.post("/items/{item_id}/review") +def review_osint_item( + item_id: UUID, + db: Session = Depends(get_db), + user: User = Depends(get_current_user), +): + """Mark an OSINT item as reviewed.""" + item = mark_osint_reviewed(db, str(item_id)) + if not item: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="OSINT item not found", + ) + return {"id": str(item.id), "reviewed": True} + + +@router.post("/enrich/{technique_id}") +def trigger_technique_enrichment( + technique_id: UUID, + db: Session = Depends(get_db), + user: User = Depends(require_any_role("red_lead", "blue_lead")), +): + """Manually trigger OSINT enrichment for a single technique.""" + technique = db.query(Technique).filter(Technique.id == technique_id).first() + if not technique: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Technique not found", + ) + + count = enrich_technique_with_cves(db, technique) + return { + "technique_id": str(technique.id), + "mitre_id": technique.mitre_id, + "new_items": count, + } + + +@router.get("/technique/{technique_id}") +def get_technique_osint( + technique_id: UUID, + source_type: str | None = Query(None), + reviewed: bool | None = Query(None), + db: Session = Depends(get_db), + user: User = Depends(get_current_user), +): + """Get all OSINT items for a specific technique.""" + items = get_osint_items_for_technique( + db, + str(technique_id), + source_type=source_type, + reviewed=reviewed, + ) + return [ + { + "id": str(item.id), + "source_type": item.source_type, + "source_url": item.source_url, + "title": item.title, + "description": item.description, + "severity": item.severity, + "discovered_at": item.discovered_at.isoformat() if item.discovered_at else None, + "reviewed": item.reviewed, + "metadata": item.metadata_, + } + for item in items + ] diff --git a/backend/app/services/osint_enrichment_service.py b/backend/app/services/osint_enrichment_service.py new file mode 100644 index 0000000..47f0b2d --- /dev/null +++ b/backend/app/services/osint_enrichment_service.py @@ -0,0 +1,191 @@ +"""OSINT enrichment service — automatically discovers CVEs, advisories, and +related intelligence for MITRE ATT&CK techniques using the NVD API. + +Designed to run as a weekly background job. Respects NVD rate limits +(5 requests per 30 seconds without an API key, 50/30s with a key). +""" + +import logging +import time + +import requests +from sqlalchemy.orm import Session + +from app.config import settings +from app.models.osint_item import OsintItem +from app.models.technique import Technique + +logger = logging.getLogger(__name__) + +NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0" +NVD_RATE_LIMIT_BATCH = 5 +NVD_RATE_LIMIT_WAIT = 31 # seconds to wait after each batch + + +def enrich_technique_with_cves(db: Session, technique: Technique) -> int: + """Search for CVEs related to a technique via the NVD API. + + Uses the technique name as a keyword search. Deduplicates against + existing OsintItems so re-runs are safe. + + Returns the number of new CVEs added. + """ + try: + headers = {} + if getattr(settings, "NVD_API_KEY", ""): + headers["apiKey"] = settings.NVD_API_KEY + + params = { + "keywordSearch": technique.name, + "resultsPerPage": 10, + } + + resp = requests.get( + NVD_API_BASE, + params=params, + headers=headers, + timeout=30, + ) + if resp.status_code != 200: + logger.warning( + "NVD API error for %s: HTTP %d", + technique.mitre_id, + resp.status_code, + ) + return 0 + + data = resp.json() + count = 0 + + for vuln in data.get("vulnerabilities", []): + cve = vuln.get("cve", {}) + cve_id = cve.get("id") + if not cve_id: + continue + + # Deduplicate + exists = ( + db.query(OsintItem.id) + .filter( + OsintItem.technique_id == technique.id, + OsintItem.source_url.contains(cve_id), + ) + .first() + ) + if exists: + continue + + descriptions = cve.get("descriptions", []) + desc = next( + (d["value"] for d in descriptions if d["lang"] == "en"), "" + ) + + # Extract CVSS severity + metrics = cve.get("metrics", {}) + cvss_v31 = metrics.get("cvssMetricV31", []) + cvss_v30 = metrics.get("cvssMetricV30", []) + cvss_entry = (cvss_v31[0] if cvss_v31 else cvss_v30[0]) if (cvss_v31 or cvss_v30) else {} + cvss_data = cvss_entry.get("cvssData", {}) if cvss_entry else {} + severity = cvss_data.get("baseSeverity", "UNKNOWN") + score = cvss_data.get("baseScore") + + item = OsintItem( + technique_id=technique.id, + source_type="cve", + source_url=f"https://nvd.nist.gov/vuln/detail/{cve_id}", + title=cve_id, + description=desc[:500] if desc else None, + severity=severity, + metadata_={"cvss_score": score, "cve_id": cve_id}, + ) + db.add(item) + count += 1 + + if count > 0: + technique.review_required = True + db.commit() + logger.info("Added %d CVEs for %s", count, technique.mitre_id) + + return count + + except requests.RequestException as e: + logger.error( + "HTTP error during OSINT enrichment for %s: %s", + technique.mitre_id, + e, + ) + return 0 + except Exception as e: + logger.error( + "OSINT enrichment failed for %s: %s", + technique.mitre_id, + e, + exc_info=True, + ) + return 0 + + +def enrich_all_techniques(db: Session) -> int: + """Enrich all techniques with CVE data from NVD. + + Rate-limited: processes *NVD_RATE_LIMIT_BATCH* techniques, then + sleeps for *NVD_RATE_LIMIT_WAIT* seconds to stay under NVD limits. + + Returns total number of new OSINT items added. + """ + techniques = db.query(Technique).order_by(Technique.mitre_id).all() + total = 0 + + logger.info( + "Starting OSINT enrichment for %d techniques...", + len(techniques), + ) + + for i, tech in enumerate(techniques): + total += enrich_technique_with_cves(db, tech) + + # Rate limiting: wait after every batch + if (i + 1) % NVD_RATE_LIMIT_BATCH == 0 and (i + 1) < len(techniques): + logger.debug( + "Rate limit pause after %d techniques (%ds)...", + i + 1, + NVD_RATE_LIMIT_WAIT, + ) + time.sleep(NVD_RATE_LIMIT_WAIT) + + logger.info( + "OSINT enrichment complete — %d new items across %d techniques", + total, + len(techniques), + ) + return total + + +def get_osint_items_for_technique( + db: Session, + technique_id: str, + source_type: str | None = None, + reviewed: bool | None = None, +) -> list[OsintItem]: + """Retrieve OSINT items for a technique with optional filters.""" + query = db.query(OsintItem).filter(OsintItem.technique_id == technique_id) + if source_type: + query = query.filter(OsintItem.source_type == source_type) + if reviewed is not None: + query = query.filter(OsintItem.reviewed == reviewed) + return query.order_by(OsintItem.discovered_at.desc()).all() + + +def mark_osint_reviewed(db: Session, item_id: str) -> OsintItem | None: + """Mark an OSINT item as reviewed.""" + item = db.query(OsintItem).filter(OsintItem.id == item_id).first() + if item: + item.reviewed = True + db.commit() + db.refresh(item) + return item + + +def get_unreviewed_count(db: Session) -> int: + """Return the total number of unreviewed OSINT items.""" + return db.query(OsintItem).filter(OsintItem.reviewed == False).count() # noqa: E712 diff --git a/backend/app/services/stale_detection_service.py b/backend/app/services/stale_detection_service.py new file mode 100644 index 0000000..950edb3 --- /dev/null +++ b/backend/app/services/stale_detection_service.py @@ -0,0 +1,78 @@ +"""Stale coverage detection — marks techniques whose last validated test +is older than a configurable threshold. + +This is the simple version. The full Decay Engine (Fase 8) will replace +this with a multi-factor, configurable decay model with confidence scores. +""" + +import logging +from datetime import datetime, timedelta + +from sqlalchemy import func +from sqlalchemy.orm import Session + +from app.config import settings +from app.models.technique import Technique +from app.models.test import Test + +logger = logging.getLogger(__name__) + +STALE_THRESHOLD_DAYS = getattr(settings, "STALE_THRESHOLD_DAYS", 365) + + +def detect_stale_coverage(db: Session) -> int: + """Scan all techniques and flag those with stale coverage. + + A technique is considered stale when: + - It has a status other than ``not_evaluated``, AND + - Its most recent *validated* test is older than *STALE_THRESHOLD_DAYS*, OR + - It has never had a validated test (but has been manually marked as + covered/partial). + + Returns the number of newly-flagged techniques. + """ + cutoff = datetime.utcnow() - timedelta(days=STALE_THRESHOLD_DAYS) + + # Subquery: latest validated test date per technique + latest_test = ( + db.query( + Test.technique_id, + func.max(Test.created_at).label("last_tested"), + ) + .filter(Test.state == "validated") + .group_by(Test.technique_id) + .subquery() + ) + + # Find techniques that are stale + stale_techniques = ( + db.query(Technique) + .outerjoin(latest_test, Technique.id == latest_test.c.technique_id) + .filter( + # Either tested before cutoff, or never tested at all + (latest_test.c.last_tested < cutoff) + | (latest_test.c.last_tested.is_(None)) + ) + .filter( + # Only flag techniques that have a real status (not never-evaluated ones) + Technique.status_global != "not_evaluated" + ) + .all() + ) + + count = 0 + for tech in stale_techniques: + if not tech.review_required: + tech.review_required = True + count += 1 + logger.info("Marked %s as stale coverage", tech.mitre_id) + + if count > 0: + db.commit() + logger.info( + "Stale coverage detection complete — %d techniques flagged", count + ) + else: + logger.info("Stale coverage detection complete — no new stale techniques") + + return count