feat(phase-38): automatic intelligence — OSINT enrichment + stale coverage detection

Tarea 4.1 — OSINT Enrichment:
- Add OsintItem model with source_type, severity, CVSS metadata, review flag
- Add Alembic migration b022 with osint_items table and optimized indexes
- Add osint_enrichment_service with NVD API integration, deduplication, rate limiting
- Add OSINT router: GET /osint/items, /osint/summary, /osint/technique/{id}
- Add POST /osint/items/{id}/review to mark items as reviewed
- Add POST /osint/enrich/{technique_id} for manual single-technique enrichment
- Techniques with new CVEs are automatically flagged review_required=True
- Register weekly enrichment job in APScheduler
- Add NVD_API_KEY config setting for optional increased rate limits

Tarea 4.2 — Stale Coverage Detection:
- Add stale_detection_service that flags techniques with no validated test
  in the last N days, or never-validated but with a coverage status
- Configurable threshold via STALE_THRESHOLD_DAYS setting (default 365)
- Register daily stale detection job in APScheduler
- Only flags techniques not already marked review_required
This commit is contained in:
2026-02-17 17:47:47 +01:00
parent 31e116b4ba
commit 222979574a
9 changed files with 607 additions and 2 deletions

View File

@@ -58,6 +58,10 @@ class Settings(BaseSettings):
TEMPO_API_TOKEN: str = ""
TEMPO_DEFAULT_WORK_TYPE: str = "Red Team"
# ── OSINT / Intelligence ────────────────────────────────────────
NVD_API_KEY: str = "" # optional; increases NVD rate limit from 5/30s to 50/30s
STALE_THRESHOLD_DAYS: int = 365 # days before coverage is considered stale
# ── Reporting ─────────────────────────────────────────────────────
REPORT_TEMPLATES_DIR: str = "app/templates/reports"
REPORT_OUTPUT_DIR: str = "/tmp/aegis_reports"

View File

@@ -21,6 +21,8 @@ from app.services.notification_service import cleanup_old_notifications
from app.services.snapshot_service import create_snapshot, cleanup_old_snapshots
from app.services.campaign_scheduler_service import check_and_run_recurring_campaigns
from app.jobs.jira_sync_job import sync_all_jira_links
from app.services.osint_enrichment_service import enrich_all_techniques
from app.services.stale_detection_service import detect_stale_coverage
logger = logging.getLogger(__name__)
@@ -108,6 +110,32 @@ def _run_intel_scan() -> None:
db.close()
def _run_osint_enrichment() -> None:
"""Execute weekly OSINT enrichment inside its own DB session."""
logger.info("Scheduled OSINT enrichment job starting...")
db = SessionLocal()
try:
total = enrich_all_techniques(db)
logger.info("OSINT enrichment finished — %d new items", total)
except Exception:
logger.exception("OSINT enrichment job failed")
finally:
db.close()
def _run_stale_detection() -> None:
"""Execute daily stale coverage detection inside its own DB session."""
logger.info("Scheduled stale coverage detection starting...")
db = SessionLocal()
try:
count = detect_stale_coverage(db)
logger.info("Stale detection finished — %d techniques flagged", count)
except Exception:
logger.exception("Stale coverage detection job failed")
finally:
db.close()
# ---------------------------------------------------------------------------
# Scheduler bootstrap
# ---------------------------------------------------------------------------
@@ -173,9 +201,26 @@ def start_scheduler() -> None:
name="Jira link sync (hourly)",
replace_existing=True,
)
scheduler.add_job(
_run_osint_enrichment,
trigger="interval",
weeks=1,
id="osint_enrichment",
name="OSINT enrichment (weekly)",
replace_existing=True,
)
scheduler.add_job(
_run_stale_detection,
trigger="interval",
hours=24,
id="stale_detection",
name="Stale coverage detection (daily)",
replace_existing=True,
)
scheduler.start()
logger.info(
"Background scheduler started — mitre_sync (24h), intel_scan (7d), "
"notification_cleanup (24h), weekly_snapshot (Sundays 00:00), "
"recurring_campaigns (daily), jira_sync (1h)"
"recurring_campaigns (daily), jira_sync (1h), "
"osint_enrichment (weekly), stale_detection (daily)"
)

View File

@@ -37,6 +37,7 @@ from app.routers import worklogs as worklogs_router
from app.routers import professional_reports as professional_reports_router
from app.routers import analytics as analytics_router
from app.routers import advanced_metrics as advanced_metrics_router
from app.routers import osint as osint_router
from app.domain.exceptions import DomainException
from app.middleware.error_handler import domain_exception_handler
from app.storage import ensure_bucket_exists
@@ -120,6 +121,7 @@ app.include_router(worklogs_router.router, prefix="/api/v1")
app.include_router(professional_reports_router.router, prefix="/api/v1")
app.include_router(analytics_router.router, prefix="/api/v1")
app.include_router(advanced_metrics_router.router, prefix="/api/v1")
app.include_router(osint_router.router, prefix="/api/v1")
@app.get("/health", include_in_schema=False)

View File

@@ -18,6 +18,7 @@ from app.models.compliance import ComplianceFramework, ComplianceControl, Compli
from app.models.coverage_snapshot import CoverageSnapshot, SnapshotTechniqueState
from app.models.jira_link import JiraLink, JiraLinkEntityType, JiraSyncDirection
from app.models.worklog import Worklog
from app.models.osint_item import OsintItem
from app.models.enums import TechniqueStatus, TestState, TestResult, TeamSide
__all__ = [
@@ -30,6 +31,6 @@ __all__ = [
"ComplianceFramework", "ComplianceControl", "ComplianceControlMapping",
"CoverageSnapshot", "SnapshotTechniqueState",
"JiraLink", "JiraLinkEntityType", "JiraSyncDirection",
"Worklog",
"Worklog", "OsintItem",
"TechniqueStatus", "TestState", "TestResult", "TeamSide",
]

View File

@@ -0,0 +1,40 @@
"""OSINT enrichment items — CVEs, blogs, PoCs, and advisories linked to techniques."""
import uuid
from datetime import datetime
from sqlalchemy import Column, String, Text, Boolean, DateTime, ForeignKey
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.orm import relationship
from app.database import Base
class OsintItem(Base):
"""Represents an OSINT data point (CVE, blog, PoC, advisory) associated
with a MITRE ATT&CK technique.
Used by the enrichment pipeline to surface relevant threat intelligence
for each technique, flagging those that need review.
"""
__tablename__ = "osint_items"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
technique_id = Column(
UUID(as_uuid=True),
ForeignKey("techniques.id"),
nullable=False,
index=True,
)
source_type = Column(String(50), nullable=False) # "cve", "blog", "poc", "advisory"
source_url = Column(Text, nullable=False)
title = Column(String(500), nullable=False)
description = Column(Text, nullable=True)
severity = Column(String(20), nullable=True) # CRITICAL, HIGH, MEDIUM, LOW, UNKNOWN
discovered_at = Column(DateTime, default=datetime.utcnow, nullable=False)
reviewed = Column(Boolean, default=False)
metadata_ = Column("metadata", JSONB, default={})
# ── Relationships ─────────────────────────────────────────────────
technique = relationship("Technique", backref="osint_items")

View File

@@ -0,0 +1,197 @@
"""OSINT enrichment endpoints — view, review, and trigger enrichment of
OSINT items (CVEs, advisories, etc.) linked to techniques.
"""
from uuid import UUID
from fastapi import APIRouter, Depends, Query, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies.auth import get_current_user, require_any_role
from app.models.osint_item import OsintItem
from app.models.technique import Technique
from app.models.user import User
from app.services.osint_enrichment_service import (
enrich_technique_with_cves,
get_osint_items_for_technique,
mark_osint_reviewed,
get_unreviewed_count,
)
router = APIRouter(prefix="/osint", tags=["osint"])
# ── Schemas ──────────────────────────────────────────────────────────
class OsintItemOut(BaseModel):
id: str
technique_id: str
source_type: str
source_url: str
title: str
description: str | None
severity: str | None
discovered_at: str | None
reviewed: bool
metadata_: dict | None = None
class Config:
from_attributes = True
# ── Endpoints ────────────────────────────────────────────────────────
@router.get("/items")
def list_osint_items(
technique_id: UUID | None = Query(None),
source_type: str | None = Query(None),
reviewed: bool | None = Query(None),
offset: int = Query(0, ge=0),
limit: int = Query(50, ge=1, le=200),
db: Session = Depends(get_db),
user: User = Depends(get_current_user),
):
"""List OSINT items with optional filters."""
query = db.query(OsintItem)
if technique_id:
query = query.filter(OsintItem.technique_id == technique_id)
if source_type:
query = query.filter(OsintItem.source_type == source_type)
if reviewed is not None:
query = query.filter(OsintItem.reviewed == reviewed)
total = query.count()
items = (
query.order_by(OsintItem.discovered_at.desc())
.offset(offset)
.limit(limit)
.all()
)
return {
"total": total,
"items": [
{
"id": str(item.id),
"technique_id": str(item.technique_id),
"source_type": item.source_type,
"source_url": item.source_url,
"title": item.title,
"description": item.description,
"severity": item.severity,
"discovered_at": item.discovered_at.isoformat() if item.discovered_at else None,
"reviewed": item.reviewed,
"metadata": item.metadata_,
}
for item in items
],
}
@router.get("/summary")
def osint_summary(
db: Session = Depends(get_db),
user: User = Depends(get_current_user),
):
"""Summary statistics for OSINT items."""
from sqlalchemy import func
total = db.query(func.count(OsintItem.id)).scalar() or 0
unreviewed = get_unreviewed_count(db)
by_severity = dict(
db.query(OsintItem.severity, func.count(OsintItem.id))
.group_by(OsintItem.severity)
.all()
)
by_type = dict(
db.query(OsintItem.source_type, func.count(OsintItem.id))
.group_by(OsintItem.source_type)
.all()
)
techniques_with_items = (
db.query(func.count(func.distinct(OsintItem.technique_id))).scalar() or 0
)
return {
"total_items": total,
"unreviewed": unreviewed,
"techniques_with_items": techniques_with_items,
"by_severity": by_severity,
"by_type": by_type,
}
@router.post("/items/{item_id}/review")
def review_osint_item(
item_id: UUID,
db: Session = Depends(get_db),
user: User = Depends(get_current_user),
):
"""Mark an OSINT item as reviewed."""
item = mark_osint_reviewed(db, str(item_id))
if not item:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="OSINT item not found",
)
return {"id": str(item.id), "reviewed": True}
@router.post("/enrich/{technique_id}")
def trigger_technique_enrichment(
technique_id: UUID,
db: Session = Depends(get_db),
user: User = Depends(require_any_role("red_lead", "blue_lead")),
):
"""Manually trigger OSINT enrichment for a single technique."""
technique = db.query(Technique).filter(Technique.id == technique_id).first()
if not technique:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Technique not found",
)
count = enrich_technique_with_cves(db, technique)
return {
"technique_id": str(technique.id),
"mitre_id": technique.mitre_id,
"new_items": count,
}
@router.get("/technique/{technique_id}")
def get_technique_osint(
technique_id: UUID,
source_type: str | None = Query(None),
reviewed: bool | None = Query(None),
db: Session = Depends(get_db),
user: User = Depends(get_current_user),
):
"""Get all OSINT items for a specific technique."""
items = get_osint_items_for_technique(
db,
str(technique_id),
source_type=source_type,
reviewed=reviewed,
)
return [
{
"id": str(item.id),
"source_type": item.source_type,
"source_url": item.source_url,
"title": item.title,
"description": item.description,
"severity": item.severity,
"discovered_at": item.discovered_at.isoformat() if item.discovered_at else None,
"reviewed": item.reviewed,
"metadata": item.metadata_,
}
for item in items
]

View File

@@ -0,0 +1,191 @@
"""OSINT enrichment service — automatically discovers CVEs, advisories, and
related intelligence for MITRE ATT&CK techniques using the NVD API.
Designed to run as a weekly background job. Respects NVD rate limits
(5 requests per 30 seconds without an API key, 50/30s with a key).
"""
import logging
import time
import requests
from sqlalchemy.orm import Session
from app.config import settings
from app.models.osint_item import OsintItem
from app.models.technique import Technique
logger = logging.getLogger(__name__)
NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0"
NVD_RATE_LIMIT_BATCH = 5
NVD_RATE_LIMIT_WAIT = 31 # seconds to wait after each batch
def enrich_technique_with_cves(db: Session, technique: Technique) -> int:
"""Search for CVEs related to a technique via the NVD API.
Uses the technique name as a keyword search. Deduplicates against
existing OsintItems so re-runs are safe.
Returns the number of new CVEs added.
"""
try:
headers = {}
if getattr(settings, "NVD_API_KEY", ""):
headers["apiKey"] = settings.NVD_API_KEY
params = {
"keywordSearch": technique.name,
"resultsPerPage": 10,
}
resp = requests.get(
NVD_API_BASE,
params=params,
headers=headers,
timeout=30,
)
if resp.status_code != 200:
logger.warning(
"NVD API error for %s: HTTP %d",
technique.mitre_id,
resp.status_code,
)
return 0
data = resp.json()
count = 0
for vuln in data.get("vulnerabilities", []):
cve = vuln.get("cve", {})
cve_id = cve.get("id")
if not cve_id:
continue
# Deduplicate
exists = (
db.query(OsintItem.id)
.filter(
OsintItem.technique_id == technique.id,
OsintItem.source_url.contains(cve_id),
)
.first()
)
if exists:
continue
descriptions = cve.get("descriptions", [])
desc = next(
(d["value"] for d in descriptions if d["lang"] == "en"), ""
)
# Extract CVSS severity
metrics = cve.get("metrics", {})
cvss_v31 = metrics.get("cvssMetricV31", [])
cvss_v30 = metrics.get("cvssMetricV30", [])
cvss_entry = (cvss_v31[0] if cvss_v31 else cvss_v30[0]) if (cvss_v31 or cvss_v30) else {}
cvss_data = cvss_entry.get("cvssData", {}) if cvss_entry else {}
severity = cvss_data.get("baseSeverity", "UNKNOWN")
score = cvss_data.get("baseScore")
item = OsintItem(
technique_id=technique.id,
source_type="cve",
source_url=f"https://nvd.nist.gov/vuln/detail/{cve_id}",
title=cve_id,
description=desc[:500] if desc else None,
severity=severity,
metadata_={"cvss_score": score, "cve_id": cve_id},
)
db.add(item)
count += 1
if count > 0:
technique.review_required = True
db.commit()
logger.info("Added %d CVEs for %s", count, technique.mitre_id)
return count
except requests.RequestException as e:
logger.error(
"HTTP error during OSINT enrichment for %s: %s",
technique.mitre_id,
e,
)
return 0
except Exception as e:
logger.error(
"OSINT enrichment failed for %s: %s",
technique.mitre_id,
e,
exc_info=True,
)
return 0
def enrich_all_techniques(db: Session) -> int:
"""Enrich all techniques with CVE data from NVD.
Rate-limited: processes *NVD_RATE_LIMIT_BATCH* techniques, then
sleeps for *NVD_RATE_LIMIT_WAIT* seconds to stay under NVD limits.
Returns total number of new OSINT items added.
"""
techniques = db.query(Technique).order_by(Technique.mitre_id).all()
total = 0
logger.info(
"Starting OSINT enrichment for %d techniques...",
len(techniques),
)
for i, tech in enumerate(techniques):
total += enrich_technique_with_cves(db, tech)
# Rate limiting: wait after every batch
if (i + 1) % NVD_RATE_LIMIT_BATCH == 0 and (i + 1) < len(techniques):
logger.debug(
"Rate limit pause after %d techniques (%ds)...",
i + 1,
NVD_RATE_LIMIT_WAIT,
)
time.sleep(NVD_RATE_LIMIT_WAIT)
logger.info(
"OSINT enrichment complete — %d new items across %d techniques",
total,
len(techniques),
)
return total
def get_osint_items_for_technique(
db: Session,
technique_id: str,
source_type: str | None = None,
reviewed: bool | None = None,
) -> list[OsintItem]:
"""Retrieve OSINT items for a technique with optional filters."""
query = db.query(OsintItem).filter(OsintItem.technique_id == technique_id)
if source_type:
query = query.filter(OsintItem.source_type == source_type)
if reviewed is not None:
query = query.filter(OsintItem.reviewed == reviewed)
return query.order_by(OsintItem.discovered_at.desc()).all()
def mark_osint_reviewed(db: Session, item_id: str) -> OsintItem | None:
"""Mark an OSINT item as reviewed."""
item = db.query(OsintItem).filter(OsintItem.id == item_id).first()
if item:
item.reviewed = True
db.commit()
db.refresh(item)
return item
def get_unreviewed_count(db: Session) -> int:
"""Return the total number of unreviewed OSINT items."""
return db.query(OsintItem).filter(OsintItem.reviewed == False).count() # noqa: E712

View File

@@ -0,0 +1,78 @@
"""Stale coverage detection — marks techniques whose last validated test
is older than a configurable threshold.
This is the simple version. The full Decay Engine (Fase 8) will replace
this with a multi-factor, configurable decay model with confidence scores.
"""
import logging
from datetime import datetime, timedelta
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.config import settings
from app.models.technique import Technique
from app.models.test import Test
logger = logging.getLogger(__name__)
STALE_THRESHOLD_DAYS = getattr(settings, "STALE_THRESHOLD_DAYS", 365)
def detect_stale_coverage(db: Session) -> int:
"""Scan all techniques and flag those with stale coverage.
A technique is considered stale when:
- It has a status other than ``not_evaluated``, AND
- Its most recent *validated* test is older than *STALE_THRESHOLD_DAYS*, OR
- It has never had a validated test (but has been manually marked as
covered/partial).
Returns the number of newly-flagged techniques.
"""
cutoff = datetime.utcnow() - timedelta(days=STALE_THRESHOLD_DAYS)
# Subquery: latest validated test date per technique
latest_test = (
db.query(
Test.technique_id,
func.max(Test.created_at).label("last_tested"),
)
.filter(Test.state == "validated")
.group_by(Test.technique_id)
.subquery()
)
# Find techniques that are stale
stale_techniques = (
db.query(Technique)
.outerjoin(latest_test, Technique.id == latest_test.c.technique_id)
.filter(
# Either tested before cutoff, or never tested at all
(latest_test.c.last_tested < cutoff)
| (latest_test.c.last_tested.is_(None))
)
.filter(
# Only flag techniques that have a real status (not never-evaluated ones)
Technique.status_global != "not_evaluated"
)
.all()
)
count = 0
for tech in stale_techniques:
if not tech.review_required:
tech.review_required = True
count += 1
logger.info("Marked %s as stale coverage", tech.mitre_id)
if count > 0:
db.commit()
logger.info(
"Stale coverage detection complete — %d techniques flagged", count
)
else:
logger.info("Stale coverage detection complete — no new stale techniques")
return count