feat(phase-38): automatic intelligence — OSINT enrichment + stale coverage detection
Tarea 4.1 — OSINT Enrichment:
- Add OsintItem model with source_type, severity, CVSS metadata, review flag
- Add Alembic migration b022 with osint_items table and optimized indexes
- Add osint_enrichment_service with NVD API integration, deduplication, rate limiting
- Add OSINT router: GET /osint/items, /osint/summary, /osint/technique/{id}
- Add POST /osint/items/{id}/review to mark items as reviewed
- Add POST /osint/enrich/{technique_id} for manual single-technique enrichment
- Techniques with new CVEs are automatically flagged review_required=True
- Register weekly enrichment job in APScheduler
- Add NVD_API_KEY config setting for optional increased rate limits
Tarea 4.2 — Stale Coverage Detection:
- Add stale_detection_service that flags techniques with no validated test
in the last N days, or never-validated but with a coverage status
- Configurable threshold via STALE_THRESHOLD_DAYS setting (default 365)
- Register daily stale detection job in APScheduler
- Only flags techniques not already marked review_required
This commit is contained in:
191
backend/app/services/osint_enrichment_service.py
Normal file
191
backend/app/services/osint_enrichment_service.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""OSINT enrichment service — automatically discovers CVEs, advisories, and
|
||||
related intelligence for MITRE ATT&CK techniques using the NVD API.
|
||||
|
||||
Designed to run as a weekly background job. Respects NVD rate limits
|
||||
(5 requests per 30 seconds without an API key, 50/30s with a key).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import requests
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models.osint_item import OsintItem
|
||||
from app.models.technique import Technique
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0"
|
||||
NVD_RATE_LIMIT_BATCH = 5
|
||||
NVD_RATE_LIMIT_WAIT = 31 # seconds to wait after each batch
|
||||
|
||||
|
||||
def enrich_technique_with_cves(db: Session, technique: Technique) -> int:
|
||||
"""Search for CVEs related to a technique via the NVD API.
|
||||
|
||||
Uses the technique name as a keyword search. Deduplicates against
|
||||
existing OsintItems so re-runs are safe.
|
||||
|
||||
Returns the number of new CVEs added.
|
||||
"""
|
||||
try:
|
||||
headers = {}
|
||||
if getattr(settings, "NVD_API_KEY", ""):
|
||||
headers["apiKey"] = settings.NVD_API_KEY
|
||||
|
||||
params = {
|
||||
"keywordSearch": technique.name,
|
||||
"resultsPerPage": 10,
|
||||
}
|
||||
|
||||
resp = requests.get(
|
||||
NVD_API_BASE,
|
||||
params=params,
|
||||
headers=headers,
|
||||
timeout=30,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.warning(
|
||||
"NVD API error for %s: HTTP %d",
|
||||
technique.mitre_id,
|
||||
resp.status_code,
|
||||
)
|
||||
return 0
|
||||
|
||||
data = resp.json()
|
||||
count = 0
|
||||
|
||||
for vuln in data.get("vulnerabilities", []):
|
||||
cve = vuln.get("cve", {})
|
||||
cve_id = cve.get("id")
|
||||
if not cve_id:
|
||||
continue
|
||||
|
||||
# Deduplicate
|
||||
exists = (
|
||||
db.query(OsintItem.id)
|
||||
.filter(
|
||||
OsintItem.technique_id == technique.id,
|
||||
OsintItem.source_url.contains(cve_id),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
if exists:
|
||||
continue
|
||||
|
||||
descriptions = cve.get("descriptions", [])
|
||||
desc = next(
|
||||
(d["value"] for d in descriptions if d["lang"] == "en"), ""
|
||||
)
|
||||
|
||||
# Extract CVSS severity
|
||||
metrics = cve.get("metrics", {})
|
||||
cvss_v31 = metrics.get("cvssMetricV31", [])
|
||||
cvss_v30 = metrics.get("cvssMetricV30", [])
|
||||
cvss_entry = (cvss_v31[0] if cvss_v31 else cvss_v30[0]) if (cvss_v31 or cvss_v30) else {}
|
||||
cvss_data = cvss_entry.get("cvssData", {}) if cvss_entry else {}
|
||||
severity = cvss_data.get("baseSeverity", "UNKNOWN")
|
||||
score = cvss_data.get("baseScore")
|
||||
|
||||
item = OsintItem(
|
||||
technique_id=technique.id,
|
||||
source_type="cve",
|
||||
source_url=f"https://nvd.nist.gov/vuln/detail/{cve_id}",
|
||||
title=cve_id,
|
||||
description=desc[:500] if desc else None,
|
||||
severity=severity,
|
||||
metadata_={"cvss_score": score, "cve_id": cve_id},
|
||||
)
|
||||
db.add(item)
|
||||
count += 1
|
||||
|
||||
if count > 0:
|
||||
technique.review_required = True
|
||||
db.commit()
|
||||
logger.info("Added %d CVEs for %s", count, technique.mitre_id)
|
||||
|
||||
return count
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(
|
||||
"HTTP error during OSINT enrichment for %s: %s",
|
||||
technique.mitre_id,
|
||||
e,
|
||||
)
|
||||
return 0
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"OSINT enrichment failed for %s: %s",
|
||||
technique.mitre_id,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def enrich_all_techniques(db: Session) -> int:
|
||||
"""Enrich all techniques with CVE data from NVD.
|
||||
|
||||
Rate-limited: processes *NVD_RATE_LIMIT_BATCH* techniques, then
|
||||
sleeps for *NVD_RATE_LIMIT_WAIT* seconds to stay under NVD limits.
|
||||
|
||||
Returns total number of new OSINT items added.
|
||||
"""
|
||||
techniques = db.query(Technique).order_by(Technique.mitre_id).all()
|
||||
total = 0
|
||||
|
||||
logger.info(
|
||||
"Starting OSINT enrichment for %d techniques...",
|
||||
len(techniques),
|
||||
)
|
||||
|
||||
for i, tech in enumerate(techniques):
|
||||
total += enrich_technique_with_cves(db, tech)
|
||||
|
||||
# Rate limiting: wait after every batch
|
||||
if (i + 1) % NVD_RATE_LIMIT_BATCH == 0 and (i + 1) < len(techniques):
|
||||
logger.debug(
|
||||
"Rate limit pause after %d techniques (%ds)...",
|
||||
i + 1,
|
||||
NVD_RATE_LIMIT_WAIT,
|
||||
)
|
||||
time.sleep(NVD_RATE_LIMIT_WAIT)
|
||||
|
||||
logger.info(
|
||||
"OSINT enrichment complete — %d new items across %d techniques",
|
||||
total,
|
||||
len(techniques),
|
||||
)
|
||||
return total
|
||||
|
||||
|
||||
def get_osint_items_for_technique(
|
||||
db: Session,
|
||||
technique_id: str,
|
||||
source_type: str | None = None,
|
||||
reviewed: bool | None = None,
|
||||
) -> list[OsintItem]:
|
||||
"""Retrieve OSINT items for a technique with optional filters."""
|
||||
query = db.query(OsintItem).filter(OsintItem.technique_id == technique_id)
|
||||
if source_type:
|
||||
query = query.filter(OsintItem.source_type == source_type)
|
||||
if reviewed is not None:
|
||||
query = query.filter(OsintItem.reviewed == reviewed)
|
||||
return query.order_by(OsintItem.discovered_at.desc()).all()
|
||||
|
||||
|
||||
def mark_osint_reviewed(db: Session, item_id: str) -> OsintItem | None:
|
||||
"""Mark an OSINT item as reviewed."""
|
||||
item = db.query(OsintItem).filter(OsintItem.id == item_id).first()
|
||||
if item:
|
||||
item.reviewed = True
|
||||
db.commit()
|
||||
db.refresh(item)
|
||||
return item
|
||||
|
||||
|
||||
def get_unreviewed_count(db: Session) -> int:
|
||||
"""Return the total number of unreviewed OSINT items."""
|
||||
return db.query(OsintItem).filter(OsintItem.reviewed == False).count() # noqa: E712
|
||||
78
backend/app/services/stale_detection_service.py
Normal file
78
backend/app/services/stale_detection_service.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Stale coverage detection — marks techniques whose last validated test
|
||||
is older than a configurable threshold.
|
||||
|
||||
This is the simple version. The full Decay Engine (Fase 8) will replace
|
||||
this with a multi-factor, configurable decay model with confidence scores.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models.technique import Technique
|
||||
from app.models.test import Test
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
STALE_THRESHOLD_DAYS = getattr(settings, "STALE_THRESHOLD_DAYS", 365)
|
||||
|
||||
|
||||
def detect_stale_coverage(db: Session) -> int:
|
||||
"""Scan all techniques and flag those with stale coverage.
|
||||
|
||||
A technique is considered stale when:
|
||||
- It has a status other than ``not_evaluated``, AND
|
||||
- Its most recent *validated* test is older than *STALE_THRESHOLD_DAYS*, OR
|
||||
- It has never had a validated test (but has been manually marked as
|
||||
covered/partial).
|
||||
|
||||
Returns the number of newly-flagged techniques.
|
||||
"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=STALE_THRESHOLD_DAYS)
|
||||
|
||||
# Subquery: latest validated test date per technique
|
||||
latest_test = (
|
||||
db.query(
|
||||
Test.technique_id,
|
||||
func.max(Test.created_at).label("last_tested"),
|
||||
)
|
||||
.filter(Test.state == "validated")
|
||||
.group_by(Test.technique_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
# Find techniques that are stale
|
||||
stale_techniques = (
|
||||
db.query(Technique)
|
||||
.outerjoin(latest_test, Technique.id == latest_test.c.technique_id)
|
||||
.filter(
|
||||
# Either tested before cutoff, or never tested at all
|
||||
(latest_test.c.last_tested < cutoff)
|
||||
| (latest_test.c.last_tested.is_(None))
|
||||
)
|
||||
.filter(
|
||||
# Only flag techniques that have a real status (not never-evaluated ones)
|
||||
Technique.status_global != "not_evaluated"
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
count = 0
|
||||
for tech in stale_techniques:
|
||||
if not tech.review_required:
|
||||
tech.review_required = True
|
||||
count += 1
|
||||
logger.info("Marked %s as stale coverage", tech.mitre_id)
|
||||
|
||||
if count > 0:
|
||||
db.commit()
|
||||
logger.info(
|
||||
"Stale coverage detection complete — %d techniques flagged", count
|
||||
)
|
||||
else:
|
||||
logger.info("Stale coverage detection complete — no new stale techniques")
|
||||
|
||||
return count
|
||||
Reference in New Issue
Block a user