"""OSINT enrichment service — discovers CVEs, advisories, and threat intel
for ATT&CK techniques via the NVD API.

Designed to run as a weekly background job. Respects NVD rate limits
(5 requests per 30 seconds without an API key, 50/30s with a key).
"""

import logging
import re
import time
from typing import Optional
from uuid import UUID

import requests
from sqlalchemy import func
from sqlalchemy.orm import Session

from app.config import settings
from app.domain.errors import EntityNotFoundError
from app.models.osint_item import OsintItem
from app.models.technique import Technique

logger = logging.getLogger(__name__)

NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0"
NVD_RATE_LIMIT_BATCH = 5
NVD_RATE_LIMIT_WAIT = 31  # seconds to wait after each batch


def _url_mentions_cve(url: str | None, cve_id: str) -> bool:
    """Return True if *url* contains *cve_id* as a complete identifier.

    A plain substring test would treat ``CVE-2021-1234`` as present in a URL
    for ``CVE-2021-12345``; the negative lookahead rejects a match that is
    immediately followed by another digit.
    """
    if not url:
        return False
    pattern = rf"{re.escape(cve_id)}(?!\d)"
    # Case-insensitive so this check is never stricter than the SQL LIKE
    # pre-filter (which is case-insensitive on some backends).
    return re.search(pattern, url, re.IGNORECASE) is not None


def _english_description(cve: dict) -> str:
    """Return the first English description of an NVD CVE record, or ``""``."""
    descriptions = cve.get("descriptions") or []
    return next(
        (d.get("value") or "" for d in descriptions if d.get("lang") == "en"),
        "",
    )


def _extract_cvss(cve: dict) -> tuple[str, float | None]:
    """Return ``(baseSeverity, baseScore)`` from a CVE record.

    Prefers the first CVSS v3.1 metric, falls back to the first v3.0 metric,
    and yields ``("UNKNOWN", None)`` when neither is present.
    """
    metrics = cve.get("metrics") or {}
    entries = metrics.get("cvssMetricV31") or metrics.get("cvssMetricV30") or []
    cvss_data = (entries[0].get("cvssData") or {}) if entries else {}
    return cvss_data.get("baseSeverity", "UNKNOWN"), cvss_data.get("baseScore")


def _rollback_quietly(db: Session, mitre_id: str) -> None:
    """Roll back *db*, logging (not raising) if the rollback itself fails."""
    try:
        db.rollback()
    except Exception:
        logger.exception(
            "Rollback failed after OSINT enrichment error for %s", mitre_id
        )


def enrich_technique_with_cves(db: Session, technique: Technique) -> int:
    """Search for CVEs related to a technique via the NVD API.

    Uses the technique name as a keyword search and requests up to 10
    results. Deduplicates against existing OsintItems of the same technique
    so re-runs are safe. When at least one new item is staged, the technique
    is flagged ``review_required`` and the session is committed.

    This function is best-effort: HTTP and processing errors are logged and
    reported as ``0`` rather than raised. On a processing error the session
    is rolled back so partially staged items are not committed later.

    Args:
        db (Session): Active SQLAlchemy database session.
        technique (Technique): The ATT&CK technique to enrich.

    Returns:
        int: Number of new CVE items added to the database.
    """
    # Read identifying attributes up front: after a commit or a failed flush
    # the ORM object may be expired, and touching it inside an error handler
    # could raise a second exception.
    mitre_id = technique.mitre_id
    technique_id = technique.id

    try:
        headers = {}
        api_key = getattr(settings, "NVD_API_KEY", "")
        if api_key:
            headers["apiKey"] = api_key

        params = {
            "keywordSearch": technique.name,
            "resultsPerPage": 10,
        }

        resp = requests.get(
            NVD_API_BASE,
            params=params,
            headers=headers,
            timeout=30,
        )

        if resp.status_code != 200:
            logger.warning(
                "NVD API error for %s: HTTP %d",
                mitre_id,
                resp.status_code,
            )
            return 0

        data = resp.json()
        count = 0

        for vuln in data.get("vulnerabilities", []):
            cve = vuln.get("cve", {})
            cve_id = cve.get("id")
            if not cve_id:
                continue

            # Deduplicate: SQL narrows to URLs containing the ID, then the
            # Python check rejects longer IDs that merely share the prefix.
            candidate_rows = (
                db.query(OsintItem.source_url)
                .filter(
                    OsintItem.technique_id == technique_id,
                    OsintItem.source_url.contains(cve_id),
                )
                .all()
            )
            if any(_url_mentions_cve(url, cve_id) for (url,) in candidate_rows):
                continue

            desc = _english_description(cve)
            severity, score = _extract_cvss(cve)

            item = OsintItem(
                technique_id=technique_id,
                source_type="cve",
                source_url=f"https://nvd.nist.gov/vuln/detail/{cve_id}",
                title=cve_id,
                description=desc[:500] if desc else None,
                severity=severity,
                metadata_={"cvss_score": score, "cve_id": cve_id},
            )
            db.add(item)
            count += 1

        if count > 0:
            technique.review_required = True
            db.commit()
            logger.info("Added %d CVEs for %s", count, mitre_id)

        return count

    except requests.RequestException as e:
        # Raised by the request or by decoding its body, i.e. before anything
        # was staged in the session, so no rollback is needed here.
        logger.error(
            "HTTP error during OSINT enrichment for %s: %s",
            mitre_id,
            e,
        )
        return 0
    except Exception as e:
        logger.error(
            "OSINT enrichment failed for %s: %s",
            mitre_id,
            e,
            exc_info=True,
        )
        # Discard anything staged for this technique and return the session
        # to a usable state for the next caller.
        _rollback_quietly(db, mitre_id)
        return 0


def enrich_all_techniques(db: Session) -> int:
    """Enrich all techniques with CVE data from NVD.

    Rate-limited: processes *NVD_RATE_LIMIT_BATCH* techniques, then sleeps
    for *NVD_RATE_LIMIT_WAIT* seconds to stay under NVD limits. No sleep is
    performed after the final technique.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        int: Total number of new OSINT items added across all techniques.
    """
    techniques = db.query(Technique).order_by(Technique.mitre_id).all()
    technique_count = len(techniques)
    total = 0

    logger.info(
        "Starting OSINT enrichment for %d techniques...",
        technique_count,
    )

    for processed, tech in enumerate(techniques, start=1):
        total += enrich_technique_with_cves(db, tech)

        # Rate limiting: wait after every full batch, except at the very end.
        if processed % NVD_RATE_LIMIT_BATCH == 0 and processed < technique_count:
            logger.debug(
                "Rate limit pause after %d techniques (%ds)...",
                processed,
                NVD_RATE_LIMIT_WAIT,
            )
            time.sleep(NVD_RATE_LIMIT_WAIT)

    logger.info(
        "OSINT enrichment complete — %d new items across %d techniques",
        total,
        technique_count,
    )
    return total


def _filter_by_type_and_review(query, source_type, reviewed):
    """Apply the optional ``source_type`` / ``reviewed`` filters to *query*.

    A falsy ``source_type`` and a ``reviewed`` of ``None`` each mean
    "do not filter on this column".
    """
    if source_type:
        query = query.filter(OsintItem.source_type == source_type)
    if reviewed is not None:
        query = query.filter(OsintItem.reviewed == reviewed)
    return query


def get_osint_items_for_technique(
    db: Session,
    technique_id: str,
    source_type: str | None = None,
    reviewed: bool | None = None,
) -> list[OsintItem]:
    """Retrieve OSINT items for a technique with optional filters.

    Args:
        db (Session): Active SQLAlchemy database session.
        technique_id (str): UUID string of the technique to query.
        source_type (str | None): Optional filter by source type
            (e.g. ``"cve"``).
        reviewed (bool | None): Optional filter; ``True`` for reviewed items
            only, ``False`` for unreviewed, ``None`` for all.

    Returns:
        list[OsintItem]: Matching OSINT items ordered by discovery date
        descending.
    """
    query = db.query(OsintItem).filter(OsintItem.technique_id == technique_id)
    query = _filter_by_type_and_review(query, source_type, reviewed)
    return query.order_by(OsintItem.discovered_at.desc()).all()


def mark_osint_reviewed(db: Session, item_id: str) -> OsintItem | None:
    """Mark an OSINT item as reviewed.

    Does not commit; caller uses UnitOfWork.

    Args:
        db (Session): Active SQLAlchemy database session.
        item_id (str): UUID string of the OSINT item to mark.

    Returns:
        OsintItem | None: The updated item, or ``None`` if not found.
    """
    item = db.query(OsintItem).filter(OsintItem.id == item_id).first()
    if item:
        item.reviewed = True
    return item


def get_unreviewed_count(db: Session) -> int:
    """Return the total number of unreviewed OSINT items.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        int: Count of OSINT items where ``reviewed`` is ``False``.
    """
    # ``== False`` is intentional: it builds a SQL comparison expression.
    return db.query(OsintItem).filter(OsintItem.reviewed == False).count()  # noqa: E712


def _serialize_osint_item(item: OsintItem) -> dict:
    """Convert an OsintItem into the plain dict shape returned by listings."""
    return {
        "id": str(item.id),
        "technique_id": str(item.technique_id),
        "source_type": item.source_type,
        "source_url": item.source_url,
        "title": item.title,
        "description": item.description,
        "severity": item.severity,
        "discovered_at": (
            item.discovered_at.isoformat() if item.discovered_at else None
        ),
        "reviewed": item.reviewed,
        "metadata": item.metadata_,
    }


def list_osint_items(
    db: Session,
    *,
    technique_id: Optional[UUID] = None,
    source_type: Optional[str] = None,
    reviewed: Optional[bool] = None,
    offset: int = 0,
    limit: int = 50,
) -> dict:
    """List OSINT items with optional filters and pagination.

    Args:
        db (Session): Active SQLAlchemy database session.
        technique_id (Optional[UUID]): Filter by technique UUID.
        source_type (Optional[str]): Filter by source type string
            (e.g. ``"cve"``).
        reviewed (Optional[bool]): Filter by reviewed status; ``None``
            returns all.
        offset (int): Number of records to skip for pagination.
        limit (int): Maximum number of records to return.

    Returns:
        dict: Contains ``total`` count (of all rows matching the filters,
        before pagination) and ``items`` list of serialized OSINT item dicts.
    """
    query = db.query(OsintItem)
    if technique_id:
        query = query.filter(OsintItem.technique_id == technique_id)
    query = _filter_by_type_and_review(query, source_type, reviewed)

    # Count before applying offset/limit so ``total`` reflects the full match.
    total = query.count()
    items = (
        query.order_by(OsintItem.discovered_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )

    return {
        "total": total,
        "items": [_serialize_osint_item(item) for item in items],
    }


def get_osint_summary(db: Session) -> dict:
    """Return summary statistics for OSINT items.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: Contains ``total_items``, ``unreviewed``,
        ``techniques_with_items``, ``by_severity``, and ``by_type``.
    """
    total = db.query(func.count(OsintItem.id)).scalar() or 0
    unreviewed = get_unreviewed_count(db)

    # Each grouped query yields (value, count) pairs, which dict() maps
    # directly to {value: count}.
    by_severity = dict(
        db.query(OsintItem.severity, func.count(OsintItem.id))
        .group_by(OsintItem.severity)
        .all()
    )
    by_type = dict(
        db.query(OsintItem.source_type, func.count(OsintItem.id))
        .group_by(OsintItem.source_type)
        .all()
    )

    techniques_with_items = (
        db.query(func.count(func.distinct(OsintItem.technique_id))).scalar() or 0
    )

    return {
        "total_items": total,
        "unreviewed": unreviewed,
        "techniques_with_items": techniques_with_items,
        "by_severity": by_severity,
        "by_type": by_type,
    }


def get_technique_or_raise(db: Session, technique_id: UUID) -> Technique:
    """Return a technique by ID or raise EntityNotFoundError.

    Args:
        db (Session): Active SQLAlchemy database session.
        technique_id (UUID): UUID of the technique to retrieve.

    Returns:
        Technique: The matching technique ORM object.

    Raises:
        EntityNotFoundError: If no technique with *technique_id* exists.
    """
    technique = db.query(Technique).filter(Technique.id == technique_id).first()
    if not technique:
        raise EntityNotFoundError("Technique", str(technique_id))
    return technique