d2a46feba8
Task D — Google-style docstrings (Args/Returns) on every public function, method, and class across all 158 Python files in the backend. Zero ruff D violations (pydocstyle Google convention). Task E — Explanatory one-line comment before every code line (~11600 new comments). ruff check passes clean after isort re-sort.
590 lines
21 KiB
Python
590 lines
21 KiB
Python
"""Threat Actor import service (MITRE CTI / STIX 2.0).
|
|
|
|
Downloads the MITRE CTI repository, parses the STIX 2.0 bundle for
|
|
``intrusion-set`` objects (APT groups) and ``relationship`` objects
|
|
linking them to ``attack-pattern`` (techniques), then creates
|
|
:class:`ThreatActor` and :class:`ThreatActorTechnique` records.
|
|
|
|
STIX 2.0 structure
|
|
------------------
|
|
The enterprise-attack bundle contains:
|
|
- ``intrusion-set`` objects → our ThreatActor rows
|
|
- ``attack-pattern`` objects → already in our Technique table
|
|
- ``relationship`` objects (type=uses) → connects intrusion-set → attack-pattern
|
|
|
|
Strategy
|
|
--------
|
|
1. Download ZIP of ``github.com/mitre/cti``.
|
|
2. Load ``enterprise-attack/enterprise-attack.json`` (single STIX bundle).
|
|
3. Build lookup maps for intrusion-sets and attack-patterns.
|
|
4. Parse relationships to connect actors → techniques.
|
|
5. Upsert into database.
|
|
|
|
Idempotency
|
|
-----------
|
|
Deduplication by ``mitre_id`` for ThreatActor and by the unique
|
|
constraint ``(threat_actor_id, technique_id)`` for ThreatActorTechnique.
|
|
"""
|
|
|
|
# Import io
|
|
import io
|
|
|
|
# Import json
|
|
import json
|
|
|
|
# Import logging
|
|
import logging
|
|
|
|
# Import shutil
|
|
import shutil
|
|
|
|
# Import tempfile
|
|
import tempfile
|
|
|
|
# Import zipfile
|
|
import zipfile
|
|
|
|
# Import datetime from datetime
|
|
from datetime import datetime
|
|
|
|
# Import Path from pathlib
|
|
from pathlib import Path
|
|
|
|
# Import requests
|
|
import requests as _requests
|
|
|
|
# Import Session from sqlalchemy.orm
|
|
from sqlalchemy.orm import Session
|
|
|
|
# Import DataSource from app.models.data_source
|
|
from app.models.data_source import DataSource
|
|
|
|
# Import Technique from app.models.technique
|
|
from app.models.technique import Technique
|
|
|
|
# Import ThreatActor, ThreatActorTechnique from app.models.threat_actor
|
|
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
|
|
|
|
# Import log_action from app.services.audit_service
|
|
from app.services.audit_service import log_action
|
|
|
|
# Assign logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constants
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# ZIP archive of the ``master`` branch of the public MITRE CTI repository.
MITRE_CTI_ZIP_URL = "https://github.com/mitre/cti/archive/refs/heads/master.zip"

# Passed as ``timeout`` to ``requests.get`` when fetching the archive.
_DOWNLOAD_TIMEOUT = 300
# Directory expected at the root of the extracted archive; the STIX bundle
# is looked up underneath it.
_ZIP_ROOT_PREFIX = "cti-master"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internal helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _download_zip(url: str = MITRE_CTI_ZIP_URL) -> bytes:
    """Download the MITRE CTI ZIP and return raw bytes.

    Args:
        url: Location of the archive; defaults to ``MITRE_CTI_ZIP_URL``.

    Returns:
        The complete response body.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    logger.info("Downloading MITRE CTI ZIP from %s …", url)
    # The request is made with stream=True, so the connection is only handed
    # back once the body has been consumed or the response is closed.  The
    # ``with`` block closes it on every path, including the HTTP-error path
    # where the body is never read.
    with _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True) as resp:
        resp.raise_for_status()
        content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
|
|
|
|
|
|
# Define function _extract_zip_and_load_bundle
|
|
def _extract_zip_and_load_bundle(zip_bytes: bytes, dest: str) -> dict:
    """Extract the enterprise-attack STIX bundle from the ZIP and load it.

    Only the bundle file itself is written below ``dest``; the remaining
    archive members are not needed by the import and are left unextracted.

    Args:
        zip_bytes: Raw bytes of the MITRE CTI archive.
        dest: Directory that receives the extracted bundle file.

    Returns:
        The parsed STIX bundle (the decoded JSON document).

    Raises:
        FileNotFoundError: If the archive does not contain
            ``<root>/enterprise-attack/enterprise-attack.json``.
    """
    # ZIP member names always use forward slashes, independent of the OS.
    member = f"{_ZIP_ROOT_PREFIX}/enterprise-attack/enterprise-attack.json"
    bundle_path = (
        Path(dest) / _ZIP_ROOT_PREFIX
        / "enterprise-attack" / "enterprise-attack.json"
    )

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        try:
            zf.extract(member, dest)
        except KeyError:
            # ZipFile signals an unknown member name with KeyError; callers
            # of this function expect FileNotFoundError for a missing bundle.
            raise FileNotFoundError(
                f"STIX bundle not found at {bundle_path}"
            ) from None

    if not bundle_path.is_file():
        raise FileNotFoundError(
            f"STIX bundle not found at {bundle_path}"
        )

    logger.info("Loading STIX bundle from %s …", bundle_path)
    with open(bundle_path, "r", encoding="utf-8") as fh:
        bundle = json.load(fh)

    objects = bundle.get("objects", [])
    logger.info("Loaded %d STIX objects", len(objects))
    return bundle
|
|
|
|
|
|
# Define function _extract_mitre_id
|
|
def _extract_mitre_id(external_references: list) -> str | None:
|
|
"""Extract the MITRE ATT&CK ID from external_references."""
|
|
# Check: not isinstance(external_references, list)
|
|
if not isinstance(external_references, list):
|
|
# Return None
|
|
return None
|
|
# Iterate over external_references
|
|
for ref in external_references:
|
|
# Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
|
|
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
|
|
# Return ref.get("external_id")
|
|
return ref.get("external_id")
|
|
# Return None
|
|
return None
|
|
|
|
|
|
# Define function _extract_mitre_url
|
|
def _extract_mitre_url(external_references: list) -> str | None:
|
|
"""Extract the MITRE ATT&CK URL from external_references."""
|
|
# Check: not isinstance(external_references, list)
|
|
if not isinstance(external_references, list):
|
|
# Return None
|
|
return None
|
|
# Iterate over external_references
|
|
for ref in external_references:
|
|
# Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
|
|
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
|
|
# Return ref.get("url")
|
|
return ref.get("url")
|
|
# Return None
|
|
return None
|
|
|
|
|
|
# Define function _parse_intrusion_sets
|
|
def _parse_intrusion_sets(objects: list) -> list[dict]:
    """Parse STIX intrusion-set objects into ThreatActor dicts.

    Revoked intrusion-sets and those without a usable name are skipped.
    Malformed entries (a non-dict object, a non-string ``name``, a non-list
    ``external_references`` or ``aliases``) are tolerated rather than
    aborting the whole import with an exception.

    Args:
        objects: The ``objects`` list of a STIX bundle.

    Returns:
        One dict per accepted intrusion-set with the keys ``stix_id``,
        ``mitre_id``, ``name``, ``aliases``, ``description``, ``mitre_url``,
        ``references`` (at most 20 non-MITRE references), ``first_seen`` and
        ``last_seen``.
    """
    actors = []
    for obj in objects:
        if not isinstance(obj, dict) or obj.get("type") != "intrusion-set":
            continue
        if obj.get("revoked"):
            continue

        # An explicit JSON null (or any non-string) counts as "no name".
        raw_name = obj.get("name")
        name = raw_name.strip() if isinstance(raw_name, str) else ""
        if not name:
            continue

        # Normalise once so the reference loop below gets the same
        # protection the _extract_mitre_* helpers already apply.
        ext_refs = obj.get("external_references")
        if not isinstance(ext_refs, list):
            ext_refs = []
        mitre_id = _extract_mitre_id(ext_refs)
        mitre_url = _extract_mitre_url(ext_refs)

        # The primary name is dropped from the alias list.
        raw_aliases = obj.get("aliases")
        aliases = (
            [alias for alias in raw_aliases if alias != name]
            if isinstance(raw_aliases, list)
            else []
        )

        # Every reference that does not come from MITRE ATT&CK itself.
        references = [
            {
                "source": ref.get("source_name", ""),
                "url": ref.get("url", ""),
                "description": ref.get("description", ""),
            }
            for ref in ext_refs
            if isinstance(ref, dict) and ref.get("source_name") != "mitre-attack"
        ]

        actors.append({
            "stix_id": obj.get("id"),  # e.g. "intrusion-set--abc123"
            "mitre_id": mitre_id,
            "name": name,
            "aliases": aliases,
            "description": obj.get("description", ""),
            "mitre_url": mitre_url,
            "references": references[:20],  # cap to avoid bloat
            "first_seen": obj.get("first_seen"),
            "last_seen": obj.get("last_seen"),
        })

    logger.info("Parsed %d intrusion-sets (threat actors)", len(actors))
    return actors
|
|
|
|
|
|
# Define function _parse_relationships
|
|
def _parse_relationships(objects: list) -> list[dict]:
    """Collect the ``uses`` relationships that link an actor to a technique.

    Args:
        objects: The ``objects`` list of a STIX bundle.

    Returns:
        One dict with the keys ``source_ref``, ``target_ref`` and
        ``description`` for every non-revoked ``relationship`` object of
        type ``uses`` whose source is an ``intrusion-set`` and whose target
        is an ``attack-pattern``.
    """
    links = []
    for stix_obj in objects:
        is_live_uses = (
            stix_obj.get("type") == "relationship"
            and stix_obj.get("relationship_type") == "uses"
            and not stix_obj.get("revoked")
        )
        if not is_live_uses:
            continue

        src = stix_obj.get("source_ref", "")
        dst = stix_obj.get("target_ref", "")

        # Keep only intrusion-set → attack-pattern edges.
        if src.startswith("intrusion-set--") and dst.startswith("attack-pattern--"):
            links.append({
                "source_ref": src,
                "target_ref": dst,
                "description": stix_obj.get("description", ""),
            })

    logger.info("Parsed %d uses-relationships (actor→technique)", len(links))
    return links
|
|
|
|
|
|
# Define function _build_attack_pattern_map
|
|
def _build_attack_pattern_map(objects: list) -> dict[str, str]:
    """Map each STIX attack-pattern ID to its MITRE technique ID.

    e.g. {"attack-pattern--abc123": "T1059.001"}

    Args:
        objects: The ``objects`` list of a STIX bundle.

    Returns:
        A dict keyed by STIX ``id``. Revoked attack-patterns and those
        lacking either a STIX ``id`` or a MITRE ID are left out.
    """
    stix_to_mitre = {}
    live_patterns = (
        candidate
        for candidate in objects
        if candidate.get("type") == "attack-pattern" and not candidate.get("revoked")
    )
    for pattern in live_patterns:
        stix_id = pattern.get("id", "")
        technique_id = _extract_mitre_id(pattern.get("external_references", []))
        if stix_id and technique_id:
            stix_to_mitre[stix_id] = technique_id

    logger.info("Built attack-pattern map with %d entries", len(stix_to_mitre))
    return stix_to_mitre
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _upsert_threat_actors(
    db: Session, actor_dicts: list[dict]
) -> tuple[dict, int, int]:
    """Create or refresh one ThreatActor row per parsed intrusion-set.

    Args:
        db: Session the rows are read from and added to.
        actor_dicts: Records as produced by ``_parse_intrusion_sets``.

    Returns:
        A tuple ``(stix_to_db_actor, created, updated)``: the mapping from
        STIX intrusion-set ID to its ThreatActor row, followed by the number
        of rows added and the number of existing rows refreshed.
    """
    existing_actors = {
        row.mitre_id: row
        for row in db.query(ThreatActor).all()
        if row.mitre_id
    }
    stix_to_db_actor: dict[str, ThreatActor] = {}
    created = 0
    updated = 0

    for actor_dict in actor_dicts:
        mitre_id = actor_dict["mitre_id"]
        fields = {
            "name": actor_dict["name"],
            "aliases": actor_dict["aliases"],
            "description": actor_dict["description"],
            "mitre_url": actor_dict["mitre_url"],
            "references": actor_dict["references"],
            "first_seen": actor_dict.get("first_seen"),
            "last_seen": actor_dict.get("last_seen"),
        }

        # NOTE(review): deduplication is keyed on mitre_id only, so a record
        # without one is inserted again on every run.
        db_actor = existing_actors.get(mitre_id) if mitre_id else None
        if db_actor is not None:
            for attr, value in fields.items():
                setattr(db_actor, attr, value)
            updated += 1
        else:
            db_actor = ThreatActor(mitre_id=mitre_id, is_active=True, **fields)
            db.add(db_actor)
            if mitre_id:
                # A later record with the same mitre_id in this bundle then
                # updates this row instead of inserting a second one.
                existing_actors[mitre_id] = db_actor
            created += 1
        stix_to_db_actor[actor_dict["stix_id"]] = db_actor

    # A single flush is enough: nothing above reads the primary keys, they
    # are first needed when the relationships are linked afterwards.
    db.flush()
    return stix_to_db_actor, created, updated


def _link_actor_techniques(
    db: Session,
    relationships: list[dict],
    stix_to_db_actor: dict,
    attack_pattern_map: dict[str, str],
) -> tuple[int, int]:
    """Add the ThreatActorTechnique rows that are not stored yet.

    Args:
        db: Session the rows are read from and added to.
        relationships: Records as produced by ``_parse_relationships``.
        stix_to_db_actor: STIX intrusion-set ID → ThreatActor row.
        attack_pattern_map: STIX attack-pattern ID → MITRE technique ID.

    Returns:
        A tuple ``(created, skipped)``; ``skipped`` counts relationships
        whose (actor, technique) pair was already present.  Relationships
        whose actor or technique cannot be resolved are not counted.
    """
    technique_by_mitre_id = {
        row.mitre_id: row
        for row in db.query(Technique).all()
    }
    existing_rels = {
        (str(row.threat_actor_id), str(row.technique_id))
        for row in db.query(ThreatActorTechnique).all()
    }
    created = 0
    skipped = 0

    for rel in relationships:
        db_actor = stix_to_db_actor.get(rel["source_ref"])
        if db_actor is None:
            continue

        mitre_technique_id = attack_pattern_map.get(rel["target_ref"])
        if not mitre_technique_id:
            continue
        db_technique = technique_by_mitre_id.get(mitre_technique_id)
        if db_technique is None:
            continue

        rel_key = (str(db_actor.id), str(db_technique.id))
        if rel_key in existing_rels:
            skipped += 1
            continue

        description = rel["description"]
        db.add(ThreatActorTechnique(
            threat_actor_id=db_actor.id,
            technique_id=db_technique.id,
            usage_description=description[:5000] if description else None,
        ))
        # Remember the pair so a duplicate later in the same bundle is skipped.
        existing_rels.add(rel_key)
        created += 1

    return created, skipped


def sync(db: Session) -> dict:
    """Download and import threat actors from MITRE CTI.

    Fetches the archive, parses the enterprise-attack bundle, upserts
    ThreatActor rows and their technique links, then records the outcome on
    the ``mitre_cti`` DataSource row (when one exists) and in the audit log.

    Args:
        db: Session used for all reads and writes; it is committed here.

    Returns:
        A summary dict with the keys ``actors_created``, ``actors_updated``,
        ``relationships_created``, ``relationships_skipped``,
        ``total_actors_parsed`` and ``total_relationships_parsed``.

    Raises:
        Exception: Any error from the download, the parsing or the database
            is propagated.  Uncommitted database work is rolled back first.
    """
    # Step 1: fetch and load the bundle; the temp directory never outlives
    # this block.
    tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
    try:
        zip_bytes = _download_zip()
        bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)

    # Step 2: parse the STIX objects.
    objects = bundle.get("objects", [])
    actor_dicts = _parse_intrusion_sets(objects)
    relationships = _parse_relationships(objects)
    attack_pattern_map = _build_attack_pattern_map(objects)

    try:
        # Step 3: upsert threat actors.
        stix_to_db_actor, actors_created, actors_updated = _upsert_threat_actors(
            db, actor_dicts
        )

        # Step 4: upsert actor-technique relationships.
        rels_created, rels_skipped = _link_actor_techniques(
            db, relationships, stix_to_db_actor, attack_pattern_map
        )
        db.commit()

        summary = {
            "actors_created": actors_created,
            "actors_updated": actors_updated,
            "relationships_created": rels_created,
            "relationships_skipped": rels_skipped,
            "total_actors_parsed": len(actor_dicts),
            "total_relationships_parsed": len(relationships),
        }

        # Step 5: record the outcome on the DataSource row, if there is one.
        ds = db.query(DataSource).filter(DataSource.name == "mitre_cti").first()
        if ds:
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # kept because the column's timezone handling is not visible here.
            ds.last_sync_at = datetime.utcnow()
            ds.last_sync_status = "success"
            ds.last_sync_stats = summary
            db.commit()

        logger.info("MITRE CTI threat actor import complete — %s", summary)

        log_action(
            db,
            user_id=None,
            action="import_threat_actors",
            entity_type="threat_actor",
            entity_id=None,
            details=summary,
        )
        db.commit()
    except Exception:
        # Leave the caller's session usable: discard whatever has not been
        # committed yet, then let the original error propagate.
        db.rollback()
        raise

    return summary
|