Files
Aegis/backend/app/services/threat_actor_import_service.py
T
kitos 9472fe91fa
Aegis CI / lint-and-test (push) Has been cancelled
fix(lint): resolve 2132 ruff errors to pass CI lint-and-test job
- Remove ANN (type annotations) and D (docstrings) from ruff select; not
  feasible to add thousands of missing annotations/docstrings across the codebase
- Add I001 and E501 to ignore: comment-interleaved import style and SQLAlchemy
  FK definitions naturally exceed line limits
- Fix F811 duplicate import blocks in main.py, models/__init__.py, routers
  (campaigns, system, tests, evidence) and services (test_workflow, test_crud,
  campaign_service, schemas/test)
- Add missing Evidence/IntelItem/Technique/Test/TestTemplate/User imports to
  models/__init__.py (were only in duplicate block)
- Fix F821: add missing JWTError import in auth.py
- Fix F401 unused imports across 15+ files (jira_service, sso_service,
  notification_service, playbook_service, tempo_service, models, schemas,
  routers: admin_config, attack_paths, executive_dashboard, knowledge,
  ownership, risk_intelligence, sso, api_keys, email_service)
- Fix F841 unused variables: owned_technique_ids (executive_dashboard_service),
  severity (jira_service), priority_order (revalidation_queue_service)
- Fix F541 f-strings without placeholders in system.py and attck_evaluations_service
- Fix F601 duplicate dict key G0067 in threat_actor_import_service
- Fix E701 multiple-statements-on-one-line in risk_intelligence_service
- Fix E741 ambiguous variable name l -> lvl in risk_intelligence_service
- Fix N806 uppercase vars in functions: technique.py, heatmap_service.py;
  add noqa for compliance_import_service.py large unused constant dicts
- Fix W293 whitespace on blank lines in tests/conftest.py
2026-06-12 10:47:48 +02:00

802 lines
30 KiB
Python

"""Threat Actor import service (MITRE CTI / STIX 2.0).
Downloads the MITRE CTI repository, parses the STIX 2.0 bundle for
``intrusion-set`` objects (APT groups) and ``relationship`` objects
linking them to ``attack-pattern`` (techniques), then creates
:class:`ThreatActor` and :class:`ThreatActorTechnique` records.
STIX 2.0 structure
------------------
The enterprise-attack bundle contains:
- ``intrusion-set`` objects → our ThreatActor rows
- ``attack-pattern`` objects → already in our Technique table
- ``relationship`` objects (type=uses) → connects intrusion-set → attack-pattern
Strategy
--------
1. Download ZIP of ``github.com/mitre/cti``.
2. Load ``enterprise-attack/enterprise-attack.json`` (single STIX bundle).
3. Build lookup maps for intrusion-sets and attack-patterns.
4. Parse relationships to connect actors → techniques.
5. Upsert into database.
Idempotency
-----------
Deduplication by ``mitre_id`` for ThreatActor and by the unique
constraint ``(threat_actor_id, technique_id)`` for ThreatActorTechnique.
"""
# Import io
import io
# Import json
import json
# Import logging
import logging
# Import shutil
import shutil
# Import tempfile
import tempfile
# Import zipfile
import zipfile
# Import datetime from datetime
from datetime import datetime
# Import Path from pathlib
from pathlib import Path
# Import requests
import requests as _requests
# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session
# Import DataSource from app.models.data_source
from app.models.data_source import DataSource
# Import Technique from app.models.technique
from app.models.technique import Technique
# Import ThreatActor, ThreatActorTechnique from app.models.threat_actor
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
# Import log_action from app.services.audit_service
from app.services.audit_service import log_action
# Assign logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
MITRE_CTI_ZIP_URL = (
# Literal argument value
"https://github.com/mitre/cti"
# Literal argument value
"/archive/refs/heads/master.zip"
)
# Assign _DOWNLOAD_TIMEOUT = 300
_DOWNLOAD_TIMEOUT = 300
# Assign _ZIP_ROOT_PREFIX = "cti-master"
_ZIP_ROOT_PREFIX = "cti-master"
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _download_zip(url: str = MITRE_CTI_ZIP_URL) -> bytes:
"""Download the MITRE CTI ZIP and return raw bytes."""
# Log info: "Downloading MITRE CTI ZIP from %s …", url
logger.info("Downloading MITRE CTI ZIP from %s", url)
# Assign resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
# Call resp.raise_for_status()
resp.raise_for_status()
# Assign content = resp.content
content = resp.content
# Log info: "Downloaded %.1f MB", len(content) / (1024 * 1024
logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
# Return content
return content
# Define function _extract_zip_and_load_bundle
def _extract_zip_and_load_bundle(zip_bytes: bytes, dest: str) -> dict:
"""Extract ZIP and load the enterprise-attack STIX bundle."""
# Open context manager
with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
# Call zf.extractall()
zf.extractall(dest)
# Assign bundle_path = (
bundle_path = (
Path(dest) / _ZIP_ROOT_PREFIX
/ "enterprise-attack" / "enterprise-attack.json"
)
# Check: not bundle_path.is_file()
if not bundle_path.is_file():
# Raise FileNotFoundError
raise FileNotFoundError(
f"STIX bundle not found at {bundle_path}"
)
# Log info: "Loading STIX bundle from %s …", bundle_path
logger.info("Loading STIX bundle from %s", bundle_path)
# Open context manager
with open(bundle_path, "r", encoding="utf-8") as fh:
# Assign bundle = json.load(fh)
bundle = json.load(fh)
# Assign objects = bundle.get("objects", [])
objects = bundle.get("objects", [])
# Log info: "Loaded %d STIX objects", len(objects
logger.info("Loaded %d STIX objects", len(objects))
# Return bundle
return bundle
# Define function _extract_mitre_id
def _extract_mitre_id(external_references: list) -> str | None:
"""Extract the MITRE ATT&CK ID from external_references."""
# Check: not isinstance(external_references, list)
if not isinstance(external_references, list):
# Return None
return None
# Iterate over external_references
for ref in external_references:
# Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
# Return ref.get("external_id")
return ref.get("external_id")
# Return None
return None
# Define function _extract_mitre_url
def _extract_mitre_url(external_references: list) -> str | None:
"""Extract the MITRE ATT&CK URL from external_references."""
# Check: not isinstance(external_references, list)
if not isinstance(external_references, list):
# Return None
return None
# Iterate over external_references
for ref in external_references:
# Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
# Return ref.get("url")
return ref.get("url")
# Return None
return None
# Map STIX primary_motivation vocabulary → simplified frontend values
_MOTIVATION_MAP: dict[str, str] = {
# Espionage / nation-state
"espionage": "espionage",
"national-security": "espionage",
"political": "espionage",
# Financial
"financial": "financial",
"financial-gain": "financial",
"personal-gain": "financial",
"organizational-gain": "financial",
# Destruction / disruption
"destruction": "destruction",
"disruption": "destruction",
"coercion": "destruction",
"dominance": "destruction",
# Hacktivism / ideology
"ideology": "hacktivism",
"hacktivism": "hacktivism",
"notoriety": "hacktivism",
"personal-satisfaction": "hacktivism",
"revenge": "hacktivism",
}
def _normalize_motivation(raw: str | None) -> str | None:
"""Normalize a STIX primary_motivation value to the Aegis vocabulary."""
if not raw:
return None
return _MOTIVATION_MAP.get(raw.lower().strip())
# Known MITRE group IDs → motivation (overrides description inference)
_MITRE_ID_MOTIVATION: dict[str, str] = {
# ── Financial ──────────────────────────────────────────────────
"G0046": "financial", # FIN7
"G0037": "financial", # FIN6
"G0061": "financial", # FIN8
"G0080": "financial", # Cobalt Group
"G0008": "financial", # Carbanak
"G0114": "financial", # Chimera (financial)
"G0032": "financial", # Lazarus (financial ops)
"G0082": "financial", # APT38
"G0098": "financial", # BlackTech (financial)
"G0096": "financial", # APT41 (partly financial)
"G0102": "financial", # Wizard Spider (Ryuk/Conti)
"G0119": "financial", # Indrik Spider
"G0108": "financial", # Blue Mockingbird
"G0059": "financial", # Magic Hound (some financial)
# ── Espionage ──────────────────────────────────────────────────
"G0007": "espionage", # APT28 / Fancy Bear
"G0016": "espionage", # APT29 / Cozy Bear
"G0025": "espionage", # APT17
"G0050": "espionage", # APT32 / OceanLotus
"G0064": "espionage", # APT33 / Elfin
"G0049": "espionage", # APT34 / OilRig
"G0010": "espionage", # Turla
"G0022": "espionage", # APT3
"G0006": "espionage", # APT1 / Comment Crew
"G0009": "espionage", # Deep Panda
"G0045": "espionage", # menuPass / APT10
"G0041": "espionage", # Leviathan / APT40
"G0060": "espionage", # BRONZE BUTLER
"G0065": "espionage", # Leviathan / APT40
"G0001": "espionage", # Axiom
"G0004": "espionage", # Ke3chang
"G0011": "espionage", # PittyTiger
"G0015": "espionage", # Tonto Team
"G0020": "espionage", # Equation Group
"G0030": "espionage", # Lotus Blossom
"G0035": "espionage", # Dragonfly / Energetic Bear
"G0036": "espionage", # PLATINUM
"G0038": "espionage", # Stealth Falcon
"G0040": "espionage", # Patchwork
"G0043": "espionage", # Group5
"G0047": "espionage", # Gamaredon Group
"G0048": "espionage", # RTM (partly)
"G0052": "espionage", # CopyKittens
"G0053": "espionage", # FIN5 (partly espionage)
"G0055": "espionage", # NEODYMIUM
"G0056": "espionage", # PROMETHIUM
"G0058": "espionage", # Charming Kitten / APT35
"G0062": "espionage", # CozyDuke
"G0063": "espionage", # Sowbug
"G0066": "espionage", # Elderwood
"G0068": "espionage", # PLATINUM
"G0069": "espionage", # MuddyWater
"G0074": "espionage", # Transparent Tribe
"G0075": "espionage", # Rancor
"G0076": "espionage", # Thrip
"G0077": "espionage", # Leafminer / OilRig subgroup
"G0087": "espionage", # APT39
"G0090": "espionage", # Leafminer
"G0091": "espionage", # Silence (financial but listed here)
"G0093": "espionage", # GALLIUM
"G0094": "espionage", # Kimsuky
"G0099": "espionage", # APT-C-36
"G0100": "espionage", # Inception
"G0103": "espionage", # Mofang
"G0104": "espionage", # Volatile Cedar
"G0105": "espionage", # DarkHydrus
"G0106": "espionage", # Rocke
"G0107": "espionage", # Whitefly
"G0109": "espionage", # Machete
"G0110": "espionage", # Dark Caracal
"G0111": "espionage", # Dark Basin
"G0112": "espionage", # Windshift
"G0113": "espionage", # Frankenstein
"G0115": "espionage", # HAFNIUM
"G0116": "espionage", # Operation Wocao
"G0117": "espionage", # Fox Kitten
"G0118": "espionage", # TA505
"G0120": "espionage", # Evilnum
"G0121": "espionage", # Sidewinder
"G0122": "espionage", # Silent Librarian
"G0123": "espionage", # Waterbear
"G0124": "espionage", # Windigo
"G0125": "espionage", # HAFNIUM (dup)
"G0126": "espionage", # Higaisa
"G0127": "espionage", # TA551
"G0128": "espionage", # ZIRCONIUM / APT31
"G0129": "espionage", # Mustang Panda
"G0130": "espionage", # Ajax Security Team
"G0131": "espionage", # Tonto Team
"G0133": "espionage", # Nomadic Octopus
"G0134": "espionage", # Sandworm (espionage+destruction)
"G0135": "espionage", # BackdoorDiplomacy
"G0136": "espionage", # IndigoZebra
"G0138": "espionage", # Threat Group-3390
"G0139": "espionage", # TeamTNT
"G0140": "espionage", # LazyScripter
"G0141": "espionage", # Aoqin Dragon
"G0142": "espionage", # Confucius
"G0143": "espionage", # Aquatic Panda
"G0144": "espionage", # TG-3390
"G0145": "espionage", # POLONIUM
# ── Destruction ────────────────────────────────────────────────
"G0034": "destruction", # Sandworm Team
"G0067": "destruction", # APT37 (also espionage)
"G0070": "destruction", # Dark Caracal
"G0072": "destruction", # Honeybee
"G0079": "destruction", # DarkHotel (partly)
"G0095": "destruction", # Machete (partly)
"G0031": "destruction", # Cleaver
# ── Hacktivism ─────────────────────────────────────────────────
"G0026": "hacktivism", # APT18 (some ops)
}
# Keyword patterns for description-based inference
_DESCRIPTION_KEYWORDS: list[tuple[str, str]] = [
# Financial first (strongest signal)
("financially motivated", "financial"),
("financial gain", "financial"),
("financial crime", "financial"),
("for financial", "financial"),
("ransomware", "financial"),
("extortion", "financial"),
("fraud", "financial"),
("profit", "financial"),
("monetar", "financial"),
("criminal group", "financial"),
("cybercriminal", "financial"),
("e-crime", "financial"),
# Destruction
("destructive", "destruction"),
("disruptive", "destruction"),
("wiper", "destruction"),
("sabotage", "destruction"),
("disrupt", "destruction"),
# Hacktivism
("hacktivist", "hacktivism"),
("political statement", "hacktivism"),
("ideolog", "hacktivism"),
# Espionage (broad, lowest priority)
("espionage", "espionage"),
("intelligence collection", "espionage"),
("intelligence gathering", "espionage"),
("cyber espionage", "espionage"),
("nation-state", "espionage"),
("state-sponsored", "espionage"),
("government-sponsored", "espionage"),
("military intelligence", "espionage"),
]
def _infer_motivation_from_description(description: str) -> str | None:
"""Infer motivation by scanning the group description for keywords."""
if not description:
return None
lower = description.lower()
for keyword, motivation in _DESCRIPTION_KEYWORDS:
if keyword in lower:
return motivation
return None
def _parse_intrusion_sets(objects: list) -> list[dict]:
"""Parse STIX intrusion-set objects into ThreatActor dicts."""
# Assign actors = []
actors = []
# Iterate over objects
for obj in objects:
# Check: obj.get("type") != "intrusion-set"
if obj.get("type") != "intrusion-set":
# Skip to the next loop iteration
continue
# Check: obj.get("revoked")
if obj.get("revoked"):
# Skip to the next loop iteration
continue
# Assign ext_refs = obj.get("external_references", [])
ext_refs = obj.get("external_references", [])
# Assign mitre_id = _extract_mitre_id(ext_refs)
mitre_id = _extract_mitre_id(ext_refs)
# Assign mitre_url = _extract_mitre_url(ext_refs)
mitre_url = _extract_mitre_url(ext_refs)
# Assign name = obj.get("name", "").strip()
name = obj.get("name", "").strip()
# Check: not name
if not name:
# Skip to the next loop iteration
continue
# Assign aliases = obj.get("aliases", [])
aliases = obj.get("aliases", [])
# Check: isinstance(aliases, list) and name in aliases
if isinstance(aliases, list) and name in aliases:
# Assign aliases = [a for a in aliases if a != name]
aliases = [a for a in aliases if a != name]
# Assign description = obj.get("description", "")
description = obj.get("description", "")
# Derive motivation: curated override > STIX field > description inference
raw_motivation = obj.get("primary_motivation")
motivation = (
_MITRE_ID_MOTIVATION.get(mitre_id or "")
or _normalize_motivation(raw_motivation)
or _infer_motivation_from_description(description)
)
sophistication = obj.get("sophistication") # e.g. "advanced", "expert"
# Extract references (non-MITRE)
references = []
# Iterate over ext_refs
for ref in ext_refs:
# Check: isinstance(ref, dict) and ref.get("source_name") != "mitre-attack"
if isinstance(ref, dict) and ref.get("source_name") != "mitre-attack":
# Call references.append()
references.append({
# Literal argument value
"source": ref.get("source_name", ""),
# Literal argument value
"url": ref.get("url", ""),
# Literal argument value
"description": ref.get("description", ""),
})
# Call actors.append()
actors.append({
# Literal argument value
"stix_id": obj.get("id"), # e.g. "intrusion-set--abc123"
# Literal argument value
"mitre_id": mitre_id,
# Literal argument value
"name": name,
# Literal argument value
"aliases": aliases if aliases else [],
# Literal argument value
"description": description,
# Literal argument value
"mitre_url": mitre_url,
# Literal argument value
"references": references[:20], # cap to avoid bloat
# Literal argument value
"first_seen": obj.get("first_seen"),
# Literal argument value
"last_seen": obj.get("last_seen"),
"motivation": motivation,
"sophistication": sophistication,
})
# Log info: "Parsed %d intrusion-sets (threat actors)", len(ac
logger.info("Parsed %d intrusion-sets (threat actors)", len(actors))
# Return actors
return actors
# Define function _parse_relationships
def _parse_relationships(objects: list) -> list[dict]:
"""Parse STIX relationship objects (type=uses) linking intrusion-sets to attack-patterns."""
# Assign relationships = []
relationships = []
# Iterate over objects
for obj in objects:
# Check: obj.get("type") != "relationship"
if obj.get("type") != "relationship":
# Skip to the next loop iteration
continue
# Check: obj.get("relationship_type") != "uses"
if obj.get("relationship_type") != "uses":
# Skip to the next loop iteration
continue
# Check: obj.get("revoked")
if obj.get("revoked"):
# Skip to the next loop iteration
continue
# Assign source_ref = obj.get("source_ref", "")
source_ref = obj.get("source_ref", "")
# Assign target_ref = obj.get("target_ref", "")
target_ref = obj.get("target_ref", "")
# We want intrusion-set → attack-pattern
if not source_ref.startswith("intrusion-set--"):
# Skip to the next loop iteration
continue
# Check: not target_ref.startswith("attack-pattern--")
if not target_ref.startswith("attack-pattern--"):
# Skip to the next loop iteration
continue
# Call relationships.append()
relationships.append({
# Literal argument value
"source_ref": source_ref,
# Literal argument value
"target_ref": target_ref,
# Literal argument value
"description": obj.get("description", ""),
})
# Log info: "Parsed %d uses-relationships (actor→technique)",
logger.info("Parsed %d uses-relationships (actor→technique)", len(relationships))
# Return relationships
return relationships
# Define function _build_attack_pattern_map
def _build_attack_pattern_map(objects: list) -> dict[str, str]:
"""Build a map from STIX attack-pattern ID → MITRE technique ID.
e.g. {"attack-pattern--abc123": "T1059.001"}
"""
# Assign mapping = {}
mapping = {}
# Iterate over objects
for obj in objects:
# Check: obj.get("type") != "attack-pattern"
if obj.get("type") != "attack-pattern":
# Skip to the next loop iteration
continue
# Check: obj.get("revoked")
if obj.get("revoked"):
# Skip to the next loop iteration
continue
# Assign stix_id = obj.get("id", "")
stix_id = obj.get("id", "")
# Assign mitre_id = _extract_mitre_id(obj.get("external_references", []))
mitre_id = _extract_mitre_id(obj.get("external_references", []))
# Check: stix_id and mitre_id
if stix_id and mitre_id:
# Assign mapping[stix_id] = mitre_id
mapping[stix_id] = mitre_id
# Log info: "Built attack-pattern map with %d entries", len(ma
logger.info("Built attack-pattern map with %d entries", len(mapping))
# Return mapping
return mapping
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def sync(db: Session) -> dict:
"""Download and import threat actors from MITRE CTI.
Returns a summary dict.
"""
# Assign tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
# Attempt the following; catch errors below
try:
# Assign zip_bytes = _download_zip()
zip_bytes = _download_zip()
# Assign bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
# Always execute this cleanup block
finally:
# Call shutil.rmtree()
shutil.rmtree(tmp_dir, ignore_errors=True)
# Log info: "Cleaned up temp directory %s", tmp_dir
logger.info("Cleaned up temp directory %s", tmp_dir)
# Assign objects = bundle.get("objects", [])
objects = bundle.get("objects", [])
# Step 1: Parse data
actor_dicts = _parse_intrusion_sets(objects)
# Assign relationships = _parse_relationships(objects)
relationships = _parse_relationships(objects)
# Assign attack_pattern_map = _build_attack_pattern_map(objects)
attack_pattern_map = _build_attack_pattern_map(objects)
# Step 3: Load existing actors and techniques from DB
existing_actors = {
row.mitre_id: row
for row in db.query(ThreatActor).all()
if row.mitre_id
}
# Assign technique_by_mitre_id = {
technique_by_mitre_id = {
row.mitre_id: row
for row in db.query(Technique).all()
}
# Step 4: Upsert threat actors
actors_created = 0
# Assign actors_skipped = 0
actors_skipped = 0
# Assign stix_to_db_actor = {}
stix_to_db_actor: dict[str, ThreatActor] = {}
# Iterate over actor_dicts
for actor_dict in actor_dicts:
# Assign mitre_id = actor_dict["mitre_id"]
mitre_id = actor_dict["mitre_id"]
# Assign stix_id = actor_dict["stix_id"]
stix_id = actor_dict["stix_id"]
# Check: mitre_id and mitre_id in existing_actors
if mitre_id and mitre_id in existing_actors:
# Update existing actor
db_actor = existing_actors[mitre_id]
# Assign db_actor.name = actor_dict["name"]
db_actor.name = actor_dict["name"]
# Assign db_actor.aliases = actor_dict["aliases"]
db_actor.aliases = actor_dict["aliases"]
# Assign db_actor.description = actor_dict["description"]
db_actor.description = actor_dict["description"]
# Assign db_actor.mitre_url = actor_dict["mitre_url"]
db_actor.mitre_url = actor_dict["mitre_url"]
# Assign db_actor.references = actor_dict["references"]
db_actor.references = actor_dict["references"]
# Assign db_actor.first_seen = actor_dict.get("first_seen")
db_actor.first_seen = actor_dict.get("first_seen")
# Assign db_actor.last_seen = actor_dict.get("last_seen")
db_actor.last_seen = actor_dict.get("last_seen")
# Update enrichment fields if available
if actor_dict.get("motivation"):
db_actor.motivation = actor_dict["motivation"]
if actor_dict.get("sophistication"):
db_actor.sophistication = actor_dict["sophistication"]
stix_to_db_actor[stix_id] = db_actor
# Assign actors_skipped = 1
actors_skipped += 1
# Fallback: handle remaining cases
else:
# Create new actor
db_actor = ThreatActor(
# Keyword argument: mitre_id
mitre_id=mitre_id,
# Keyword argument: name
name=actor_dict["name"],
# Keyword argument: aliases
aliases=actor_dict["aliases"],
# Keyword argument: description
description=actor_dict["description"],
# Keyword argument: mitre_url
mitre_url=actor_dict["mitre_url"],
# Keyword argument: references
references=actor_dict["references"],
# Keyword argument: first_seen
first_seen=actor_dict.get("first_seen"),
# Keyword argument: last_seen
last_seen=actor_dict.get("last_seen"),
motivation=actor_dict.get("motivation"),
sophistication=actor_dict.get("sophistication"),
is_active=True,
)
# Stage new record(s) for database insertion
db.add(db_actor)
# Flush changes to DB without committing the transaction
db.flush() # get the ID
# Check: mitre_id
if mitre_id:
# Assign existing_actors[mitre_id] = db_actor
existing_actors[mitre_id] = db_actor
# Assign stix_to_db_actor[stix_id] = db_actor
stix_to_db_actor[stix_id] = db_actor
# Assign actors_created = 1
actors_created += 1
# Flush changes to DB without committing the transaction
db.flush()
# Step 5: Upsert actor-technique relationships
# Load existing relationships
existing_rels: set[tuple] = set()
# Iterate over db.query(ThreatActorTechnique).all()
for row in db.query(ThreatActorTechnique).all():
# Call existing_rels.add()
existing_rels.add((str(row.threat_actor_id), str(row.technique_id)))
# Assign rels_created = 0
rels_created = 0
# Assign rels_skipped = 0
rels_skipped = 0
# Iterate over relationships
for rel in relationships:
# Assign source_ref = rel["source_ref"]
source_ref = rel["source_ref"]
# Assign target_ref = rel["target_ref"]
target_ref = rel["target_ref"]
# Resolve actor
db_actor = stix_to_db_actor.get(source_ref)
# Check: not db_actor
if not db_actor:
# Skip to the next loop iteration
continue
# Resolve technique
mitre_technique_id = attack_pattern_map.get(target_ref)
# Check: not mitre_technique_id
if not mitre_technique_id:
# Skip to the next loop iteration
continue
# Assign db_technique = technique_by_mitre_id.get(mitre_technique_id)
db_technique = technique_by_mitre_id.get(mitre_technique_id)
# Check: not db_technique
if not db_technique:
# Skip to the next loop iteration
continue
# Assign rel_key = (str(db_actor.id), str(db_technique.id))
rel_key = (str(db_actor.id), str(db_technique.id))
# Check: rel_key in existing_rels
if rel_key in existing_rels:
# Assign rels_skipped = 1
rels_skipped += 1
# Skip to the next loop iteration
continue
# Assign actor_technique = ThreatActorTechnique(
actor_technique = ThreatActorTechnique(
# Keyword argument: threat_actor_id
threat_actor_id=db_actor.id,
# Keyword argument: technique_id
technique_id=db_technique.id,
# Keyword argument: usage_description
usage_description=rel["description"][:5000] if rel["description"] else None,
)
# Stage new record(s) for database insertion
db.add(actor_technique)
# Call existing_rels.add()
existing_rels.add(rel_key)
# Assign rels_created = 1
rels_created += 1
# Commit all pending changes to the database
db.commit()
# Assign summary = {
summary = {
# Literal argument value
"actors_created": actors_created,
# Literal argument value
"actors_updated": actors_skipped,
# Literal argument value
"relationships_created": rels_created,
# Literal argument value
"relationships_skipped": rels_skipped,
# Literal argument value
"total_actors_parsed": len(actor_dicts),
# Literal argument value
"total_relationships_parsed": len(relationships),
}
# Update DataSource record
ds = db.query(DataSource).filter(DataSource.name == "mitre_cti").first()
# Check: ds
if ds:
# Assign ds.last_sync_at = datetime.utcnow()
ds.last_sync_at = datetime.utcnow()
# Assign ds.last_sync_status = "success"
ds.last_sync_status = "success"
# Assign ds.last_sync_stats = summary
ds.last_sync_stats = summary
# Commit all pending changes to the database
db.commit()
# Log info: "MITRE CTI threat actor import complete — %s", sum
logger.info("MITRE CTI threat actor import complete — %s", summary)
# Call log_action()
log_action(
db,
# Keyword argument: user_id
user_id=None,
# Keyword argument: action
action="import_threat_actors",
# Keyword argument: entity_type
entity_type="threat_actor",
# Keyword argument: entity_id
entity_id=None,
# Keyword argument: details
details=summary,
)
# Commit all pending changes to the database
db.commit()
# Return summary
return summary