Aegis/backend/app/services/threat_actor_import_service.py

"""Threat Actor import service (MITRE CTI / STIX 2.0).

Downloads the MITRE CTI repository, parses the STIX 2.0 bundle for
``intrusion-set`` objects (APT groups) and ``relationship`` objects
linking them to ``attack-pattern`` (techniques), then creates
:class:`ThreatActor` and :class:`ThreatActorTechnique` records.

STIX 2.0 structure
------------------
The enterprise-attack bundle contains:
- ``intrusion-set`` objects → our ThreatActor rows
- ``attack-pattern`` objects → already in our Technique table
- ``relationship`` objects (``relationship_type`` = ``uses``) → connect intrusion-set → attack-pattern

Strategy
--------
1. Download ZIP of ``github.com/mitre/cti``.
2. Load ``enterprise-attack/enterprise-attack.json`` (single STIX bundle).
3. Build lookup maps for intrusion-sets and attack-patterns.
4. Parse relationships to connect actors → techniques.
5. Upsert into database.

Idempotency
-----------
Deduplication by ``mitre_id`` for ThreatActor and by the unique
constraint ``(threat_actor_id, technique_id)`` for ThreatActorTechnique.
"""

# Import io
import io

# Import json
import json

# Import logging
import logging

# Import shutil
import shutil

# Import tempfile
import tempfile

# Import zipfile
import zipfile

# Import datetime from datetime
from datetime import datetime

# Import Path from pathlib
from pathlib import Path

# Import requests
import requests as _requests

# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session

# Import DataSource from app.models.data_source
from app.models.data_source import DataSource

# Import Technique from app.models.technique
from app.models.technique import Technique

# Import ThreatActor, ThreatActorTechnique from app.models.threat_actor
from app.models.threat_actor import ThreatActor, ThreatActorTechnique

# Import log_action from app.services.audit_service
from app.services.audit_service import log_action

# Assign logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Default download URL: ZIP archive of the ``master`` branch of ``mitre/cti``.
MITRE_CTI_ZIP_URL = (
    # Repository base URL
    "https://github.com/mitre/cti"
    # Branch archive path (adjacent literals are concatenated into one URL)
    "/archive/refs/heads/master.zip"
)

# Passed as ``timeout=`` to ``requests.get`` when downloading the archive
# (requests interprets the value in seconds).
_DOWNLOAD_TIMEOUT = 300
# Top-level directory expected inside the archive; the STIX bundle is looked
# up beneath it.
_ZIP_ROOT_PREFIX = "cti-master"


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _download_zip(url: str = MITRE_CTI_ZIP_URL) -> bytes:
    """Download the MITRE CTI ZIP archive and return its raw bytes.

    Args:
        url: Archive URL; defaults to ``MITRE_CTI_ZIP_URL``.

    Returns:
        The complete response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    logger.info("Downloading MITRE CTI ZIP from %s …", url)
    # With ``stream=True`` the body is not read up front, so an error status
    # raised below would otherwise leave the connection checked out. The
    # ``with`` block closes the response on every exit path.
    with _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True) as resp:
        resp.raise_for_status()
        content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content


def _extract_zip_and_load_bundle(zip_bytes: bytes, dest: str) -> dict:
    """Extract the enterprise-attack STIX bundle from the archive and load it.

    Only the bundle file itself is written to *dest*; the rest of the archive
    is left unextracted because nothing else is read from it.

    Args:
        zip_bytes: Raw bytes of the downloaded ZIP archive.
        dest: Directory the bundle file is extracted into.

    Returns:
        The parsed STIX bundle (the decoded JSON document).

    Raises:
        FileNotFoundError: If the archive does not contain the bundle at
            ``<root>/enterprise-attack/enterprise-attack.json``.
        zipfile.BadZipFile: If *zip_bytes* is not a valid ZIP archive.
    """
    # ZIP member names always use forward slashes, independent of the host OS.
    member = f"{_ZIP_ROOT_PREFIX}/enterprise-attack/enterprise-attack.json"
    bundle_path = (
        Path(dest) / _ZIP_ROOT_PREFIX
        / "enterprise-attack" / "enterprise-attack.json"
    )

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        try:
            info = zf.getinfo(member)
        except KeyError:
            raise FileNotFoundError(
                f"STIX bundle not found at {bundle_path}"
            ) from None
        # ``extract`` returns the path it actually wrote to.
        extracted_path = zf.extract(info, dest)

    logger.info("Loading STIX bundle from %s …", extracted_path)
    with open(extracted_path, "r", encoding="utf-8") as fh:
        bundle = json.load(fh)

    objects = bundle.get("objects", [])
    logger.info("Loaded %d STIX objects", len(objects))
    return bundle


# Define function _extract_mitre_id
def _extract_mitre_id(external_references: list) -> str | None:
    """Extract the MITRE ATT&CK ID from external_references."""
    # Check: not isinstance(external_references, list)
    if not isinstance(external_references, list):
        # Return None
        return None
    # Iterate over external_references
    for ref in external_references:
        # Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
        if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
            # Return ref.get("external_id")
            return ref.get("external_id")
    # Return None
    return None


# Define function _extract_mitre_url
def _extract_mitre_url(external_references: list) -> str | None:
    """Extract the MITRE ATT&CK URL from external_references."""
    # Check: not isinstance(external_references, list)
    if not isinstance(external_references, list):
        # Return None
        return None
    # Iterate over external_references
    for ref in external_references:
        # Check: isinstance(ref, dict) and ref.get("source_name") == "mitre-attack"
        if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
            # Return ref.get("url")
            return ref.get("url")
    # Return None
    return None


# Map STIX primary_motivation vocabulary → simplified frontend values
_MOTIVATION_MAP: dict[str, str] = {
    # Espionage / nation-state
    "espionage":                "espionage",
    "national-security":        "espionage",
    "political":                "espionage",
    # Financial
    "financial":                "financial",
    "financial-gain":           "financial",
    "personal-gain":            "financial",
    "organizational-gain":      "financial",
    # Destruction / disruption
    "destruction":              "destruction",
    "disruption":               "destruction",
    "coercion":                 "destruction",
    "dominance":                "destruction",
    # Hacktivism / ideology
    "ideology":                 "hacktivism",
    "hacktivism":               "hacktivism",
    "notoriety":                "hacktivism",
    "personal-satisfaction":    "hacktivism",
    "revenge":                  "hacktivism",
}


def _normalize_motivation(raw: str | None) -> str | None:
    """Normalize a STIX primary_motivation value to the Aegis vocabulary."""
    if not raw:
        return None
    return _MOTIVATION_MAP.get(raw.lower().strip())


# Curated MITRE group ID → motivation overrides. In ``_parse_intrusion_sets``
# a hit here takes precedence over both the STIX ``primary_motivation`` field
# and the description-keyword inference.
#
# NOTE(review): the trailing group-name comments are not read by the code and
# several look unreliable — the same name is attached to different IDs
# (e.g. "PLATINUM" on G0036/G0068, "Leviathan / APT40" on G0041/G0065,
# "Tonto Team" on G0015/G0131, "HAFNIUM" on G0115/G0125, "Dark Caracal" on
# G0110/G0070, "Machete" on G0109/G0095), and some notes contradict the value
# they sit next to (G0091 "financial but listed here"). Verify IDs, names and
# categories against the ATT&CK Groups list before relying on them.
_MITRE_ID_MOTIVATION: dict[str, str] = {
    # ── Financial ──────────────────────────────────────────────────
    "G0046": "financial",  # FIN7
    "G0037": "financial",  # FIN6
    "G0061": "financial",  # FIN8
    "G0080": "financial",  # Cobalt Group
    "G0008": "financial",  # Carbanak
    "G0114": "financial",  # Chimera (financial)
    "G0032": "financial",  # Lazarus (financial ops)
    "G0082": "financial",  # APT38
    "G0098": "financial",  # BlackTech (financial)
    "G0096": "financial",  # APT41 (partly financial)
    "G0102": "financial",  # Wizard Spider (Ryuk/Conti)
    "G0119": "financial",  # Indrik Spider
    "G0108": "financial",  # Blue Mockingbird
    "G0059": "financial",  # Magic Hound (some financial)
    # ── Espionage ──────────────────────────────────────────────────
    "G0007": "espionage",  # APT28 / Fancy Bear
    "G0016": "espionage",  # APT29 / Cozy Bear
    "G0025": "espionage",  # APT17
    "G0050": "espionage",  # APT32 / OceanLotus
    "G0064": "espionage",  # APT33 / Elfin
    "G0049": "espionage",  # APT34 / OilRig
    "G0010": "espionage",  # Turla
    "G0022": "espionage",  # APT3
    "G0006": "espionage",  # APT1 / Comment Crew
    "G0009": "espionage",  # Deep Panda
    "G0045": "espionage",  # menuPass / APT10
    "G0041": "espionage",  # Leviathan / APT40
    "G0060": "espionage",  # BRONZE BUTLER
    "G0065": "espionage",  # Leviathan / APT40
    "G0001": "espionage",  # Axiom
    "G0004": "espionage",  # Ke3chang
    "G0011": "espionage",  # PittyTiger
    "G0015": "espionage",  # Tonto Team
    "G0020": "espionage",  # Equation Group
    "G0030": "espionage",  # Lotus Blossom
    "G0035": "espionage",  # Dragonfly / Energetic Bear
    "G0036": "espionage",  # PLATINUM
    "G0038": "espionage",  # Stealth Falcon
    "G0040": "espionage",  # Patchwork
    "G0043": "espionage",  # Group5
    "G0047": "espionage",  # Gamaredon Group
    "G0048": "espionage",  # RTM (partly)
    "G0052": "espionage",  # CopyKittens
    "G0053": "espionage",  # FIN5 (partly espionage)
    "G0055": "espionage",  # NEODYMIUM
    "G0056": "espionage",  # PROMETHIUM
    "G0058": "espionage",  # Charming Kitten / APT35
    "G0062": "espionage",  # CozyDuke
    "G0063": "espionage",  # Sowbug
    "G0066": "espionage",  # Elderwood
    "G0068": "espionage",  # PLATINUM
    "G0069": "espionage",  # MuddyWater
    "G0074": "espionage",  # Transparent Tribe
    "G0075": "espionage",  # Rancor
    "G0076": "espionage",  # Thrip
    "G0077": "espionage",  # Leafminer / OilRig subgroup
    "G0087": "espionage",  # APT39
    "G0090": "espionage",  # Leafminer
    "G0091": "espionage",  # Silence (financial but listed here)
    "G0093": "espionage",  # GALLIUM
    "G0094": "espionage",  # Kimsuky
    "G0099": "espionage",  # APT-C-36
    "G0100": "espionage",  # Inception
    "G0103": "espionage",  # Mofang
    "G0104": "espionage",  # Volatile Cedar
    "G0105": "espionage",  # DarkHydrus
    "G0106": "espionage",  # Rocke
    "G0107": "espionage",  # Whitefly
    "G0109": "espionage",  # Machete
    "G0110": "espionage",  # Dark Caracal
    "G0111": "espionage",  # Dark Basin
    "G0112": "espionage",  # Windshift
    "G0113": "espionage",  # Frankenstein
    "G0115": "espionage",  # HAFNIUM
    "G0116": "espionage",  # Operation Wocao
    "G0117": "espionage",  # Fox Kitten
    "G0118": "espionage",  # TA505
    "G0120": "espionage",  # Evilnum
    "G0121": "espionage",  # Sidewinder
    "G0122": "espionage",  # Silent Librarian
    "G0123": "espionage",  # Waterbear
    "G0124": "espionage",  # Windigo
    "G0125": "espionage",  # HAFNIUM (dup)
    "G0126": "espionage",  # Higaisa
    "G0127": "espionage",  # TA551
    "G0128": "espionage",  # ZIRCONIUM / APT31
    "G0129": "espionage",  # Mustang Panda
    "G0130": "espionage",  # Ajax Security Team
    "G0131": "espionage",  # Tonto Team
    "G0133": "espionage",  # Nomadic Octopus
    "G0134": "espionage",  # Sandworm (espionage+destruction)
    "G0135": "espionage",  # BackdoorDiplomacy
    "G0136": "espionage",  # IndigoZebra
    "G0138": "espionage",  # Threat Group-3390
    "G0139": "espionage",  # TeamTNT
    "G0140": "espionage",  # LazyScripter
    "G0141": "espionage",  # Aoqin Dragon
    "G0142": "espionage",  # Confucius
    "G0143": "espionage",  # Aquatic Panda
    "G0144": "espionage",  # TG-3390
    "G0145": "espionage",  # POLONIUM
    # ── Destruction ────────────────────────────────────────────────
    "G0034": "destruction",  # Sandworm Team
    "G0067": "destruction",  # APT37 (also espionage)
    "G0070": "destruction",  # Dark Caracal
    "G0072": "destruction",  # Honeybee
    "G0079": "destruction",  # DarkHotel (partly)
    "G0095": "destruction",  # Machete (partly)
    "G0031": "destruction",  # Cleaver
    # ── Hacktivism ─────────────────────────────────────────────────
    "G0026": "hacktivism",   # APT18 (some ops)
}


# Keyword patterns for description-based inference
_DESCRIPTION_KEYWORDS: list[tuple[str, str]] = [
    # Financial first (strongest signal)
    ("financially motivated",   "financial"),
    ("financial gain",          "financial"),
    ("financial crime",         "financial"),
    ("for financial",           "financial"),
    ("ransomware",              "financial"),
    ("extortion",               "financial"),
    ("fraud",                   "financial"),
    ("profit",                  "financial"),
    ("monetar",                 "financial"),
    ("criminal group",          "financial"),
    ("cybercriminal",           "financial"),
    ("e-crime",                 "financial"),
    # Destruction
    ("destructive",             "destruction"),
    ("disruptive",              "destruction"),
    ("wiper",                   "destruction"),
    ("sabotage",                "destruction"),
    ("disrupt",                 "destruction"),
    # Hacktivism
    ("hacktivist",              "hacktivism"),
    ("political statement",     "hacktivism"),
    ("ideolog",                 "hacktivism"),
    # Espionage (broad, lowest priority)
    ("espionage",               "espionage"),
    ("intelligence collection", "espionage"),
    ("intelligence gathering",  "espionage"),
    ("cyber espionage",         "espionage"),
    ("nation-state",            "espionage"),
    ("state-sponsored",         "espionage"),
    ("government-sponsored",    "espionage"),
    ("military intelligence",   "espionage"),
]


def _infer_motivation_from_description(description: str) -> str | None:
    """Infer motivation by scanning the group description for keywords."""
    if not description:
        return None
    lower = description.lower()
    for keyword, motivation in _DESCRIPTION_KEYWORDS:
        if keyword in lower:
            return motivation
    return None


def _parse_intrusion_sets(objects: list) -> list[dict]:
    """Parse STIX ``intrusion-set`` objects into ThreatActor field dicts.

    Revoked objects and objects without a non-empty name are skipped.
    Malformed entries (non-dict objects, a null ``name``, a non-list
    ``external_references`` or ``aliases``) are tolerated rather than
    aborting the whole import.

    Args:
        objects: The ``objects`` list of a STIX bundle.

    Returns:
        One dict per accepted intrusion-set with the keys ``stix_id``,
        ``mitre_id``, ``name``, ``aliases``, ``description``, ``mitre_url``,
        ``references``, ``first_seen``, ``last_seen``, ``motivation`` and
        ``sophistication``.
    """
    # NOTE(review): only ``revoked`` is filtered; objects flagged
    # ``x_mitre_deprecated`` are still imported — confirm that is intended.
    actors = []
    for obj in objects:
        if not isinstance(obj, dict):
            continue
        if obj.get("type") != "intrusion-set" or obj.get("revoked"):
            continue

        raw_name = obj.get("name")
        name = raw_name.strip() if isinstance(raw_name, str) else ""
        if not name:
            continue

        ext_refs = obj.get("external_references")
        if not isinstance(ext_refs, list):
            ext_refs = []
        mitre_id = _extract_mitre_id(ext_refs)
        mitre_url = _extract_mitre_url(ext_refs)

        # The primary name is stored separately, so drop it from the aliases.
        raw_aliases = obj.get("aliases")
        if isinstance(raw_aliases, list):
            aliases = [alias for alias in raw_aliases if alias != name]
        else:
            aliases = []

        raw_description = obj.get("description")
        description = raw_description if isinstance(raw_description, str) else ""

        # Derive motivation: curated override > STIX field > description inference
        motivation = (
            _MITRE_ID_MOTIVATION.get(mitre_id or "")
            or _normalize_motivation(obj.get("primary_motivation"))
            or _infer_motivation_from_description(description)
        )

        # Keep only the non-MITRE references; the MITRE one is already
        # captured as ``mitre_id`` / ``mitre_url``.
        references = [
            {
                "source": ref.get("source_name", ""),
                "url": ref.get("url", ""),
                "description": ref.get("description", ""),
            }
            for ref in ext_refs
            if isinstance(ref, dict) and ref.get("source_name") != "mitre-attack"
        ]

        actors.append({
            "stix_id": obj.get("id"),  # e.g. "intrusion-set--abc123"
            "mitre_id": mitre_id,
            "name": name,
            "aliases": aliases,
            "description": description,
            "mitre_url": mitre_url,
            "references": references[:20],  # cap to avoid bloat
            "first_seen": obj.get("first_seen"),
            "last_seen": obj.get("last_seen"),
            "motivation": motivation,
            "sophistication": obj.get("sophistication"),  # e.g. "advanced", "expert"
        })

    logger.info("Parsed %d intrusion-sets (threat actors)", len(actors))
    return actors


def _parse_relationships(objects: list) -> list[dict]:
    """Collect the ``uses`` relationships that link an intrusion-set to an attack-pattern.

    Relationships of another type, revoked relationships, and those whose
    endpoints are not ``intrusion-set--…`` → ``attack-pattern--…`` are ignored.

    Returns:
        A list of dicts with ``source_ref``, ``target_ref`` and
        ``description`` (empty string when the object has none).
    """

    def _links_actor_to_technique(item) -> bool:
        # Cheap type checks first; the prefix checks only run for live
        # ``uses`` relationships.
        return (
            item.get("type") == "relationship"
            and item.get("relationship_type") == "uses"
            and not item.get("revoked")
            and item.get("source_ref", "").startswith("intrusion-set--")
            and item.get("target_ref", "").startswith("attack-pattern--")
        )

    links = [
        {
            "source_ref": item.get("source_ref", ""),
            "target_ref": item.get("target_ref", ""),
            "description": item.get("description", ""),
        }
        for item in objects
        if _links_actor_to_technique(item)
    ]

    logger.info("Parsed %d uses-relationships (actor→technique)", len(links))
    return links


def _build_attack_pattern_map(objects: list) -> dict[str, str]:
    """Map each STIX attack-pattern ID to its MITRE technique ID.

    Revoked attack-patterns and those lacking either a STIX ID or a MITRE ID
    are left out.

    e.g. {"attack-pattern--abc123": "T1059.001"}
    """
    live_patterns = (
        item
        for item in objects
        if item.get("type") == "attack-pattern" and not item.get("revoked")
    )

    stix_to_mitre = {}
    for pattern in live_patterns:
        pattern_id = pattern.get("id", "")
        technique_id = _extract_mitre_id(pattern.get("external_references", []))
        if pattern_id and technique_id:
            stix_to_mitre[pattern_id] = technique_id

    logger.info("Built attack-pattern map with %d entries", len(stix_to_mitre))
    return stix_to_mitre


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def _upsert_threat_actors(db: Session, actor_dicts: list[dict]) -> tuple[dict, int, int]:
    """Create or update one ThreatActor row per parsed intrusion-set.

    Rows are matched on ``mitre_id``. Intrusion-sets without a MITRE ID are
    skipped: with no key to match on they would be inserted again on every
    run.

    Returns:
        ``(stix_to_db_actor, created, updated)`` where ``stix_to_db_actor``
        maps each STIX intrusion-set ID to its database row.
    """
    existing_actors = {
        row.mitre_id: row
        for row in db.query(ThreatActor).all()
        if row.mitre_id
    }
    stix_to_db_actor: dict = {}
    created = 0
    updated = 0

    for actor_dict in actor_dicts:
        mitre_id = actor_dict["mitre_id"]
        if not mitre_id:
            logger.warning(
                "Skipping intrusion-set %r: no MITRE ID to deduplicate on",
                actor_dict["name"],
            )
            continue

        db_actor = existing_actors.get(mitre_id)
        if db_actor is None:
            db_actor = ThreatActor(
                mitre_id=mitre_id,
                name=actor_dict["name"],
                aliases=actor_dict["aliases"],
                description=actor_dict["description"],
                mitre_url=actor_dict["mitre_url"],
                references=actor_dict["references"],
                first_seen=actor_dict.get("first_seen"),
                last_seen=actor_dict.get("last_seen"),
                motivation=actor_dict.get("motivation"),
                sophistication=actor_dict.get("sophistication"),
                is_active=True,
            )
            db.add(db_actor)
            # Register immediately so a repeated MITRE ID within the same
            # bundle updates this row instead of inserting a second one.
            existing_actors[mitre_id] = db_actor
            created += 1
        else:
            db_actor.name = actor_dict["name"]
            db_actor.aliases = actor_dict["aliases"]
            db_actor.description = actor_dict["description"]
            db_actor.mitre_url = actor_dict["mitre_url"]
            db_actor.references = actor_dict["references"]
            db_actor.first_seen = actor_dict.get("first_seen")
            db_actor.last_seen = actor_dict.get("last_seen")
            # Enrichment fields are only overwritten when the bundle yields a
            # value, so an existing value is never blanked.
            if actor_dict.get("motivation"):
                db_actor.motivation = actor_dict["motivation"]
            if actor_dict.get("sophistication"):
                db_actor.sophistication = actor_dict["sophistication"]
            updated += 1

        stix_to_db_actor[actor_dict["stix_id"]] = db_actor

    # One flush for the whole batch: new rows need their primary keys before
    # relationships can reference them.
    db.flush()
    return stix_to_db_actor, created, updated


def _link_actor_techniques(
    db: Session,
    relationships: list[dict],
    stix_to_db_actor: dict,
    attack_pattern_map: dict[str, str],
) -> tuple[int, int]:
    """Insert the missing ThreatActorTechnique rows for the parsed relationships.

    Relationships whose actor was not imported, or whose technique is not in
    the Technique table, are ignored without being counted.

    Returns:
        ``(created, skipped)`` where ``skipped`` counts links that already
        existed.
    """
    technique_by_mitre_id = {
        row.mitre_id: row
        for row in db.query(Technique).all()
    }
    # IDs are compared as strings so the key type is uniform regardless of
    # how the ID columns are represented.
    existing_rels = {
        (str(row.threat_actor_id), str(row.technique_id))
        for row in db.query(ThreatActorTechnique).all()
    }

    created = 0
    skipped = 0
    for rel in relationships:
        db_actor = stix_to_db_actor.get(rel["source_ref"])
        if not db_actor:
            continue

        mitre_technique_id = attack_pattern_map.get(rel["target_ref"])
        if not mitre_technique_id:
            continue

        db_technique = technique_by_mitre_id.get(mitre_technique_id)
        if not db_technique:
            continue

        rel_key = (str(db_actor.id), str(db_technique.id))
        if rel_key in existing_rels:
            skipped += 1
            continue

        description = rel["description"]
        db.add(ThreatActorTechnique(
            threat_actor_id=db_actor.id,
            technique_id=db_technique.id,
            usage_description=description[:5000] if description else None,
        ))
        # Also guards against the same link appearing twice in one bundle.
        existing_rels.add(rel_key)
        created += 1

    return created, skipped


def sync(db: Session) -> dict:
    """Download the MITRE CTI bundle and import threat actors and their techniques.

    Steps: download and load the bundle, parse intrusion-sets / relationships
    / attack-patterns, upsert ThreatActor rows, insert missing
    ThreatActorTechnique rows, then record the result on the ``mitre_cti``
    DataSource row (if one exists) and in the audit log.

    Args:
        db: SQLAlchemy session used for all reads and writes. This function
            commits it.

    Returns:
        Summary dict with the keys ``actors_created``, ``actors_updated``,
        ``relationships_created``, ``relationships_skipped``,
        ``total_actors_parsed`` and ``total_relationships_parsed``.

    Raises:
        Any exception from the download, extraction or database layer.
        Uncommitted database changes are rolled back before it propagates.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
    try:
        zip_bytes = _download_zip()
        bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
    finally:
        # The bundle is fully in memory by now; the files are not needed.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)

    objects = bundle.get("objects", [])

    # Step 1: parse the bundle
    actor_dicts = _parse_intrusion_sets(objects)
    relationships = _parse_relationships(objects)
    attack_pattern_map = _build_attack_pattern_map(objects)

    try:
        # Step 2: upsert threat actors
        stix_to_db_actor, actors_created, actors_updated = _upsert_threat_actors(
            db, actor_dicts
        )

        # Step 3: insert missing actor → technique links
        rels_created, rels_skipped = _link_actor_techniques(
            db, relationships, stix_to_db_actor, attack_pattern_map
        )

        db.commit()

        summary = {
            "actors_created": actors_created,
            "actors_updated": actors_updated,
            "relationships_created": rels_created,
            "relationships_skipped": rels_skipped,
            "total_actors_parsed": len(actor_dicts),
            "total_relationships_parsed": len(relationships),
        }

        # Step 4: record the outcome on the DataSource row, if present
        ds = db.query(DataSource).filter(DataSource.name == "mitre_cti").first()
        if ds:
            # NOTE(review): ``datetime.utcnow()`` is deprecated since Python
            # 3.12; kept because switching to an aware datetime could change
            # what is stored — confirm the column type before migrating.
            ds.last_sync_at = datetime.utcnow()
            ds.last_sync_status = "success"
            ds.last_sync_stats = summary
            db.commit()

        logger.info("MITRE CTI threat actor import complete — %s", summary)

        log_action(
            db,
            user_id=None,
            action="import_threat_actors",
            entity_type="threat_actor",
            entity_id=None,
            details=summary,
        )
        db.commit()
    except Exception:
        # Leave the session usable for the caller, then let the error surface.
        db.rollback()
        raise

    return summary