feat(phase-23): add Threat Actor profiles with MITRE CTI import, API, heatmap and gap analysis (T-208 to T-212)

This commit is contained in:
2026-02-09 16:27:38 +01:00
parent f4c8cbf768
commit 2fc0e2cafd
12 changed files with 1798 additions and 2 deletions

View File

@@ -0,0 +1,72 @@
"""add_threat_actors_tables
Revision ID: b010threatactors
Revises: b009detectionrules
Create Date: 2026-02-09 15:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB
# Revision identifiers, used by Alembic.
# ``revision`` names this migration; ``down_revision`` names the migration it
# builds on, matching the "Revises" entry in the module docstring.
revision: str = 'b010threatactors'
down_revision: Union[str, Sequence[str], None] = 'b009detectionrules'
# No branch labels and no cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply the migration: add threat-actor storage.

    Creates ``threat_actors`` (one row per actor profile) and the junction
    table ``threat_actor_techniques`` linking actors to rows of
    ``techniques``, then adds their lookup indexes and the
    ``uq_actor_technique`` uniqueness constraint.
    """
    # --- threat_actors ------------------------------------------------------
    actor_columns = (
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('mitre_id', sa.String(), unique=True, nullable=True),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('aliases', JSONB(), nullable=True),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('country', sa.String(), nullable=True),
        sa.Column('target_sectors', JSONB(), nullable=True),
        sa.Column('target_regions', JSONB(), nullable=True),
        sa.Column('motivation', sa.String(), nullable=True),
        sa.Column('sophistication', sa.String(), nullable=True),
        sa.Column('first_seen', sa.String(), nullable=True),
        sa.Column('last_seen', sa.String(), nullable=True),
        sa.Column('references', JSONB(), nullable=True),
        sa.Column('mitre_url', sa.String(), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default='true'),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
    )
    op.create_table('threat_actors', *actor_columns)
    for actor_index, indexed_column in (
        ('ix_threat_actors_country', 'country'),
        ('ix_threat_actors_motivation', 'motivation'),
    ):
        op.create_index(actor_index, 'threat_actors', [indexed_column])

    # --- threat_actor_techniques (junction table) ---------------------------
    # Both foreign keys cascade, so deleting an actor or a technique also
    # removes the link rows that point at it.
    actor_fk = sa.ForeignKey('threat_actors.id', ondelete='CASCADE')
    technique_fk = sa.ForeignKey('techniques.id', ondelete='CASCADE')
    link_columns = (
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('threat_actor_id', UUID(as_uuid=True), actor_fk, nullable=False),
        sa.Column('technique_id', UUID(as_uuid=True), technique_fk, nullable=False),
        sa.Column('usage_description', sa.Text(), nullable=True),
        sa.Column('first_seen_using', sa.String(), nullable=True),
    )
    op.create_table('threat_actor_techniques', *link_columns)
    op.create_index(
        'ix_threat_actor_techniques_actor',
        'threat_actor_techniques',
        ['threat_actor_id'],
    )
    op.create_index(
        'ix_threat_actor_techniques_technique',
        'threat_actor_techniques',
        ['technique_id'],
    )
    # An actor may be linked to a given technique at most once.
    op.create_unique_constraint(
        'uq_actor_technique',
        'threat_actor_techniques',
        ['threat_actor_id', 'technique_id'],
    )
def downgrade() -> None:
    """Revert the migration: remove both threat-actor tables.

    Objects are dropped in the reverse of the order ``upgrade`` creates them:
    the junction table (constraint, indexes, table) first, then
    ``threat_actors`` (indexes, table).
    """
    link_table = 'threat_actor_techniques'
    op.drop_constraint('uq_actor_technique', link_table, type_='unique')
    for link_index in (
        'ix_threat_actor_techniques_technique',
        'ix_threat_actor_techniques_actor',
    ):
        op.drop_index(link_index, table_name=link_table)
    op.drop_table(link_table)

    actor_table = 'threat_actors'
    for actor_index in (
        'ix_threat_actors_motivation',
        'ix_threat_actors_country',
    ):
        op.drop_index(actor_index, table_name=actor_table)
    op.drop_table(actor_table)

View File

@@ -19,6 +19,7 @@ from app.routers import audit as audit_router
from app.routers import notifications as notifications_router
from app.routers import reports as reports_router
from app.routers import data_sources as data_sources_router
from app.routers import threat_actors as threat_actors_router
from app.storage import ensure_bucket_exists
from app.jobs.mitre_sync_job import start_scheduler, scheduler
@@ -62,6 +63,7 @@ app.include_router(audit_router.router, prefix="/api/v1")
app.include_router(notifications_router.router, prefix="/api/v1")
app.include_router(reports_router.router, prefix="/api/v1")
app.include_router(data_sources_router.router, prefix="/api/v1")
app.include_router(threat_actors_router.router, prefix="/api/v1")
@app.get("/health")

View File

@@ -9,11 +9,12 @@ from app.models.audit import AuditLog
from app.models.notification import Notification
from app.models.data_source import DataSource
from app.models.detection_rule import DetectionRule
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
from app.models.enums import TechniqueStatus, TestState, TestResult, TeamSide
__all__ = [
"User", "Technique", "Test", "TestTemplate", "Evidence",
"IntelItem", "AuditLog", "Notification", "DataSource",
"DetectionRule",
"DetectionRule", "ThreatActor", "ThreatActorTechnique",
"TechniqueStatus", "TestState", "TestResult", "TeamSide",
]

View File

@@ -0,0 +1,92 @@
"""ThreatActor and ThreatActorTechnique models.
Stores profiles of APT groups and their associated MITRE ATT&CK
techniques, imported from MITRE CTI (STIX 2.0).
"""
import uuid
from datetime import datetime
from sqlalchemy import (
Column, String, Text, Boolean, DateTime,
ForeignKey, Index, UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.orm import relationship
from app.database import Base
class ThreatActor(Base):
    """
    Threat actor / APT group profile.

    Imported from MITRE CTI ``intrusion-set`` STIX objects. Each row may be
    linked to any number of techniques through :class:`ThreatActorTechnique`.
    """

    __tablename__ = "threat_actors"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    mitre_id = Column(String, unique=True, nullable=True)  # e.g. "G0016" (APT29)
    name = Column(String, nullable=False)
    # JSONB list columns use ``default=list`` (a callable) rather than a ``[]``
    # literal, so every inserted row gets its own fresh list instead of all
    # rows sharing one mutable default object.
    aliases = Column(JSONB, nullable=True, default=list)  # ["Cozy Bear", "The Dukes", ...]
    description = Column(Text, nullable=True)
    country = Column(String, nullable=True)
    target_sectors = Column(JSONB, nullable=True, default=list)  # ["government", "defense", ...]
    target_regions = Column(JSONB, nullable=True, default=list)  # ["north-america", "europe", ...]
    motivation = Column(String, nullable=True)  # espionage / financial / destruction / ...
    sophistication = Column(String, nullable=True)  # low / medium / high / advanced
    # Stored as plain strings, not as date/time columns.
    first_seen = Column(String, nullable=True)
    last_seen = Column(String, nullable=True)
    references = Column(JSONB, nullable=True, default=list)  # [{"url": "...", "description": "..."}]
    mitre_url = Column(String, nullable=True)
    is_active = Column(Boolean, default=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationships
    # Link rows are owned by the actor: removing one from this collection, or
    # deleting the actor, deletes the link row as well.
    techniques = relationship(
        "ThreatActorTechnique",
        back_populates="threat_actor",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        Index('ix_threat_actors_country', 'country'),
        Index('ix_threat_actors_motivation', 'motivation'),
    )
class ThreatActorTechnique(Base):
    """
    Link row joining one threat actor to one MITRE ATT&CK technique.

    Besides the two foreign keys it carries context on how the actor uses
    the technique (taken from the STIX ``relationship`` ``uses`` objects).
    """

    __tablename__ = "threat_actor_techniques"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Both ends cascade on delete, so a link row never outlives its actor or
    # its technique.
    threat_actor_id = Column(
        UUID(as_uuid=True), ForeignKey("threat_actors.id", ondelete="CASCADE"), nullable=False,
    )
    technique_id = Column(
        UUID(as_uuid=True), ForeignKey("techniques.id", ondelete="CASCADE"), nullable=False,
    )

    usage_description = Column(Text, nullable=True)
    first_seen_using = Column(String, nullable=True)

    # Relationships
    threat_actor = relationship("ThreatActor", back_populates="techniques")
    technique = relationship("Technique")

    __table_args__ = (
        Index('ix_threat_actor_techniques_actor', 'threat_actor_id'),
        Index('ix_threat_actor_techniques_technique', 'technique_id'),
        # A given (actor, technique) pair may appear only once.
        UniqueConstraint('threat_actor_id', 'technique_id', name='uq_actor_technique'),
    )

View File

@@ -39,7 +39,8 @@ def _get_sync_handler(source_name: str):
"gtfobins": ("app.services.lolbas_import_service", "sync_gtfobins"),
"caldera": ("app.services.caldera_import_service", "sync"),
"elastic_rules": ("app.services.elastic_import_service", "sync"),
# d3fend and mitre_cti added in later phases
"mitre_cti": ("app.services.threat_actor_import_service", "sync"),
# d3fend added in later phases
}
if source_name not in handlers:

View File

@@ -0,0 +1,309 @@
"""Threat actor endpoints.
Provides listing, detail, coverage analysis, and gap analysis for
threat actor profiles imported from MITRE CTI.
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import func, or_
from sqlalchemy.orm import Session, joinedload
from app.database import get_db
from app.dependencies.auth import get_current_user
from app.models.user import User
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
from app.models.technique import Technique
from app.models.test import Test
from app.models.test_template import TestTemplate
from app.models.enums import TechniqueStatus
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared on this router lives under the "/threat-actors" prefix
# and carries the "threat-actors" tag.
router = APIRouter(prefix="/threat-actors", tags=["threat-actors"])
# ---------------------------------------------------------------------------
# GET /threat-actors — List with filters
# ---------------------------------------------------------------------------
@router.get("")
def list_threat_actors(
    search: Optional[str] = Query(None),
    country: Optional[str] = Query(None),
    motivation: Optional[str] = Query(None),
    sophistication: Optional[str] = Query(None),
    target_sectors: Optional[str] = Query(None),
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List threat actors with optional filters and pagination.

    **Requires** authentication (any role).

    Filters:
        search: case-insensitive substring matched against name,
            description and the serialized aliases list.
        country / motivation / sophistication: exact match.
        target_sectors: case-insensitive substring matched against the
            serialized ``target_sectors`` list.

    Returns:
        ``{"total", "offset", "limit", "items"}`` where ``total`` is the
        number of actors matching the filters (before pagination) and each
        item carries ``technique_count`` and ``coverage_pct`` — the
        percentage of the actor's techniques whose global status is
        ``validated`` or ``partial``.
    """
    # Local import: only this handler needs these two names.
    from sqlalchemy import Text, cast

    query = db.query(ThreatActor)

    # Filters
    if search:
        pattern = f"%{search}%"
        query = query.filter(
            or_(
                ThreatActor.name.ilike(pattern),
                ThreatActor.description.ilike(pattern),
                # JSONB has no ILIKE; compare against its text rendering.
                cast(ThreatActor.aliases, Text).ilike(pattern),
            )
        )
    if country:
        query = query.filter(ThreatActor.country == country)
    if motivation:
        query = query.filter(ThreatActor.motivation == motivation)
    if sophistication:
        query = query.filter(ThreatActor.sophistication == sophistication)
    if target_sectors:
        # Substring match on the JSON text, not a JSONB containment test.
        query = query.filter(
            cast(ThreatActor.target_sectors, Text).ilike(f"%{target_sectors}%")
        )

    # Total count (before pagination)
    total = query.count()

    # Paginate
    actors = query.order_by(ThreatActor.name).offset(offset).limit(limit).all()

    # Technique totals and covered totals for the whole page, fetched with two
    # grouped queries instead of two COUNT queries per actor.
    actor_ids = [actor.id for actor in actors]
    technique_counts = {}
    covered_counts = {}
    if actor_ids:
        technique_counts = {
            actor_id: count
            for actor_id, count in (
                db.query(ThreatActorTechnique.threat_actor_id, func.count())
                .select_from(ThreatActorTechnique)
                .filter(ThreatActorTechnique.threat_actor_id.in_(actor_ids))
                .group_by(ThreatActorTechnique.threat_actor_id)
                .all()
            )
        }
        covered_counts = {
            actor_id: count
            for actor_id, count in (
                db.query(ThreatActorTechnique.threat_actor_id, func.count())
                .select_from(ThreatActorTechnique)
                .join(Technique, ThreatActorTechnique.technique_id == Technique.id)
                .filter(ThreatActorTechnique.threat_actor_id.in_(actor_ids))
                .filter(Technique.status_global.in_([
                    TechniqueStatus.validated,
                    TechniqueStatus.partial,
                ]))
                .group_by(ThreatActorTechnique.threat_actor_id)
                .all()
            )
        }

    results = []
    for actor in actors:
        tech_count = technique_counts.get(actor.id, 0)
        covered = covered_counts.get(actor.id, 0)
        coverage_pct = round((covered / tech_count * 100), 1) if tech_count > 0 else 0.0
        results.append({
            "id": str(actor.id),
            "mitre_id": actor.mitre_id,
            "name": actor.name,
            "aliases": actor.aliases or [],
            "country": actor.country,
            "target_sectors": actor.target_sectors or [],
            "target_regions": actor.target_regions or [],
            "motivation": actor.motivation,
            "sophistication": actor.sophistication,
            "mitre_url": actor.mitre_url,
            "technique_count": tech_count,
            "coverage_pct": coverage_pct,
            "is_active": actor.is_active,
        })

    return {
        "total": total,
        "offset": offset,
        "limit": limit,
        "items": results,
    }
# ---------------------------------------------------------------------------
# GET /threat-actors/{id} — Detail
# ---------------------------------------------------------------------------
@router.get("/{actor_id}")
def get_threat_actor(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Get detailed info about a threat actor including techniques.

    **Requires** authentication (any role).

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that ID exists.
    """
    import uuid

    # A string that is not a UUID cannot identify any actor. Reject it here
    # so it never reaches the UUID column comparison, where it would surface
    # as a database error instead of a clean 404.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # Associated techniques together with their coverage status, ordered by
    # MITRE technique ID.
    actor_techniques = (
        db.query(ThreatActorTechnique, Technique)
        .join(Technique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .order_by(Technique.mitre_id)
        .all()
    )

    techniques_list = [
        {
            "technique_id": str(tech.id),
            "mitre_id": tech.mitre_id,
            "name": tech.name,
            "tactic": tech.tactic,
            "status_global": tech.status_global.value if tech.status_global else None,
            "usage_description": link.usage_description,
            "first_seen_using": link.first_seen_using,
        }
        for link, tech in actor_techniques
    ]

    return {
        "id": str(actor.id),
        "mitre_id": actor.mitre_id,
        "name": actor.name,
        "aliases": actor.aliases or [],
        "description": actor.description,
        "country": actor.country,
        "target_sectors": actor.target_sectors or [],
        "target_regions": actor.target_regions or [],
        "motivation": actor.motivation,
        "sophistication": actor.sophistication,
        "first_seen": actor.first_seen,
        "last_seen": actor.last_seen,
        "references": actor.references or [],
        "mitre_url": actor.mitre_url,
        "is_active": actor.is_active,
        "techniques": techniques_list,
    }
# ---------------------------------------------------------------------------
# GET /threat-actors/{id}/coverage — Coverage
# ---------------------------------------------------------------------------
@router.get("/{actor_id}/coverage")
def get_threat_actor_coverage(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Calculate coverage percentage against a specific threat actor.

    **Requires** authentication (any role).

    Returns the percentage of the actor's techniques that have been
    validated or partially validated, along with a per-status breakdown.
    Techniques without a global status are counted under
    ``"not_evaluated"``.

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that ID exists.
    """
    import uuid

    # Reject malformed IDs up front; otherwise they would reach the UUID
    # column comparison and surface as a database error instead of a 404.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # All techniques linked to this actor
    actor_techniques = (
        db.query(Technique)
        .join(ThreatActorTechnique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .all()
    )

    breakdown = {}
    for tech in actor_techniques:
        status = tech.status_global.value if tech.status_global else "not_evaluated"
        breakdown[status] = breakdown.get(status, 0) + 1

    total = len(actor_techniques)
    covered = breakdown.get("validated", 0) + breakdown.get("partial", 0)
    # No linked techniques means nothing to cover; report 0% rather than
    # dividing by zero.
    coverage_pct = round((covered / total * 100), 1) if total else 0.0

    # The same keys are returned whether or not the actor has techniques, so
    # clients can always read "covered".
    return {
        "actor_id": str(actor.id),
        "actor_name": actor.name,
        "total_techniques": total,
        "covered": covered,
        "coverage_pct": coverage_pct,
        "breakdown": breakdown,
    }
# ---------------------------------------------------------------------------
# GET /threat-actors/{id}/gaps — Gap analysis
# ---------------------------------------------------------------------------
@router.get("/{actor_id}/gaps")
def get_threat_actor_gaps(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Identify techniques of this actor that are NOT fully validated.

    **Requires** authentication (any role).

    Returns the list of gap techniques, each with the number of active test
    templates available for it and the number of tests already recorded.

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that ID exists.
    """
    import uuid

    # Reject malformed IDs up front; otherwise they would reach the UUID
    # column comparison and surface as a database error instead of a 404.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # Techniques that are not validated. In SQL, ``status != 'validated'`` is
    # NULL (and therefore filtered out) when the status itself is NULL, so a
    # technique with no status yet has to be matched explicitly — it is a gap
    # too.
    gap_techniques = (
        db.query(Technique, ThreatActorTechnique)
        .join(ThreatActorTechnique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .filter(
            or_(
                Technique.status_global.is_(None),
                Technique.status_global != TechniqueStatus.validated,
            )
        )
        .order_by(Technique.mitre_id)
        .all()
    )

    # Template and test counts for all gap techniques, fetched with two
    # grouped queries instead of two COUNT queries per technique.
    template_counts = {}
    test_counts = {}
    if gap_techniques:
        gap_mitre_ids = [tech.mitre_id for tech, _ in gap_techniques]
        gap_technique_ids = [tech.id for tech, _ in gap_techniques]
        template_counts = {
            mitre_id: count
            for mitre_id, count in (
                db.query(TestTemplate.mitre_technique_id, func.count())
                .filter(TestTemplate.mitre_technique_id.in_(gap_mitre_ids))
                .filter(TestTemplate.is_active.is_(True))
                .group_by(TestTemplate.mitre_technique_id)
                .all()
            )
        }
        test_counts = {
            technique_id: count
            for technique_id, count in (
                db.query(Test.technique_id, func.count())
                .filter(Test.technique_id.in_(gap_technique_ids))
                .group_by(Test.technique_id)
                .all()
            )
        }

    gaps = []
    for tech, link in gap_techniques:
        template_count = template_counts.get(tech.mitre_id, 0)
        gaps.append({
            "technique_id": str(tech.id),
            "mitre_id": tech.mitre_id,
            "name": tech.name,
            "tactic": tech.tactic,
            "status_global": tech.status_global.value if tech.status_global else None,
            "usage_description": link.usage_description,
            "available_templates": template_count,
            "existing_tests": test_counts.get(tech.id, 0),
            "has_templates": template_count > 0,
        })

    return {
        "actor_id": str(actor.id),
        "actor_name": actor.name,
        "total_gaps": len(gaps),
        "gaps": gaps,
    }

View File

@@ -0,0 +1,373 @@
"""Threat Actor import service (MITRE CTI / STIX 2.0).
Downloads the MITRE CTI repository, parses the STIX 2.0 bundle for
``intrusion-set`` objects (APT groups) and ``relationship`` objects
linking them to ``attack-pattern`` (techniques), then creates
:class:`ThreatActor` and :class:`ThreatActorTechnique` records.
STIX 2.0 structure
------------------
The enterprise-attack bundle contains:
- ``intrusion-set`` objects → our ThreatActor rows
- ``attack-pattern`` objects → already in our Technique table
- ``relationship`` objects (type=uses) → connects intrusion-set → attack-pattern
Strategy
--------
1. Download ZIP of ``github.com/mitre/cti``.
2. Load ``enterprise-attack/enterprise-attack.json`` (single STIX bundle).
3. Build lookup maps for intrusion-sets and attack-patterns.
4. Parse relationships to connect actors → techniques.
5. Upsert into database.
Idempotency
-----------
Deduplication by ``mitre_id`` for ThreatActor and by the unique
constraint ``(threat_actor_id, technique_id)`` for ThreatActorTechnique.
"""
import io
import json
import logging
import shutil
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
import requests as _requests
from sqlalchemy.orm import Session
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
from app.models.technique import Technique
from app.models.data_source import DataSource
from app.services.audit_service import log_action
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# ZIP archive of the ``master`` branch of github.com/mitre/cti.
MITRE_CTI_ZIP_URL = (
"https://github.com/mitre/cti"
"/archive/refs/heads/master.zip"
)
# Passed as ``timeout=`` to the download request (requests reads it as seconds).
_DOWNLOAD_TIMEOUT = 300
# Top-level directory inside the archive under which the bundle is looked up.
_ZIP_ROOT_PREFIX = "cti-master"
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _download_zip(url: str = MITRE_CTI_ZIP_URL) -> bytes:
    """Download the MITRE CTI ZIP and return its raw bytes.

    Args:
        url: Archive location; defaults to :data:`MITRE_CTI_ZIP_URL`.

    Returns:
        The complete response body.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    logger.info("Downloading MITRE CTI ZIP from %s", url)
    # The context manager closes the streamed response on every path,
    # including when raise_for_status() raises before the body has been read.
    with _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True) as resp:
        resp.raise_for_status()
        content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
def _extract_zip_and_load_bundle(zip_bytes: bytes, dest: str) -> dict:
    """Extract the enterprise-attack STIX bundle from the ZIP and parse it.

    Only the bundle file is written to ``dest``; the rest of the archive is
    never unpacked, since nothing else in it is read.

    Args:
        zip_bytes: Raw bytes of the MITRE CTI archive.
        dest: Directory the bundle file is extracted into.

    Returns:
        The parsed STIX bundle.

    Raises:
        FileNotFoundError: if the archive does not contain the bundle.
    """
    # ZIP member names always use forward slashes, regardless of platform.
    member_name = f"{_ZIP_ROOT_PREFIX}/enterprise-attack/enterprise-attack.json"
    bundle_path = (
        Path(dest) / _ZIP_ROOT_PREFIX
        / "enterprise-attack" / "enterprise-attack.json"
    )

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        try:
            zf.getinfo(member_name)
        except KeyError:
            raise FileNotFoundError(
                f"STIX bundle not found at {bundle_path}"
            ) from None
        zf.extract(member_name, dest)

    if not bundle_path.is_file():
        raise FileNotFoundError(
            f"STIX bundle not found at {bundle_path}"
        )

    logger.info("Loading STIX bundle from %s", bundle_path)
    with open(bundle_path, "r", encoding="utf-8") as fh:
        bundle = json.load(fh)

    objects = bundle.get("objects", [])
    logger.info("Loaded %d STIX objects", len(objects))
    return bundle
def _extract_mitre_id(external_references: list) -> str | None:
"""Extract the MITRE ATT&CK ID from external_references."""
if not isinstance(external_references, list):
return None
for ref in external_references:
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
return ref.get("external_id")
return None
def _extract_mitre_url(external_references: list) -> str | None:
"""Extract the MITRE ATT&CK URL from external_references."""
if not isinstance(external_references, list):
return None
for ref in external_references:
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
return ref.get("url")
return None
def _parse_intrusion_sets(objects: list) -> list[dict]:
    """Turn the STIX ``intrusion-set`` objects into ThreatActor dicts.

    Revoked objects and objects whose name is blank are left out. In each
    result the actor's own name is removed from its alias list, and only the
    first 20 non-MITRE external references are kept.
    """
    parsed = []
    for stix_obj in objects:
        is_live_intrusion_set = (
            stix_obj.get("type") == "intrusion-set" and not stix_obj.get("revoked")
        )
        if not is_live_intrusion_set:
            continue

        external_refs = stix_obj.get("external_references", [])
        actor_name = stix_obj.get("name", "").strip()
        if not actor_name:
            continue

        alias_list = stix_obj.get("aliases", [])
        if isinstance(alias_list, list) and actor_name in alias_list:
            alias_list = [alias for alias in alias_list if alias != actor_name]

        # Everything that is not the MITRE ATT&CK entry itself is kept as a
        # supporting reference.
        other_refs = [
            {
                "source": entry.get("source_name", ""),
                "url": entry.get("url", ""),
                "description": entry.get("description", ""),
            }
            for entry in external_refs
            if isinstance(entry, dict) and entry.get("source_name") != "mitre-attack"
        ]

        parsed.append({
            "stix_id": stix_obj.get("id"),  # e.g. "intrusion-set--abc123"
            "mitre_id": _extract_mitre_id(external_refs),
            "name": actor_name,
            "aliases": alias_list or [],
            "description": stix_obj.get("description", ""),
            "mitre_url": _extract_mitre_url(external_refs),
            "references": other_refs[:20],  # cap to avoid bloat
            "first_seen": stix_obj.get("first_seen"),
            "last_seen": stix_obj.get("last_seen"),
        })

    logger.info("Parsed %d intrusion-sets (threat actors)", len(parsed))
    return parsed
def _parse_relationships(objects: list) -> list[dict]:
    """Collect the ``uses`` relationships that go from an intrusion-set
    to an attack-pattern.

    Revoked relationships, and relationships between any other pair of
    object types, are ignored.
    """
    links = []
    for stix_obj in objects:
        if (
            stix_obj.get("type") != "relationship"
            or stix_obj.get("relationship_type") != "uses"
            or stix_obj.get("revoked")
        ):
            continue

        actor_ref = stix_obj.get("source_ref", "")
        pattern_ref = stix_obj.get("target_ref", "")

        # Keep only intrusion-set → attack-pattern edges.
        if actor_ref.startswith("intrusion-set--") and pattern_ref.startswith("attack-pattern--"):
            links.append({
                "source_ref": actor_ref,
                "target_ref": pattern_ref,
                "description": stix_obj.get("description", ""),
            })

    logger.info("Parsed %d uses-relationships (actor→technique)", len(links))
    return links
def _build_attack_pattern_map(objects: list) -> dict[str, str]:
    """Map each STIX attack-pattern ID to its MITRE technique ID.

    e.g. {"attack-pattern--abc123": "T1059.001"}

    Revoked attack-patterns, and those lacking either identifier, are
    omitted.
    """
    pattern_to_technique = {}
    for stix_obj in objects:
        if stix_obj.get("type") == "attack-pattern" and not stix_obj.get("revoked"):
            pattern_id = stix_obj.get("id", "")
            technique_code = _extract_mitre_id(stix_obj.get("external_references", []))
            if pattern_id and technique_code:
                pattern_to_technique[pattern_id] = technique_code

    logger.info("Built attack-pattern map with %d entries", len(pattern_to_technique))
    return pattern_to_technique
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def _upsert_threat_actors(
    db: Session, actor_dicts: list[dict],
) -> tuple[dict[str, ThreatActor], int, int]:
    """Create or update one ThreatActor row per parsed intrusion-set.

    Returns ``(stix_id → row, created_count, updated_count)``.
    """
    synced_fields = (
        "name", "aliases", "description", "mitre_url",
        "references", "first_seen", "last_seen",
    )

    # Existing rows are matched by MITRE ID. Rows without one are matched by
    # name, so that re-running the import does not insert the same ID-less
    # actor again on every run.
    by_mitre_id: dict[str, ThreatActor] = {}
    by_name_without_id: dict[str, ThreatActor] = {}
    for row in db.query(ThreatActor).all():
        if row.mitre_id:
            by_mitre_id[row.mitre_id] = row
        else:
            by_name_without_id.setdefault(row.name, row)

    stix_to_db_actor: dict[str, ThreatActor] = {}
    created = 0
    updated = 0
    for actor_dict in actor_dicts:
        mitre_id = actor_dict["mitre_id"] or None
        values = {field: actor_dict.get(field) for field in synced_fields}

        if mitre_id:
            db_actor = by_mitre_id.get(mitre_id)
        else:
            db_actor = by_name_without_id.get(values["name"])

        if db_actor is None:
            db_actor = ThreatActor(mitre_id=mitre_id, is_active=True, **values)
            db.add(db_actor)
            # Register the new row so a repeat of the same actor later in this
            # bundle updates it instead of inserting a duplicate.
            if mitre_id:
                by_mitre_id[mitre_id] = db_actor
            else:
                by_name_without_id[values["name"]] = db_actor
            created += 1
        else:
            for field, value in values.items():
                setattr(db_actor, field, value)
            updated += 1

        stix_to_db_actor[actor_dict["stix_id"]] = db_actor

    # One flush assigns primary keys to every new row; the IDs are not needed
    # before the relationship step.
    db.flush()
    return stix_to_db_actor, created, updated


def _link_actor_techniques(
    db: Session,
    relationships: list[dict],
    stix_to_db_actor: dict[str, ThreatActor],
    attack_pattern_map: dict[str, str],
) -> tuple[int, int]:
    """Insert the actor→technique links that are not in the database yet.

    Returns ``(created_count, skipped_count)``. A relationship whose actor or
    technique cannot be resolved is ignored and counted in neither total.
    """
    technique_id_by_mitre_id = {
        mitre_id: technique_id
        for mitre_id, technique_id in db.query(Technique.mitre_id, Technique.id).all()
    }
    existing_rels: set[tuple] = {
        (str(actor_id), str(technique_id))
        for actor_id, technique_id in db.query(
            ThreatActorTechnique.threat_actor_id,
            ThreatActorTechnique.technique_id,
        ).all()
    }

    created = 0
    skipped = 0
    for rel in relationships:
        db_actor = stix_to_db_actor.get(rel["source_ref"])
        if db_actor is None:
            continue
        mitre_technique_id = attack_pattern_map.get(rel["target_ref"])
        if not mitre_technique_id:
            continue
        technique_id = technique_id_by_mitre_id.get(mitre_technique_id)
        if technique_id is None:
            continue

        rel_key = (str(db_actor.id), str(technique_id))
        if rel_key in existing_rels:
            skipped += 1
            continue

        db.add(ThreatActorTechnique(
            threat_actor_id=db_actor.id,
            technique_id=technique_id,
            usage_description=rel["description"][:5000] if rel["description"] else None,
        ))
        # Also guards against the same pair appearing twice in one bundle.
        existing_rels.add(rel_key)
        created += 1

    return created, skipped


def sync(db: Session) -> dict:
    """Download and import threat actors from MITRE CTI.

    Downloads the CTI archive, parses intrusion-sets and their ``uses``
    relationships, upserts :class:`ThreatActor` rows and inserts missing
    :class:`ThreatActorTechnique` links. On success the ``mitre_cti``
    DataSource row (if present) is stamped with the result and an audit
    entry is written.

    Returns:
        A summary dict with the created / updated / skipped counts.

    Raises:
        Any download, parsing or database error is re-raised. A database
        error rolls the import transaction back first.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
    try:
        zip_bytes = _download_zip()
        bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
    finally:
        # The bundle is fully loaded in memory by now (or loading failed), so
        # the extracted files are no longer needed either way.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)

    objects = bundle.get("objects", [])

    # Step 1: Parse data
    actor_dicts = _parse_intrusion_sets(objects)
    relationships = _parse_relationships(objects)
    attack_pattern_map = _build_attack_pattern_map(objects)

    # Steps 2-3: Upsert actors, then link them to techniques, in one
    # transaction.
    try:
        stix_to_db_actor, actors_created, actors_updated = _upsert_threat_actors(
            db, actor_dicts,
        )
        rels_created, rels_skipped = _link_actor_techniques(
            db, relationships, stix_to_db_actor, attack_pattern_map,
        )
        db.commit()
    except Exception:
        # Leave the caller's session usable and never persist a half-finished
        # import.
        db.rollback()
        logger.exception("MITRE CTI threat actor import failed; transaction rolled back")
        raise

    summary = {
        "actors_created": actors_created,
        "actors_updated": actors_updated,
        "relationships_created": rels_created,
        "relationships_skipped": rels_skipped,
        "total_actors_parsed": len(actor_dicts),
        "total_relationships_parsed": len(relationships),
    }

    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "mitre_cti").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()

    logger.info("MITRE CTI threat actor import complete — %s", summary)

    log_action(
        db,
        user_id=None,
        action="import_threat_actors",
        entity_type="threat_actor",
        entity_id=None,
        details=summary,
    )

    return summary