feat(phase-23): add Threat Actor profiles with MITRE CTI import, API, heatmap and gap analysis (T-208 to T-212)
This commit is contained in:
72
backend/alembic/versions/b010_add_threat_actors_tables.py
Normal file
72
backend/alembic/versions/b010_add_threat_actors_tables.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""add_threat_actors_tables
|
||||
|
||||
Revision ID: b010threatactors
|
||||
Revises: b009detectionrules
|
||||
Create Date: 2026-02-09 15:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
|
||||
|
||||
# Alembic revision identifiers: this revision and the one it builds on.
revision: str = "b010threatactors"
down_revision: Union[str, Sequence[str], None] = "b009detectionrules"
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create ``threat_actors`` and the ``threat_actor_techniques`` junction table.

    Each table is created first, followed by its secondary indexes; the
    junction table additionally gets a unique constraint on the
    (threat_actor_id, technique_id) pair.
    """
    # --- threat_actors -----------------------------------------------------
    actor_columns = [
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('mitre_id', sa.String(), unique=True, nullable=True),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('aliases', JSONB(), nullable=True),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('country', sa.String(), nullable=True),
        sa.Column('target_sectors', JSONB(), nullable=True),
        sa.Column('target_regions', JSONB(), nullable=True),
        sa.Column('motivation', sa.String(), nullable=True),
        sa.Column('sophistication', sa.String(), nullable=True),
        sa.Column('first_seen', sa.String(), nullable=True),
        sa.Column('last_seen', sa.String(), nullable=True),
        sa.Column('references', JSONB(), nullable=True),
        sa.Column('mitre_url', sa.String(), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default='true'),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
    ]
    op.create_table('threat_actors', *actor_columns)
    for index_name, indexed_column in (
        ('ix_threat_actors_country', 'country'),
        ('ix_threat_actors_motivation', 'motivation'),
    ):
        op.create_index(index_name, 'threat_actors', [indexed_column])

    # --- threat_actor_techniques (junction table) --------------------------
    actor_fk = sa.ForeignKey('threat_actors.id', ondelete='CASCADE')
    technique_fk = sa.ForeignKey('techniques.id', ondelete='CASCADE')
    link_columns = [
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('threat_actor_id', UUID(as_uuid=True), actor_fk, nullable=False),
        sa.Column('technique_id', UUID(as_uuid=True), technique_fk, nullable=False),
        sa.Column('usage_description', sa.Text(), nullable=True),
        sa.Column('first_seen_using', sa.String(), nullable=True),
    ]
    op.create_table('threat_actor_techniques', *link_columns)
    for index_name, indexed_column in (
        ('ix_threat_actor_techniques_actor', 'threat_actor_id'),
        ('ix_threat_actor_techniques_technique', 'technique_id'),
    ):
        op.create_index(index_name, 'threat_actor_techniques', [indexed_column])
    op.create_unique_constraint(
        'uq_actor_technique',
        'threat_actor_techniques',
        ['threat_actor_id', 'technique_id'],
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop ``threat_actor_techniques`` and then ``threat_actors``.

    Objects are removed in the reverse order of their creation in
    ``upgrade``: the junction table (constraint, indexes, table) goes first
    because it references ``threat_actors``.
    """
    link_table = 'threat_actor_techniques'
    op.drop_constraint('uq_actor_technique', link_table, type_='unique')
    for index_name in (
        'ix_threat_actor_techniques_technique',
        'ix_threat_actor_techniques_actor',
    ):
        op.drop_index(index_name, table_name=link_table)
    op.drop_table(link_table)

    actor_table = 'threat_actors'
    for index_name in (
        'ix_threat_actors_motivation',
        'ix_threat_actors_country',
    ):
        op.drop_index(index_name, table_name=actor_table)
    op.drop_table(actor_table)
|
||||
@@ -19,6 +19,7 @@ from app.routers import audit as audit_router
|
||||
from app.routers import notifications as notifications_router
|
||||
from app.routers import reports as reports_router
|
||||
from app.routers import data_sources as data_sources_router
|
||||
from app.routers import threat_actors as threat_actors_router
|
||||
from app.storage import ensure_bucket_exists
|
||||
from app.jobs.mitre_sync_job import start_scheduler, scheduler
|
||||
|
||||
@@ -62,6 +63,7 @@ app.include_router(audit_router.router, prefix="/api/v1")
|
||||
app.include_router(notifications_router.router, prefix="/api/v1")
|
||||
app.include_router(reports_router.router, prefix="/api/v1")
|
||||
app.include_router(data_sources_router.router, prefix="/api/v1")
|
||||
app.include_router(threat_actors_router.router, prefix="/api/v1")
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -9,11 +9,12 @@ from app.models.audit import AuditLog
|
||||
from app.models.notification import Notification
|
||||
from app.models.data_source import DataSource
|
||||
from app.models.detection_rule import DetectionRule
|
||||
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
|
||||
from app.models.enums import TechniqueStatus, TestState, TestResult, TeamSide
|
||||
|
||||
__all__ = [
|
||||
"User", "Technique", "Test", "TestTemplate", "Evidence",
|
||||
"IntelItem", "AuditLog", "Notification", "DataSource",
|
||||
"DetectionRule",
|
||||
"DetectionRule", "ThreatActor", "ThreatActorTechnique",
|
||||
"TechniqueStatus", "TestState", "TestResult", "TeamSide",
|
||||
]
|
||||
|
||||
92
backend/app/models/threat_actor.py
Normal file
92
backend/app/models/threat_actor.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""ThreatActor and ThreatActorTechnique models.
|
||||
|
||||
Stores profiles of APT groups and their associated MITRE ATT&CK
|
||||
techniques, imported from MITRE CTI (STIX 2.0).
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
Column, String, Text, Boolean, DateTime,
|
||||
ForeignKey, Index, UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class ThreatActor(Base):
    """Threat actor / APT group profile.

    Imported from MITRE CTI ``intrusion-set`` STIX objects.  The JSONB
    columns hold plain lists; their Python-side default is the ``list``
    callable so every new row gets its own empty list instead of sharing one
    mutable object between instances.
    """

    __tablename__ = "threat_actors"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    mitre_id = Column(String, unique=True, nullable=True)  # e.g. "G0016" (APT29)
    name = Column(String, nullable=False)
    aliases = Column(JSONB, nullable=True, default=list)  # ["Cozy Bear", "The Dukes", ...]
    description = Column(Text, nullable=True)
    country = Column(String, nullable=True)
    target_sectors = Column(JSONB, nullable=True, default=list)  # ["government", "defense", ...]
    target_regions = Column(JSONB, nullable=True, default=list)  # ["north-america", "europe", ...]
    motivation = Column(String, nullable=True)  # espionage / financial / destruction / ...
    sophistication = Column(String, nullable=True)  # low / medium / high / advanced
    # Stored as strings, not DateTime columns.
    first_seen = Column(String, nullable=True)
    last_seen = Column(String, nullable=True)
    references = Column(JSONB, nullable=True, default=list)  # [{"url": "...", "description": "..."}]
    mitre_url = Column(String, nullable=True)
    is_active = Column(Boolean, default=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationships: link rows are owned by the actor and are deleted with it.
    techniques = relationship(
        "ThreatActorTechnique",
        back_populates="threat_actor",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        Index('ix_threat_actors_country', 'country'),
        Index('ix_threat_actors_motivation', 'motivation'),
    )
|
||||
|
||||
|
||||
class ThreatActorTechnique(Base):
    """Link row connecting one threat actor to one MITRE ATT&CK technique.

    Besides the two foreign keys it carries free-text context about how the
    actor uses the technique (taken from the STIX ``relationship`` ``uses``
    objects).
    """

    __tablename__ = "threat_actor_techniques"
    __table_args__ = (
        Index('ix_threat_actor_techniques_actor', 'threat_actor_id'),
        Index('ix_threat_actor_techniques_technique', 'technique_id'),
        # An actor may reference a given technique at most once.
        UniqueConstraint('threat_actor_id', 'technique_id', name='uq_actor_technique'),
    )

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Both foreign keys cascade on delete at the database level.
    threat_actor_id = Column(
        UUID(as_uuid=True), ForeignKey("threat_actors.id", ondelete="CASCADE"), nullable=False
    )
    technique_id = Column(
        UUID(as_uuid=True), ForeignKey("techniques.id", ondelete="CASCADE"), nullable=False
    )
    usage_description = Column(Text, nullable=True)
    first_seen_using = Column(String, nullable=True)

    # Relationships
    threat_actor = relationship("ThreatActor", back_populates="techniques")
    technique = relationship("Technique")
|
||||
@@ -39,7 +39,8 @@ def _get_sync_handler(source_name: str):
|
||||
"gtfobins": ("app.services.lolbas_import_service", "sync_gtfobins"),
|
||||
"caldera": ("app.services.caldera_import_service", "sync"),
|
||||
"elastic_rules": ("app.services.elastic_import_service", "sync"),
|
||||
# d3fend and mitre_cti added in later phases
|
||||
"mitre_cti": ("app.services.threat_actor_import_service", "sync"),
|
||||
# d3fend added in later phases
|
||||
}
|
||||
|
||||
if source_name not in handlers:
|
||||
|
||||
309
backend/app/routers/threat_actors.py
Normal file
309
backend/app/routers/threat_actors.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""Threat actor endpoints.
|
||||
|
||||
Provides listing, detail, coverage analysis, and gap analysis for
|
||||
threat actor profiles imported from MITRE CTI.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import func, or_
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.database import get_db
|
||||
from app.dependencies.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
|
||||
from app.models.technique import Technique
|
||||
from app.models.test import Test
|
||||
from app.models.test_template import TestTemplate
|
||||
from app.models.enums import TechniqueStatus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/threat-actors", tags=["threat-actors"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /threat-actors — List with filters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("")
def list_threat_actors(
    search: Optional[str] = Query(None),
    country: Optional[str] = Query(None),
    motivation: Optional[str] = Query(None),
    sophistication: Optional[str] = Query(None),
    target_sectors: Optional[str] = Query(None),
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List threat actors with optional filters and pagination.

    **Requires** authentication (any role).

    Filters:
        search: case-insensitive substring matched against name, description
            and the JSON text of ``aliases``.
        country / motivation / sophistication: exact matches.
        target_sectors: case-insensitive substring matched against the JSON
            text of ``target_sectors``.

    Returns a dict with ``total`` (matches before pagination), ``offset``,
    ``limit`` and ``items``.  Each item carries ``technique_count`` and
    ``coverage_pct``, the share of the actor's techniques whose global status
    is validated or partial.
    """
    # Local import: the JSONB -> text cast needs a real SQL type.
    # ``func.cast(col, func.text())`` passes a SQL function where a type is
    # expected and does not produce a usable CAST expression.
    from sqlalchemy import Text, cast

    query = db.query(ThreatActor)

    # Filters
    if search:
        pattern = f"%{search}%"
        query = query.filter(
            or_(
                ThreatActor.name.ilike(pattern),
                ThreatActor.description.ilike(pattern),
                cast(ThreatActor.aliases, Text).ilike(pattern),
            )
        )

    if country:
        query = query.filter(ThreatActor.country == country)

    if motivation:
        query = query.filter(ThreatActor.motivation == motivation)

    if sophistication:
        query = query.filter(ThreatActor.sophistication == sophistication)

    if target_sectors:
        # Substring match on the serialized JSONB array (not strict containment).
        query = query.filter(
            cast(ThreatActor.target_sectors, Text).ilike(f"%{target_sectors}%")
        )

    # Total count (before pagination)
    total = query.count()

    # Paginate
    actors = query.order_by(ThreatActor.name).offset(offset).limit(limit).all()

    # Aggregate technique counts for the whole page in two grouped queries
    # instead of two COUNT queries per actor.
    actor_ids = [actor.id for actor in actors]
    tech_counts = {}
    covered_counts = {}
    if actor_ids:
        tech_counts = {
            actor_id: count
            for actor_id, count in (
                db.query(ThreatActorTechnique.threat_actor_id, func.count())
                .filter(ThreatActorTechnique.threat_actor_id.in_(actor_ids))
                .group_by(ThreatActorTechnique.threat_actor_id)
                .all()
            )
        }
        covered_counts = {
            actor_id: count
            for actor_id, count in (
                db.query(ThreatActorTechnique.threat_actor_id, func.count())
                .join(Technique, ThreatActorTechnique.technique_id == Technique.id)
                .filter(ThreatActorTechnique.threat_actor_id.in_(actor_ids))
                .filter(Technique.status_global.in_([
                    TechniqueStatus.validated,
                    TechniqueStatus.partial,
                ]))
                .group_by(ThreatActorTechnique.threat_actor_id)
                .all()
            )
        }

    results = []
    for actor in actors:
        # Actors without any linked technique are absent from both maps.
        tech_count = tech_counts.get(actor.id, 0)
        covered = covered_counts.get(actor.id, 0)
        coverage_pct = round((covered / tech_count * 100), 1) if tech_count > 0 else 0.0

        results.append({
            "id": str(actor.id),
            "mitre_id": actor.mitre_id,
            "name": actor.name,
            "aliases": actor.aliases or [],
            "country": actor.country,
            "target_sectors": actor.target_sectors or [],
            "target_regions": actor.target_regions or [],
            "motivation": actor.motivation,
            "sophistication": actor.sophistication,
            "mitre_url": actor.mitre_url,
            "technique_count": tech_count,
            "coverage_pct": coverage_pct,
            "is_active": actor.is_active,
        })

    return {
        "total": total,
        "offset": offset,
        "limit": limit,
        "items": results,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /threat-actors/{id} — Detail
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/{actor_id}")
def get_threat_actor(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Get detailed info about a threat actor including its techniques.

    **Requires** authentication (any role).

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that id exists.
    """
    import uuid

    # A malformed id can never match a row; answer 404 here instead of
    # letting the UUID comparison fail inside the database driver.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # Associated techniques together with their current coverage status.
    actor_techniques = (
        db.query(ThreatActorTechnique, Technique)
        .join(Technique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .order_by(Technique.mitre_id)
        .all()
    )

    techniques_list = [
        {
            "technique_id": str(tech.id),
            "mitre_id": tech.mitre_id,
            "name": tech.name,
            "tactic": tech.tactic,
            "status_global": tech.status_global.value if tech.status_global else None,
            "usage_description": link.usage_description,
            "first_seen_using": link.first_seen_using,
        }
        for link, tech in actor_techniques
    ]

    return {
        "id": str(actor.id),
        "mitre_id": actor.mitre_id,
        "name": actor.name,
        "aliases": actor.aliases or [],
        "description": actor.description,
        "country": actor.country,
        "target_sectors": actor.target_sectors or [],
        "target_regions": actor.target_regions or [],
        "motivation": actor.motivation,
        "sophistication": actor.sophistication,
        "first_seen": actor.first_seen,
        "last_seen": actor.last_seen,
        "references": actor.references or [],
        "mitre_url": actor.mitre_url,
        "is_active": actor.is_active,
        "techniques": techniques_list,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /threat-actors/{id}/coverage — Coverage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/{actor_id}/coverage")
def get_threat_actor_coverage(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Calculate coverage percentage against a specific threat actor.

    **Requires** authentication (any role).

    Returns the percentage of the actor's techniques whose global status is
    validated or partial, along with a per-status breakdown.  The response
    always contains the same keys; an actor with no linked techniques gets
    zero counts and an empty breakdown.

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that id exists.
    """
    import uuid

    # A malformed id can never match a row; answer 404 here instead of
    # letting the UUID comparison fail inside the database driver.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # All techniques linked to this actor.
    actor_techniques = (
        db.query(Technique)
        .join(ThreatActorTechnique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .all()
    )

    total = len(actor_techniques)

    # Count techniques per status; a missing status is reported as "not_evaluated".
    breakdown = {}
    for tech in actor_techniques:
        status = tech.status_global.value if tech.status_global else "not_evaluated"
        breakdown[status] = breakdown.get(status, 0) + 1

    covered = breakdown.get("validated", 0) + breakdown.get("partial", 0)
    coverage_pct = round((covered / total * 100), 1) if total else 0.0

    return {
        "actor_id": str(actor.id),
        "actor_name": actor.name,
        "total_techniques": total,
        "covered": covered,
        "coverage_pct": coverage_pct,
        "breakdown": breakdown,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /threat-actors/{id}/gaps — Gap analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/{actor_id}/gaps")
def get_threat_actor_gaps(
    actor_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Identify techniques of this actor that are NOT fully validated.

    **Requires** authentication (any role).

    Returns the gap techniques ordered by MITRE id, each with the number of
    active test templates and existing tests.  Techniques whose global status
    is unset are treated as gaps too.

    Raises:
        HTTPException: 404 when ``actor_id`` is not a valid UUID or no actor
            with that id exists.
    """
    import uuid

    # A malformed id can never match a row; answer 404 here instead of
    # letting the UUID comparison fail inside the database driver.
    try:
        actor_uuid = uuid.UUID(actor_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Threat actor not found") from None

    actor = db.query(ThreatActor).filter(ThreatActor.id == actor_uuid).first()
    if not actor:
        raise HTTPException(status_code=404, detail="Threat actor not found")

    # Techniques NOT validated.  ``!=`` alone evaluates to NULL for rows with
    # no status and would silently drop them, so NULL is matched explicitly.
    gap_techniques = (
        db.query(Technique, ThreatActorTechnique)
        .join(ThreatActorTechnique, ThreatActorTechnique.technique_id == Technique.id)
        .filter(ThreatActorTechnique.threat_actor_id == actor.id)
        .filter(
            or_(
                Technique.status_global.is_(None),
                Technique.status_global != TechniqueStatus.validated,
            )
        )
        .order_by(Technique.mitre_id)
        .all()
    )

    # Fetch template and test counts for all gap techniques in two grouped
    # queries instead of two COUNT queries per technique.
    template_counts = {}
    test_counts = {}
    if gap_techniques:
        mitre_ids = [tech.mitre_id for tech, _ in gap_techniques]
        technique_ids = [tech.id for tech, _ in gap_techniques]
        template_counts = {
            mitre_id: count
            for mitre_id, count in (
                db.query(TestTemplate.mitre_technique_id, func.count())
                .filter(TestTemplate.mitre_technique_id.in_(mitre_ids))
                .filter(TestTemplate.is_active.is_(True))
                .group_by(TestTemplate.mitre_technique_id)
                .all()
            )
        }
        test_counts = {
            technique_id: count
            for technique_id, count in (
                db.query(Test.technique_id, func.count())
                .filter(Test.technique_id.in_(technique_ids))
                .group_by(Test.technique_id)
                .all()
            )
        }

    gaps = []
    for tech, link in gap_techniques:
        # Techniques with no templates / tests are absent from the maps.
        template_count = template_counts.get(tech.mitre_id, 0)
        test_count = test_counts.get(tech.id, 0)

        gaps.append({
            "technique_id": str(tech.id),
            "mitre_id": tech.mitre_id,
            "name": tech.name,
            "tactic": tech.tactic,
            "status_global": tech.status_global.value if tech.status_global else None,
            "usage_description": link.usage_description,
            "available_templates": template_count,
            "existing_tests": test_count,
            "has_templates": template_count > 0,
        })

    return {
        "actor_id": str(actor.id),
        "actor_name": actor.name,
        "total_gaps": len(gaps),
        "gaps": gaps,
    }
|
||||
373
backend/app/services/threat_actor_import_service.py
Normal file
373
backend/app/services/threat_actor_import_service.py
Normal file
@@ -0,0 +1,373 @@
|
||||
"""Threat Actor import service (MITRE CTI / STIX 2.0).
|
||||
|
||||
Downloads the MITRE CTI repository, parses the STIX 2.0 bundle for
|
||||
``intrusion-set`` objects (APT groups) and ``relationship`` objects
|
||||
linking them to ``attack-pattern`` (techniques), then creates
|
||||
:class:`ThreatActor` and :class:`ThreatActorTechnique` records.
|
||||
|
||||
STIX 2.0 structure
|
||||
------------------
|
||||
The enterprise-attack bundle contains:
|
||||
- ``intrusion-set`` objects → our ThreatActor rows
|
||||
- ``attack-pattern`` objects → already in our Technique table
|
||||
- ``relationship`` objects (type=uses) → connects intrusion-set → attack-pattern
|
||||
|
||||
Strategy
|
||||
--------
|
||||
1. Download ZIP of ``github.com/mitre/cti``.
|
||||
2. Load ``enterprise-attack/enterprise-attack.json`` (single STIX bundle).
|
||||
3. Build lookup maps for intrusion-sets and attack-patterns.
|
||||
4. Parse relationships to connect actors → techniques.
|
||||
5. Upsert into database.
|
||||
|
||||
Idempotency
|
||||
-----------
|
||||
Deduplication by ``mitre_id`` for ThreatActor and by the unique
|
||||
constraint ``(threat_actor_id, technique_id)`` for ThreatActorTechnique.
|
||||
"""
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests as _requests
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.threat_actor import ThreatActor, ThreatActorTechnique
|
||||
from app.models.technique import Technique
|
||||
from app.models.data_source import DataSource
|
||||
from app.services.audit_service import log_action
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Archive of the ``master`` branch of the mitre/cti GitHub repository.
MITRE_CTI_ZIP_URL = "https://github.com/mitre/cti/archive/refs/heads/master.zip"

# Timeout handed to the HTTP client when downloading the archive.
_DOWNLOAD_TIMEOUT = 300

# Name of the top-level directory inside the downloaded archive.
_ZIP_ROOT_PREFIX = "cti-master"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _download_zip(url: str = MITRE_CTI_ZIP_URL) -> bytes:
    """Fetch the MITRE CTI archive from *url* and return its raw bytes.

    Raises whatever ``raise_for_status`` raises when the server answers with
    an error status.
    """
    logger.info("Downloading MITRE CTI ZIP from %s …", url)
    response = _requests.get(url, stream=True, timeout=_DOWNLOAD_TIMEOUT)
    response.raise_for_status()

    # Accessing ``content`` reads the whole body into memory.
    payload = response.content
    size_mb = len(payload) / (1024 * 1024)
    logger.info("Downloaded %.1f MB", size_mb)
    return payload
|
||||
|
||||
|
||||
def _extract_zip_and_load_bundle(zip_bytes: bytes, dest: str) -> dict:
    """Extract the enterprise-attack STIX bundle from the ZIP and load it.

    Only ``enterprise-attack/enterprise-attack.json`` is written to *dest*;
    the rest of the archive is never unpacked, because nothing else in it is
    read here.

    Args:
        zip_bytes: Raw bytes of the downloaded CTI archive.
        dest: Directory the bundle file is extracted into.

    Returns:
        The parsed STIX bundle as a dict.

    Raises:
        FileNotFoundError: if the archive does not contain the bundle.
    """
    relative_parts = (_ZIP_ROOT_PREFIX, "enterprise-attack", "enterprise-attack.json")
    member = "/".join(relative_parts)  # ZIP member names always use "/"
    bundle_path = Path(dest).joinpath(*relative_parts)

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        try:
            zf.extract(member, dest)
        except KeyError as exc:
            # ZipFile.extract raises KeyError for an unknown member name.
            raise FileNotFoundError(
                f"STIX bundle not found at {bundle_path}"
            ) from exc

    if not bundle_path.is_file():
        raise FileNotFoundError(
            f"STIX bundle not found at {bundle_path}"
        )

    logger.info("Loading STIX bundle from %s …", bundle_path)
    with open(bundle_path, "r", encoding="utf-8") as fh:
        bundle = json.load(fh)

    objects = bundle.get("objects", [])
    logger.info("Loaded %d STIX objects", len(objects))
    return bundle
|
||||
|
||||
|
||||
def _extract_mitre_id(external_references: list) -> str | None:
|
||||
"""Extract the MITRE ATT&CK ID from external_references."""
|
||||
if not isinstance(external_references, list):
|
||||
return None
|
||||
for ref in external_references:
|
||||
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
|
||||
return ref.get("external_id")
|
||||
return None
|
||||
|
||||
|
||||
def _extract_mitre_url(external_references: list) -> str | None:
|
||||
"""Extract the MITRE ATT&CK URL from external_references."""
|
||||
if not isinstance(external_references, list):
|
||||
return None
|
||||
for ref in external_references:
|
||||
if isinstance(ref, dict) and ref.get("source_name") == "mitre-attack":
|
||||
return ref.get("url")
|
||||
return None
|
||||
|
||||
|
||||
def _parse_intrusion_sets(objects: list) -> list[dict]:
    """Parse STIX ``intrusion-set`` objects into ThreatActor dicts.

    Revoked objects and objects without a usable name are skipped.  Each
    returned dict has the keys ``stix_id``, ``mitre_id``, ``name``,
    ``aliases`` (without the primary name), ``description``, ``mitre_url``,
    ``references`` (non-MITRE references, at most 20), ``first_seen`` and
    ``last_seen``.

    Malformed optional fields are tolerated: a missing/null or non-list
    ``external_references`` or ``aliases`` is treated as empty and a null
    ``name`` as blank, so one odd object does not abort the whole import.
    """
    actors = []
    for obj in objects:
        if obj.get("type") != "intrusion-set":
            continue
        if obj.get("revoked"):
            continue

        # Normalise to a list so the reference loop below cannot fail on a
        # null value (the _extract_* helpers already guard against this).
        ext_refs = obj.get("external_references")
        if not isinstance(ext_refs, list):
            ext_refs = []
        mitre_id = _extract_mitre_id(ext_refs)
        mitre_url = _extract_mitre_url(ext_refs)

        name = (obj.get("name") or "").strip()
        if not name:
            continue

        # Drop the primary name from the aliases; anything that is not a
        # list is discarded rather than stored in the list-typed column.
        raw_aliases = obj.get("aliases")
        if isinstance(raw_aliases, list):
            aliases = [alias for alias in raw_aliases if alias != name]
        else:
            aliases = []

        description = obj.get("description", "")

        # References from sources other than MITRE ATT&CK itself.
        references = [
            {
                "source": ref.get("source_name", ""),
                "url": ref.get("url", ""),
                "description": ref.get("description", ""),
            }
            for ref in ext_refs
            if isinstance(ref, dict) and ref.get("source_name") != "mitre-attack"
        ]

        actors.append({
            "stix_id": obj.get("id"),  # e.g. "intrusion-set--abc123"
            "mitre_id": mitre_id,
            "name": name,
            "aliases": aliases,
            "description": description,
            "mitre_url": mitre_url,
            "references": references[:20],  # cap to avoid bloat
            "first_seen": obj.get("first_seen"),
            "last_seen": obj.get("last_seen"),
        })

    logger.info("Parsed %d intrusion-sets (threat actors)", len(actors))
    return actors
|
||||
|
||||
|
||||
def _parse_relationships(objects: list) -> list[dict]:
    """Collect the ``uses`` relationships that link an intrusion-set to an
    attack-pattern.

    Each result is a dict with ``source_ref`` (intrusion-set STIX id),
    ``target_ref`` (attack-pattern STIX id) and ``description``.  Revoked
    relationships and relationships between other object types are ignored.
    """
    links = []
    for stix_obj in objects:
        is_live_uses = (
            stix_obj.get("type") == "relationship"
            and stix_obj.get("relationship_type") == "uses"
            and not stix_obj.get("revoked")
        )
        if not is_live_uses:
            continue

        origin = stix_obj.get("source_ref", "")
        destination = stix_obj.get("target_ref", "")

        # Keep only intrusion-set → attack-pattern edges.
        if origin.startswith("intrusion-set--") and destination.startswith("attack-pattern--"):
            links.append({
                "source_ref": origin,
                "target_ref": destination,
                "description": stix_obj.get("description", ""),
            })

    logger.info("Parsed %d uses-relationships (actor→technique)", len(links))
    return links
|
||||
|
||||
|
||||
def _build_attack_pattern_map(objects: list) -> dict[str, str]:
    """Map each STIX attack-pattern id to its MITRE technique id.

    e.g. {"attack-pattern--abc123": "T1059.001"}

    Revoked attack-patterns and those lacking either id are left out.
    """
    stix_to_mitre = {}
    for stix_obj in objects:
        if stix_obj.get("type") == "attack-pattern" and not stix_obj.get("revoked"):
            pattern_id = stix_obj.get("id", "")
            technique_code = _extract_mitre_id(stix_obj.get("external_references", []))
            if pattern_id and technique_code:
                stix_to_mitre[pattern_id] = technique_code

    logger.info("Built attack-pattern map with %d entries", len(stix_to_mitre))
    return stix_to_mitre
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def sync(db: Session) -> dict:
    """Download and import threat actors from MITRE CTI.

    Steps:
        1. Download the CTI archive and load the enterprise-attack bundle
           (the temporary directory is always removed afterwards).
        2. Parse intrusion-sets, ``uses`` relationships and attack-patterns.
        3. Update actors already known by ``mitre_id``; create the others.
        4. Create actor→technique links that do not exist yet, for
           techniques already present in the ``Technique`` table.
        5. Commit, record the outcome on the ``mitre_cti`` DataSource row
           (if one exists) and write an audit log entry.

    Args:
        db: SQLAlchemy session used for all reads and writes.

    Returns:
        A summary dict with the keys ``actors_created``, ``actors_updated``,
        ``relationships_created``, ``relationships_skipped``,
        ``total_actors_parsed`` and ``total_relationships_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_mitre_cti_")
    try:
        zip_bytes = _download_zip()
        bundle = _extract_zip_and_load_bundle(zip_bytes, tmp_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)

    objects = bundle.get("objects", [])

    # Step 1: Parse data
    actor_dicts = _parse_intrusion_sets(objects)
    relationships = _parse_relationships(objects)
    attack_pattern_map = _build_attack_pattern_map(objects)

    # Step 2: Load existing actors and techniques from DB
    existing_actors = {
        row.mitre_id: row
        for row in db.query(ThreatActor).all()
        if row.mitre_id
    }

    technique_by_mitre_id = {
        row.mitre_id: row
        for row in db.query(Technique).all()
    }

    # Step 3: Upsert threat actors
    actors_created = 0
    actors_updated = 0
    stix_to_db_actor: dict[str, ThreatActor] = {}

    for actor_dict in actor_dicts:
        mitre_id = actor_dict["mitre_id"]
        stix_id = actor_dict["stix_id"]

        if mitre_id and mitre_id in existing_actors:
            # Update existing actor
            db_actor = existing_actors[mitre_id]
            db_actor.name = actor_dict["name"]
            db_actor.aliases = actor_dict["aliases"]
            db_actor.description = actor_dict["description"]
            db_actor.mitre_url = actor_dict["mitre_url"]
            db_actor.references = actor_dict["references"]
            db_actor.first_seen = actor_dict.get("first_seen")
            db_actor.last_seen = actor_dict.get("last_seen")
            stix_to_db_actor[stix_id] = db_actor
            actors_updated += 1
        else:
            # Create new actor.
            # NOTE(review): an actor without a mitre_id can never match an
            # existing row, so it is inserted again on every sync — confirm
            # whether such intrusion-sets occur in the bundle.
            db_actor = ThreatActor(
                mitre_id=mitre_id,
                name=actor_dict["name"],
                aliases=actor_dict["aliases"],
                description=actor_dict["description"],
                mitre_url=actor_dict["mitre_url"],
                references=actor_dict["references"],
                first_seen=actor_dict.get("first_seen"),
                last_seen=actor_dict.get("last_seen"),
                is_active=True,
            )
            db.add(db_actor)
            if mitre_id:
                # Later duplicates of this mitre_id in the same bundle update
                # this row instead of inserting another one.
                existing_actors[mitre_id] = db_actor
            stix_to_db_actor[stix_id] = db_actor
            actors_created += 1

    # One flush for all pending inserts: primary keys are only read in the
    # relationship step below, so flushing per actor is unnecessary.
    db.flush()

    # Step 4: Upsert actor-technique relationships
    # Load existing relationships
    existing_rels: set[tuple] = set()
    for row in db.query(ThreatActorTechnique).all():
        existing_rels.add((str(row.threat_actor_id), str(row.technique_id)))

    rels_created = 0
    rels_skipped = 0

    for rel in relationships:
        source_ref = rel["source_ref"]
        target_ref = rel["target_ref"]

        # Resolve actor
        db_actor = stix_to_db_actor.get(source_ref)
        if not db_actor:
            continue

        # Resolve technique
        mitre_technique_id = attack_pattern_map.get(target_ref)
        if not mitre_technique_id:
            continue

        db_technique = technique_by_mitre_id.get(mitre_technique_id)
        if not db_technique:
            continue

        rel_key = (str(db_actor.id), str(db_technique.id))
        if rel_key in existing_rels:
            rels_skipped += 1
            continue

        actor_technique = ThreatActorTechnique(
            threat_actor_id=db_actor.id,
            technique_id=db_technique.id,
            usage_description=rel["description"][:5000] if rel["description"] else None,
        )
        db.add(actor_technique)
        # Remember the pair so a duplicate relationship later in the bundle
        # does not violate the unique constraint.
        existing_rels.add(rel_key)
        rels_created += 1

    db.commit()

    summary = {
        "actors_created": actors_created,
        "actors_updated": actors_updated,
        "relationships_created": rels_created,
        "relationships_skipped": rels_skipped,
        "total_actors_parsed": len(actor_dicts),
        "total_relationships_parsed": len(relationships),
    }

    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "mitre_cti").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()

    logger.info("MITRE CTI threat actor import complete — %s", summary)

    log_action(
        db,
        user_id=None,
        action="import_threat_actors",
        entity_type="threat_actor",
        entity_id=None,
        details=summary,
    )

    return summary
|
||||
Reference in New Issue
Block a user