feat: Phase 4 - MITRE ATT&CK sync and scheduled job (T-018, T-019)

- Add MITRE sync service via TAXII 2.0 with GitHub fallback
- Upsert attack-pattern objects into techniques table (691 techniques)
- Detect name/description changes and flag review_required on re-sync
- Add APScheduler background job running every 24h
- Add POST /system/sync-mitre endpoint (admin only)
- Add GET /system/scheduler-status endpoint (admin only)
- Configure logging for scheduler and sync visibility
- Update README with new endpoints and project structure
This commit is contained in:
2026-02-06 15:28:53 +01:00
parent 4f6dd838fd
commit b11854fdab
6 changed files with 384 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ Aegis is a comprehensive platform for tracking and managing security coverage ag
## Features ## Features
- **MITRE ATT&CK Integration**: Automatic synchronization with the MITRE ATT&CK framework via TAXII - **MITRE ATT&CK Integration**: Automatic synchronization with the MITRE ATT&CK framework via TAXII (with GitHub fallback), scheduled every 24h
- **Coverage Tracking**: Track validation status for each technique (validated, partial, not covered, in progress) - **Coverage Tracking**: Track validation status for each technique (validated, partial, not covered, in progress)
- **Test Management**: Document and manage security tests with full audit trail - **Test Management**: Document and manage security tests with full audit trail
- **Evidence Storage**: Secure evidence file storage with SHA256 integrity verification - **Evidence Storage**: Secure evidence file storage with SHA256 integrity verification
@@ -121,6 +121,12 @@ Once the backend is running, access the interactive API documentation at:
| POST | `/api/v1/tests/{test_id}/evidence` | Authenticated | Upload evidence file (SHA-256 verified) | | POST | `/api/v1/tests/{test_id}/evidence` | Authenticated | Upload evidence file (SHA-256 verified) |
| GET | `/api/v1/evidence/{id}` | Authenticated | Get metadata + presigned download URL | | GET | `/api/v1/evidence/{id}` | Authenticated | Get metadata + presigned download URL |
### System
| Method | Route | Auth | Description |
|--------|-------|------|-------------|
| POST | `/api/v1/system/sync-mitre` | Admin | Manually trigger MITRE ATT&CK sync |
| GET | `/api/v1/system/scheduler-status` | Admin | Background scheduler health & job list |
## Project Structure ## Project Structure
``` ```
@@ -159,12 +165,16 @@ Aegis/
│ │ ├── auth.py # POST /auth/login, GET /auth/me │ │ ├── auth.py # POST /auth/login, GET /auth/me
│ │ ├── techniques.py # CRUD techniques (list, detail, create, update, review) │ │ ├── techniques.py # CRUD techniques (list, detail, create, update, review)
│ │ ├── tests.py # CRUD tests (create, detail, update, validate, reject) │ │ ├── tests.py # CRUD tests (create, detail, update, validate, reject)
│ │ ── evidence.py # Upload evidence, presigned download │ │ ── evidence.py # Upload evidence, presigned download
│ │ └── system.py # MITRE sync trigger, scheduler status
│ ├── dependencies/ # FastAPI dependencies (DI) │ ├── dependencies/ # FastAPI dependencies (DI)
│ │ └── auth.py # get_current_user, require_role, require_any_role │ │ └── auth.py # get_current_user, require_role, require_any_role
│ ├── jobs/ # Background scheduled jobs
│ │ └── mitre_sync_job.py # APScheduler job: sync MITRE every 24h
│ └── services/ # Business logic services │ └── services/ # Business logic services
│ ├── audit_service.py │ ├── audit_service.py
── status_service.py # Recalculate technique status from tests ── status_service.py # Recalculate technique status from tests
│ └── mitre_sync_service.py # MITRE ATT&CK sync via TAXII / GitHub
└── frontend/ # React frontend (coming soon) └── frontend/ # React frontend (coming soon)
``` ```

View File

View File

@@ -0,0 +1,53 @@
"""Scheduled job for periodic MITRE ATT&CK synchronisation.
Uses APScheduler's ``BackgroundScheduler`` to run :func:`sync_mitre` every
24 hours. The job manages its own database session (created on entry,
closed in ``finally``) so it is fully independent from FastAPI's
request-scoped sessions.
"""
import logging
from apscheduler.schedulers.background import BackgroundScheduler
from app.database import SessionLocal
from app.services.mitre_sync_service import sync_mitre
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Module-level scheduler instance
# ---------------------------------------------------------------------------
scheduler = BackgroundScheduler()
def _run_mitre_sync() -> None:
"""Execute a MITRE sync inside its own DB session."""
logger.info("Scheduled MITRE sync job starting...")
db = SessionLocal()
try:
summary = sync_mitre(db)
logger.info("Scheduled MITRE sync job finished — %s", summary)
except Exception:
logger.exception("Scheduled MITRE sync job failed")
finally:
db.close()
def start_scheduler() -> None:
"""Register the MITRE sync job and start the background scheduler.
The job runs every **24 hours**. It does **not** fire immediately on
startup — the first execution happens 24 h after the application boots.
"""
scheduler.add_job(
_run_mitre_sync,
trigger="interval",
hours=24,
id="mitre_sync",
name="MITRE ATT&CK sync (every 24h)",
replace_existing=True,
)
scheduler.start()
logger.info("MITRE sync scheduler started (interval=24h)")

View File

@@ -1,3 +1,4 @@
import logging
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI
@@ -7,13 +8,24 @@ from app.routers import auth as auth_router
from app.routers import techniques as techniques_router from app.routers import techniques as techniques_router
from app.routers import tests as tests_router from app.routers import tests as tests_router
from app.routers import evidence as evidence_router from app.routers import evidence as evidence_router
from app.routers import system as system_router
from app.storage import ensure_bucket_exists from app.storage import ensure_bucket_exists
from app.jobs.mitre_sync_job import start_scheduler, scheduler
# ── Logging ───────────────────────────────────────────────────────────────
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(name)s%(message)s",
)
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Startup / shutdown logic.""" """Startup / shutdown logic."""
ensure_bucket_exists() ensure_bucket_exists()
start_scheduler()
yield yield
# Graceful shutdown of the background scheduler
scheduler.shutdown(wait=False)
app = FastAPI(title="Attack Coverage Platform", lifespan=lifespan) app = FastAPI(title="Attack Coverage Platform", lifespan=lifespan)
@@ -32,6 +44,7 @@ app.include_router(auth_router.router, prefix="/api/v1")
app.include_router(techniques_router.router, prefix="/api/v1") app.include_router(techniques_router.router, prefix="/api/v1")
app.include_router(tests_router.router, prefix="/api/v1") app.include_router(tests_router.router, prefix="/api/v1")
app.include_router(evidence_router.router, prefix="/api/v1") app.include_router(evidence_router.router, prefix="/api/v1")
app.include_router(system_router.router, prefix="/api/v1")
@app.get("/health") @app.get("/health")

View File

@@ -0,0 +1,58 @@
"""System-level endpoints (admin only).
Provides manual triggers for background operations such as the MITRE
ATT&CK synchronisation, and scheduler health introspection.
"""
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies.auth import require_role
from app.models.user import User
from app.services.mitre_sync_service import sync_mitre
from app.jobs.mitre_sync_job import scheduler
router = APIRouter(prefix="/system", tags=["system"])
@router.post("/sync-mitre")
def trigger_mitre_sync(
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""Manually trigger a MITRE ATT&CK synchronisation.
**Requires** the ``admin`` role.
Returns a JSON object with the sync summary including the count of
new and updated techniques.
"""
summary = sync_mitre(db)
return {
"message": "MITRE sync completed",
"new": summary["created"],
"updated": summary["updated"],
}
@router.get("/scheduler-status")
def scheduler_status(
current_user: User = Depends(require_role("admin")),
):
"""Return the current state of the background scheduler.
**Requires** the ``admin`` role.
"""
jobs = scheduler.get_jobs()
return {
"running": scheduler.running,
"jobs": [
{
"id": job.id,
"name": job.name,
"next_run_time": str(job.next_run_time) if job.next_run_time else None,
}
for job in jobs
],
}

View File

@@ -0,0 +1,247 @@
"""Service for synchronizing MITRE ATT&CK techniques via TAXII 2.0.
Connects to the official MITRE CTI TAXII server, fetches the Enterprise
ATT&CK collection, and upserts attack-pattern objects into the local
``techniques`` table. Falls back to the MITRE CTI GitHub repository
when the TAXII server is unreachable.
"""
import logging
from datetime import datetime
import requests as _requests
from sqlalchemy.orm import Session
from taxii2client.v20 import Server as TaxiiServer
from app.models.technique import Technique
from app.models.enums import TechniqueStatus
from app.services.audit_service import log_action
logger = logging.getLogger(__name__)
TAXII_SERVER_URL = "https://cti-taxii.mitre.org/taxii/"
MITRE_SOURCE_NAME = "mitre-attack"
GITHUB_ENTERPRISE_URL = (
"https://raw.githubusercontent.com/mitre/cti/master/"
"enterprise-attack/enterprise-attack.json"
)
def _extract_mitre_id(external_references: list) -> str | None:
"""Return the MITRE ATT&CK ID (e.g. ``T1059.001``) from external_references."""
if not external_references:
return None
for ref in external_references:
if ref.get("source_name") == MITRE_SOURCE_NAME:
return ref.get("external_id")
return None
def _extract_tactics(kill_chain_phases: list) -> str | None:
"""Return a comma-separated string of tactic phase names."""
if not kill_chain_phases:
return None
tactics = [
phase.get("phase_name")
for phase in kill_chain_phases
if phase.get("kill_chain_name") == "mitre-attack"
]
return ", ".join(tactics) if tactics else None
def _extract_platforms(stix_object: dict) -> list:
"""Return the list of platforms from the STIX object."""
return stix_object.get("x_mitre_platforms", [])
def _extract_version(stix_object: dict) -> str | None:
"""Return the MITRE ATT&CK version string."""
return stix_object.get("x_mitre_version")
def _extract_last_modified(stix_object: dict) -> datetime | None:
"""Return the ``modified`` timestamp as a datetime, or None."""
modified = stix_object.get("modified")
if modified is None:
return None
if isinstance(modified, datetime):
return modified
try:
return datetime.fromisoformat(modified.replace("Z", "+00:00"))
except (ValueError, AttributeError):
return None
def _fetch_attack_patterns_taxii() -> list[dict]:
"""Connect to the MITRE TAXII server and return all attack-pattern objects."""
logger.info("Connecting to MITRE TAXII server at %s", TAXII_SERVER_URL)
server = TaxiiServer(TAXII_SERVER_URL)
api_root = server.api_roots[0]
collection = api_root.collections[0] # Enterprise ATT&CK
logger.info(
"Fetching objects from collection '%s' (id=%s)",
collection.title,
collection.id,
)
bundle = collection.get_objects()
objects = bundle.get("objects", [])
attack_patterns = [
obj for obj in objects if obj.get("type") == "attack-pattern"
]
logger.info("Retrieved %d attack-pattern objects via TAXII", len(attack_patterns))
return attack_patterns
def _fetch_attack_patterns_github() -> list[dict]:
"""Fallback: fetch Enterprise ATT&CK bundle from the MITRE CTI GitHub repo."""
logger.info("Fetching Enterprise ATT&CK bundle from GitHub (%s)", GITHUB_ENTERPRISE_URL)
resp = _requests.get(GITHUB_ENTERPRISE_URL, timeout=120)
resp.raise_for_status()
bundle = resp.json()
objects = bundle.get("objects", [])
attack_patterns = [
obj for obj in objects if obj.get("type") == "attack-pattern"
]
logger.info("Retrieved %d attack-pattern objects via GitHub", len(attack_patterns))
return attack_patterns
def _fetch_attack_patterns() -> list[dict]:
"""Return all attack-pattern objects, trying TAXII first then GitHub."""
try:
return _fetch_attack_patterns_taxii()
except Exception as exc:
logger.warning(
"TAXII server unavailable (%s), falling back to GitHub mirror",
exc,
)
return _fetch_attack_patterns_github()
def sync_mitre(db: Session) -> dict:
"""Synchronize MITRE ATT&CK techniques into the local database.
Parameters
----------
db : Session
Active SQLAlchemy database session.
Returns
-------
dict
Summary with keys ``created``, ``updated``, ``unchanged``, ``skipped``.
"""
attack_patterns = _fetch_attack_patterns()
# Pre-load existing techniques keyed by mitre_id for fast lookup
existing_techniques: dict[str, Technique] = {
t.mitre_id: t for t in db.query(Technique).all()
}
created = 0
updated = 0
unchanged = 0
skipped = 0
for obj in attack_patterns:
# ------------------------------------------------------------------
# Skip revoked / deprecated objects
# ------------------------------------------------------------------
if obj.get("revoked", False) or obj.get("x_mitre_deprecated", False):
skipped += 1
continue
mitre_id = _extract_mitre_id(obj.get("external_references", []))
if not mitre_id:
skipped += 1
continue
name = obj.get("name", "")
description = obj.get("description", "")
tactic = _extract_tactics(obj.get("kill_chain_phases", []))
platforms = _extract_platforms(obj)
version = _extract_version(obj)
last_modified = _extract_last_modified(obj)
is_subtechnique = "." in mitre_id
parent_mitre_id = mitre_id.split(".")[0] if is_subtechnique else None
existing = existing_techniques.get(mitre_id)
if existing is None:
# ---- Create new technique ----
technique = Technique(
mitre_id=mitre_id,
name=name,
description=description,
tactic=tactic,
platforms=platforms,
mitre_version=version,
mitre_last_modified=last_modified,
is_subtechnique=is_subtechnique,
parent_mitre_id=parent_mitre_id,
status_global=TechniqueStatus.not_evaluated,
review_required=False,
)
db.add(technique)
existing_techniques[mitre_id] = technique
created += 1
else:
# ---- Update if name or description changed ----
changes = False
if existing.name != name:
existing.name = name
changes = True
if (existing.description or "") != (description or ""):
existing.description = description
changes = True
# Always keep metadata up-to-date (does not trigger review)
existing.tactic = tactic
existing.platforms = platforms
existing.mitre_version = version
existing.mitre_last_modified = last_modified
existing.is_subtechnique = is_subtechnique
existing.parent_mitre_id = parent_mitre_id
if changes:
existing.review_required = True
updated += 1
else:
unchanged += 1
# Single commit for the whole batch
db.commit()
summary = {
"created": created,
"updated": updated,
"unchanged": unchanged,
"skipped": skipped,
}
logger.info(
"MITRE sync complete — created=%d, updated=%d, unchanged=%d, skipped=%d",
created,
updated,
unchanged,
skipped,
)
# Audit log (system action → user_id=None)
log_action(
db,
user_id=None,
action="mitre_sync",
entity_type="technique",
entity_id=None,
details=summary,
)
return summary