feat(evaluations): ATT&CK Evaluations importer for CrowdStrike Falcon [FASE-6.1]
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled

- Migration b048: evaluation_imports table (adversary, round, status, tests_created)
- EvaluationImport SQLAlchemy model
- attck_evaluations_service: fetch rounds from evals.mitre.org API, import per-technique
  detection results (Technique -> detected; Tactic/General/Telemetry -> partially_detected;
  None -> not_detected)
- All imported tests land in in_review state with lab-environment disclaimer
- Idempotency guard prevents duplicate round imports
- 4 new endpoints: list rounds, import specific, import latest, check-new
- Weekly APScheduler cron (Mon 06:00) auto-checks and imports new rounds
- SystemPage UI: rounds table, import buttons, check-new, result feedback
- Disclaimer callout reminding admins these are lab results not org coverage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
kitos
2026-06-05 15:57:03 +02:00
parent cfc48ccd2b
commit e3e79be35a
7 changed files with 1067 additions and 1 deletions

View File

@@ -0,0 +1,401 @@
"""ATT&CK Evaluations importer — fetches real CrowdStrike detection results
from MITRE Engenuity's public API and seeds the platform with validated tests.
Data source
-----------
https://evals.mitre.org/api/
- /participants/ → list of vendors + rounds they completed
- /results/?participant=crowdstrike&domain=ENTERPRISE
→ per-substep detection results per adversary
Detection level mapping (MITRE → Aegis)
---------------------------------------
Technique / Specific Behavior → detected (correctly identified ATT&CK technique)
Tactic → partially_detected (behavior noted but not categorized)
General / IOC / MSSP → partially_detected (anomaly detected, not ATT&CK-mapped)
Telemetry → partially_detected (raw data only — marginal detection)
None / N/A → not_detected
All imported tests are created in ``in_review`` state so Blue Leads must
confirm each result before it counts as real coverage for the organisation.
Important caveats stored in every test's description
------------------------------------------------------
"Source: MITRE ATT&CK Evaluation (Round N — Adversary). Results reflect
CrowdStrike Falcon in a controlled lab environment, NOT this organisation's
deployment. Validate detection in your own environment before approving."
"""
import logging
import uuid
from datetime import datetime
from typing import Any
import requests
from sqlalchemy.orm import Session
from app.models.enums import TestState, TestResult
from app.models.evaluation_import import EvaluationImport
from app.models.technique import Technique
from app.models.test import Test
from app.models.user import User
from app.services.audit_service import log_action
from app.services.status_service import recalculate_technique_status
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Root URL of the evaluations site; the "/api/..." paths are appended to it.
_BASE = "https://evals.mitre.org"
_TIMEOUT = 30  # seconds per HTTP call
# Participant name: compared against the lower-cased "name" of each API
# participant and sent as the ``participant`` query parameter.
_VENDOR = "crowdstrike"
# Evaluation domain: compared against the upper-cased "domain" of each round
# and sent as the ``domain`` query parameter.
_DOMAIN = "ENTERPRISE"
# Detection type → quality score (higher = better)
_DETECTION_SCORE: dict[str, int] = {
"none": 0,
"n/a": 0,
"telemetry": 1,
"mssp": 2,
"general": 2,
"ioc": 2,
"tactic": 3,
"technique": 4,
"specific behavior": 4,
}
def _score(detection_type: str) -> int:
key = (detection_type or "").lower().strip()
for pattern, score in _DETECTION_SCORE.items():
if pattern in key:
return score
return 0
def _score_to_result(score: int) -> TestResult:
    """Convert a numeric detection score into a ``TestResult`` member.

    Scores of 4 and above map to ``detected``, scores from 1 up to (but not
    including) 4 map to ``partially_detected``, anything lower maps to
    ``not_detected``.
    """
    return (
        TestResult.detected
        if score >= 4
        else TestResult.partially_detected
        if score >= 1
        else TestResult.not_detected
    )
# ---------------------------------------------------------------------------
# Public API helpers
# ---------------------------------------------------------------------------
def fetch_available_rounds() -> list[dict[str, Any]]:
    """Return the PUBLIC ENTERPRISE evaluation rounds the vendor has completed.

    Requests ``/api/participants/``, picks the participant whose ``name``
    equals ``_VENDOR`` (case-insensitive) and keeps the entries of its
    ``adversaries_completed`` list whose ``domain`` equals ``_DOMAIN`` and
    whose ``status`` equals ``PUBLIC`` (both case-insensitive).

    Returns:
        The matching round dicts as delivered by the API, sorted by
        ``eval_round`` ascending. Rounds with a missing or null
        ``eval_round`` sort as 0. Other code in this module reads the keys
        ``name``, ``display_name`` and ``eval_round`` from these dicts.

    Raises:
        ValueError: the vendor is not present in the participants list.
        Exception: any error raised while fetching or decoding the response
            is logged and re-raised unchanged.
    """
    try:
        resp = requests.get(f"{_BASE}/api/participants/", timeout=_TIMEOUT)
        resp.raise_for_status()
        participants = resp.json()
    except Exception as exc:
        logger.error("Failed to fetch ATT&CK Evaluations participants: %s", exc)
        raise

    # ``d.get(key) or default`` rather than ``d.get(key, default)``: the
    # default argument only applies to absent keys, so a JSON ``null`` would
    # otherwise reach ``.lower()`` / ``.upper()`` / the sort as ``None``.
    crowdstrike = next(
        (p for p in participants if (p.get("name") or "").lower() == _VENDOR),
        None,
    )
    if not crowdstrike:
        raise ValueError(f"Vendor '{_VENDOR}' not found in evaluations participants list")

    rounds = [
        adv
        for adv in crowdstrike.get("adversaries_completed") or []
        if (adv.get("domain") or "").upper() == _DOMAIN
        and (adv.get("status") or "").upper() == "PUBLIC"
    ]
    rounds.sort(key=lambda adv: adv.get("eval_round") or 0)
    return rounds
def get_latest_round() -> dict[str, Any]:
    """Return the last entry of ``fetch_available_rounds()``.

    Because that list is sorted by ``eval_round`` ascending, the last entry
    is the most recent PUBLIC ENTERPRISE round.

    Raises:
        ValueError: no rounds were returned.
    """
    available = fetch_available_rounds()
    if available:
        return available[-1]
    raise ValueError("No public Enterprise evaluation rounds found for CrowdStrike")
def _best_detection(detections: list[dict[str, Any]]) -> tuple[int, str, str]:
    """Return ``(score, detection_type, note)`` of the highest-scoring detection.

    Ties keep the earliest entry. When nothing scores above 0 the result is
    ``(0, "None", "")``.
    """
    best_score = 0
    best_type = "None"
    best_note = ""
    for det in detections:
        dtype = det.get("Detection_Type", "None")
        candidate = _score(dtype)
        if candidate > best_score:
            best_score = candidate
            best_type = dtype
            best_note = det.get("Detection_Note", "")
    return best_score, best_type, best_note


def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
    """Fetch all per-substep detection results for a specific adversary round.

    Requests ``/api/results/`` for ``_VENDOR`` / ``_DOMAIN``, selects the
    adversary whose ``Adversary_Name`` equals *adversary_name*
    (case-insensitive) and walks ``Detections_By_Step`` → ``Steps`` →
    ``Substeps``. Substeps without a technique or sub-technique id are
    dropped.

    Returns:
        A flat list with one dict per substep, each containing:
        ``technique_id``, ``technique_name``, ``tactic_id``, ``tactic_name``,
        ``best_score``, ``detection_type``, ``note``.

    Raises:
        ValueError: the adversary is not present in the response.
        Exception: any error raised while fetching or decoding the response
            is logged and re-raised unchanged.
    """
    url = f"{_BASE}/api/results/?participant={_VENDOR}&domain={_DOMAIN}"
    try:
        resp = requests.get(url, timeout=_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        logger.error("Failed to fetch ATT&CK Evaluations results: %s", exc)
        raise

    # Throughout: ``d.get(key) or default`` so that a JSON ``null`` is treated
    # like an absent key (``d.get(key, default)`` would hand back ``None``).
    adversaries = data.get("adversaries") or []
    wanted = adversary_name.lower()
    target = next(
        (a for a in adversaries if (a.get("Adversary_Name") or "").lower() == wanted),
        None,
    )
    if not target:
        raise ValueError(
            f"Adversary '{adversary_name}' not found in results. "
            f"Available: {[a.get('Adversary_Name') for a in adversaries]}"
        )

    substeps: list[dict[str, Any]] = []
    scenarios = target.get("Detections_By_Step") or {}
    for scenario_data in scenarios.values():
        for step in scenario_data.get("Steps") or []:
            for substep in step.get("Substeps") or []:
                # Prefer sub-technique over technique
                sub = substep.get("Subtechnique") or {}
                tech = substep.get("Technique") or {}
                tactic = substep.get("Tactic") or {}

                technique_id = (
                    sub.get("Subtechnique_Id")
                    or tech.get("Technique_Id")
                    or ""
                ).strip()
                if not technique_id:
                    continue
                technique_name = (
                    sub.get("Subtechnique_Name")
                    or tech.get("Technique_Name")
                    or "Unknown"
                ).strip()

                best_score, best_type, best_note = _best_detection(
                    substep.get("Detections") or []
                )
                substeps.append(
                    {
                        "technique_id": technique_id,
                        "technique_name": technique_name,
                        "tactic_id": tactic.get("Tactic_Id") or "",
                        "tactic_name": tactic.get("Tactic_Name") or "",
                        "best_score": best_score,
                        "detection_type": best_type,
                        "note": best_note,
                    }
                )
    return substeps
def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
"""Aggregate substep results per technique — keep best detection score."""
by_technique: dict[str, dict] = {}
for sub in substeps:
tid = sub["technique_id"]
if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
by_technique[tid] = sub
return by_technique
# ---------------------------------------------------------------------------
# Main import function
# ---------------------------------------------------------------------------
def import_evaluation_round(
    db: Session,
    adversary_name: str,
    adversary_display: str,
    eval_round: int,
    current_user: User,
) -> dict[str, Any]:
    """Import a single ATT&CK Evaluation round for CrowdStrike into the platform.

    Creates one ``Test`` per unique technique, carrying the best detection
    result observed across all substeps for that technique. Every test is
    created in ``TestState.in_review``. Techniques whose MITRE id is not
    present in the local ``Technique`` table are counted as skipped.

    Args:
        db: Session used for all queries; committed once at the end.
        adversary_name: API name of the adversary; stored and matched
            lower-cased.
        adversary_display: Human-readable adversary name used in texts.
        eval_round: Round number, used in names, descriptions and the record.
        current_user: Recorded as creator, red validator and importer.

    Returns:
        Summary dict with ``created``, ``skipped``, ``techniques_covered``,
        ``adversary`` and ``eval_round``.

    Raises:
        ValueError: a ``completed`` import already exists for this adversary
            name (the guard is keyed on the name only, not on the round
            number), or the adversary is missing from the API results.
    """
    # Idempotency — refuse duplicate imports
    existing = (
        db.query(EvaluationImport)
        .filter(
            EvaluationImport.adversary_name == adversary_name.lower(),
            EvaluationImport.status == "completed",
        )
        .first()
    )
    if existing:
        raise ValueError(
            f"Round '{adversary_display}' (round {eval_round}) was already imported "
            f"on {existing.imported_at.date()}. Re-import is not allowed."
        )

    # Fetch and aggregate substep results
    substeps = fetch_results_for_adversary(adversary_name)
    by_technique = _aggregate_by_technique(substeps)

    created = 0
    skipped = 0
    # technique.id → loaded Technique; lets the recalculation pass below reuse
    # the objects instead of issuing one more query per technique.
    touched: dict[Any, Technique] = {}

    for mitre_id, agg in by_technique.items():
        # Look up the technique in our DB
        technique = (
            db.query(Technique)
            .filter(Technique.mitre_id == mitre_id.upper())
            .first()
        )
        if technique is None:
            skipped += 1
            continue

        detection_result = _score_to_result(agg["best_score"])
        description = (
            f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
            "Vendor: CrowdStrike Falcon.\n"
            f"Detection type achieved: {agg['detection_type']}.\n\n"
            "⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
            f"controlled MITRE lab environment against a simulated {adversary_display} "
            "adversary. They do NOT represent your organisation's actual detection "
            "capability. Validate in your own environment before approving."
        )
        if agg["note"]:
            description += f"\n\nMITRE note: {agg['note']}"

        red_summary = (
            f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
            "Vendor: CrowdStrike Falcon\n"
            f"Best detection level: {agg['detection_type']}\n"
            f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
        )

        # One timestamp per test so its three date fields agree exactly.
        now = datetime.utcnow()
        test = Test(
            technique_id=technique.id,
            # Separator added: the adversary and technique names were
            # previously concatenated with nothing between them.
            name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
            description=description,
            platform=None,
            procedure_text=(
                f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
                "See evaluation report at https://evals.mitre.org for full details."
            ),
            created_by=current_user.id,
            state=TestState.in_review,
            attack_success=True,
            red_summary=red_summary,
            red_validation_status="approved",
            red_validated_by=current_user.id,
            red_validated_at=now,
            detection_result=detection_result,
            blue_validation_status=None,
            execution_date=now,
            created_at=now,
        )
        db.add(test)
        db.flush()  # flushed before the audit entry, which records test.id

        log_action(
            db,
            user_id=current_user.id,
            action="eval_import_test",
            entity_type="test",
            entity_id=test.id,
            details={
                "adversary": adversary_name,
                "eval_round": eval_round,
                "mitre_id": mitre_id,
                "detection_type": agg["detection_type"],
            },
        )
        touched[technique.id] = technique
        created += 1

    # Recalculate coverage for all touched techniques
    for technique in touched.values():
        recalculate_technique_status(db, technique)

    # Record the import
    record = EvaluationImport(
        id=uuid.uuid4(),
        adversary_name=adversary_name.lower(),
        adversary_display=adversary_display,
        eval_round=eval_round,
        imported_at=datetime.utcnow(),
        imported_by=current_user.id,
        tests_created=created,
        techniques_covered=len(touched),
        status="completed",
        notes=f"Skipped {skipped} techniques not found in local DB.",
    )
    db.add(record)
    db.commit()

    logger.info(
        "ATT&CK Evaluation import complete — round %d (%s): %d tests created, %d skipped",
        eval_round, adversary_display, created, skipped,
    )
    return {
        "created": created,
        "skipped": skipped,
        "techniques_covered": len(touched),
        "adversary": adversary_display,
        "eval_round": eval_round,
    }
# ---------------------------------------------------------------------------
# New-round check (used by the weekly scheduler)
# ---------------------------------------------------------------------------
def check_for_new_round(db: Session) -> dict[str, Any]:
    """Check whether the latest evaluation round has not been imported yet.

    Looks up the latest round via ``get_latest_round()`` and then searches
    for a ``completed`` ``EvaluationImport`` with the same (lower-cased)
    adversary name.

    Returns:
        On success::

            {"new_round_available": bool,
             "already_imported": bool,
             "latest_round": {"name", "display_name", "eval_round"}}

        If the latest round cannot be determined, the failure is logged as a
        warning and the result is::

            {"new_round_available": False,
             "already_imported": False,
             "latest_round": None,
             "error": str}
    """
    try:
        latest = get_latest_round()
    except Exception as exc:
        logger.warning("Could not check for new ATT&CK Evaluation round: %s", exc)
        # Same keys as the success path (plus "error") so callers can read
        # "already_imported" without checking for its presence first.
        return {
            "new_round_available": False,
            "already_imported": False,
            "latest_round": None,
            "error": str(exc),
        }

    already = (
        db.query(EvaluationImport)
        .filter(
            EvaluationImport.adversary_name == latest["name"].lower(),
            EvaluationImport.status == "completed",
        )
        .first()
    )
    return {
        "new_round_available": already is None,
        "already_imported": already is not None,
        "latest_round": {
            "name": latest["name"],
            "display_name": latest.get("display_name", latest["name"]),
            "eval_round": latest["eval_round"],
        },
    }