feat(evaluations): ATT&CK Evaluations importer for CrowdStrike Falcon [FASE-6.1]
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled
- Migration b048: evaluation_imports table (adversary, round, status, tests_created)
- EvaluationImport SQLAlchemy model
- attck_evaluations_service: fetch rounds from evals.mitre.org API, import per-technique detection results (Technique/Tactic/Telemetry -> detected/partially/not_detected)
- All imported tests land in in_review state with lab-environment disclaimer
- Idempotency guard prevents duplicate round imports
- 4 new endpoints: list rounds, import specific, import latest, check-new
- Weekly APScheduler cron (Mon 06:00) auto-checks and imports new rounds
- SystemPage UI: rounds table, import buttons, check-new, result feedback
- Disclaimer callout reminding admins these are lab results not org coverage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
401
backend/app/services/attck_evaluations_service.py
Normal file
401
backend/app/services/attck_evaluations_service.py
Normal file
@@ -0,0 +1,401 @@
|
||||
"""ATT&CK Evaluations importer — fetches real CrowdStrike detection results
|
||||
from MITRE Engenuity's public API and seeds the platform with validated tests.
|
||||
|
||||
Data source
|
||||
-----------
|
||||
https://evals.mitre.org/api/
|
||||
- /participants/ → list of vendors + rounds they completed
|
||||
- /results/?participant=crowdstrike&domain=ENTERPRISE
|
||||
→ per-substep detection results per adversary
|
||||
|
||||
Detection level mapping (MITRE → Aegis)
|
||||
---------------------------------------
|
||||
Technique / Specific Behavior → detected (correctly identified ATT&CK technique)
|
||||
Tactic → partially_detected (behavior noted but not categorized)
|
||||
General / IOC / MSSP → partially_detected (anomaly detected, not ATT&CK-mapped)
|
||||
Telemetry → partially_detected (raw data only — marginal detection)
|
||||
None / N/A → not_detected
|
||||
|
||||
All imported tests are created in ``in_review`` state so Blue Leads must
|
||||
confirm each result before it counts as real coverage for the organisation.
|
||||
|
||||
Important caveats stored in every test's description
|
||||
------------------------------------------------------
|
||||
"Source: MITRE ATT&CK Evaluation (Round N — Adversary). Results reflect
|
||||
CrowdStrike Falcon in a controlled lab environment, NOT this organisation's
|
||||
deployment. Validate detection in your own environment before approving."
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.enums import TestState, TestResult
|
||||
from app.models.evaluation_import import EvaluationImport
|
||||
from app.models.technique import Technique
|
||||
from app.models.test import Test
|
||||
from app.models.user import User
|
||||
from app.services.audit_service import log_action
|
||||
from app.services.status_service import recalculate_technique_status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BASE = "https://evals.mitre.org"
|
||||
_TIMEOUT = 30 # seconds per HTTP call
|
||||
_VENDOR = "crowdstrike"
|
||||
_DOMAIN = "ENTERPRISE"
|
||||
|
||||
# Detection type → quality score (higher = better)
|
||||
_DETECTION_SCORE: dict[str, int] = {
|
||||
"none": 0,
|
||||
"n/a": 0,
|
||||
"telemetry": 1,
|
||||
"mssp": 2,
|
||||
"general": 2,
|
||||
"ioc": 2,
|
||||
"tactic": 3,
|
||||
"technique": 4,
|
||||
"specific behavior": 4,
|
||||
}
|
||||
|
||||
|
||||
def _score(detection_type: str) -> int:
|
||||
key = (detection_type or "").lower().strip()
|
||||
for pattern, score in _DETECTION_SCORE.items():
|
||||
if pattern in key:
|
||||
return score
|
||||
return 0
|
||||
|
||||
|
||||
def _score_to_result(score: int) -> TestResult:
    """Map a numeric detection score onto a ``TestResult`` member.

    A score of 4 or more is ``detected``, 1 to 3 is ``partially_detected``
    and anything lower is ``not_detected``.
    """
    return (
        TestResult.detected
        if score >= 4
        else TestResult.partially_detected
        if score >= 1
        else TestResult.not_detected
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def fetch_available_rounds() -> list[dict[str, Any]]:
    """Return all evaluation rounds CrowdStrike has completed (ENTERPRISE only).

    Requests ``/api/participants/``, picks the participant whose ``name``
    equals ``_VENDOR`` (case-insensitive) and keeps the entries of its
    ``adversaries_completed`` list whose ``domain`` is ``_DOMAIN`` and whose
    ``status`` is ``PUBLIC``.

    Returns:
        The matching round dicts as delivered by the API, sorted by
        ``eval_round`` ascending (a missing or null ``eval_round`` sorts
        as 0).  Other code in this module reads the ``name``,
        ``display_name`` and ``eval_round`` keys.

    Raises:
        ValueError: If the vendor is not present in the participants list.
        Exception: Any error raised while fetching or decoding the response
            is logged and re-raised unchanged.
    """
    try:
        resp = requests.get(f"{_BASE}/api/participants/", timeout=_TIMEOUT)
        resp.raise_for_status()
        participants = resp.json()
    except Exception as exc:
        logger.error("Failed to fetch ATT&CK Evaluations participants: %s", exc)
        raise

    # ``x.get(key) or default`` (instead of ``x.get(key, default)``) also
    # covers keys that are present but JSON ``null``, which would otherwise
    # blow up on ``.lower()`` / ``.upper()`` / iteration.
    crowdstrike = next(
        (p for p in participants if (p.get("name") or "").lower() == _VENDOR),
        None,
    )
    if not crowdstrike:
        raise ValueError(f"Vendor '{_VENDOR}' not found in evaluations participants list")

    rounds = [
        adv
        for adv in crowdstrike.get("adversaries_completed") or []
        if (adv.get("domain") or "").upper() == _DOMAIN
        and (adv.get("status") or "").upper() == "PUBLIC"
    ]
    rounds.sort(key=lambda adv: adv.get("eval_round") or 0)
    return rounds
|
||||
|
||||
|
||||
def get_latest_round() -> dict[str, Any]:
    """Return the last entry of ``fetch_available_rounds()``.

    Because that list is sorted by ``eval_round`` ascending, the last entry
    is the most recent PUBLIC ENTERPRISE round.

    Raises:
        ValueError: If no rounds are available.
    """
    available = fetch_available_rounds()
    if available:
        return available[-1]
    raise ValueError("No public Enterprise evaluation rounds found for CrowdStrike")
|
||||
|
||||
|
||||
def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
    """Fetch all per-substep detection results for a specific adversary round.

    Requests ``/api/results/`` for ``_VENDOR`` / ``_DOMAIN``, selects the
    entry of the response's ``adversaries`` list whose ``Adversary_Name``
    matches *adversary_name* (case-insensitive) and flattens every
    scenario → step → substep found under ``Detections_By_Step``.

    Args:
        adversary_name: Adversary name to look up in the results payload.

    Returns:
        A flat list with one dict per substep that carries a technique id.
        Keys: ``technique_id``, ``technique_name``, ``tactic_id``,
        ``tactic_name``, ``best_score``, ``detection_type``, ``note``.
        ``best_score`` / ``detection_type`` / ``note`` describe the
        highest-scoring detection of the substep (``0`` / ``"None"`` / ``""``
        when nothing scored above zero).

    Raises:
        ValueError: If the adversary is not present in the response.
        Exception: Any error raised while fetching or decoding the response
            is logged and re-raised unchanged.
    """
    url = f"{_BASE}/api/results/?participant={_VENDOR}&domain={_DOMAIN}"
    try:
        resp = requests.get(url, timeout=_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        logger.error("Failed to fetch ATT&CK Evaluations results: %s", exc)
        raise

    # Throughout: ``x.get(key) or default`` also covers keys that are present
    # but JSON ``null``, which ``x.get(key, default)`` would pass through as
    # None and crash on ``.lower()`` or iteration.
    adversaries = data.get("adversaries") or []
    wanted = adversary_name.lower()
    target = next(
        (a for a in adversaries if (a.get("Adversary_Name") or "").lower() == wanted),
        None,
    )
    if not target:
        raise ValueError(
            f"Adversary '{adversary_name}' not found in results. "
            f"Available: {[a.get('Adversary_Name') for a in adversaries]}"
        )

    substeps: list[dict[str, Any]] = []

    scenarios = target.get("Detections_By_Step") or {}
    for scenario_data in scenarios.values():
        for step in (scenario_data or {}).get("Steps") or []:
            for substep in step.get("Substeps") or []:
                # Prefer sub-technique over technique
                sub = substep.get("Subtechnique") or {}
                tech = substep.get("Technique") or {}
                tactic = substep.get("Tactic") or {}

                technique_id = (
                    sub.get("Subtechnique_Id")
                    or tech.get("Technique_Id")
                    or ""
                ).strip()
                technique_name = (
                    sub.get("Subtechnique_Name")
                    or tech.get("Technique_Name")
                    or "Unknown"
                ).strip()

                # A substep without any technique id cannot be mapped later.
                if not technique_id:
                    continue

                # Keep the first detection with the strictly highest score.
                best_score = 0
                best_type = "None"
                best_note = ""
                for det in substep.get("Detections") or []:
                    dtype = det.get("Detection_Type", "None")
                    s = _score(dtype)
                    if s > best_score:
                        best_score = s
                        best_type = dtype
                        best_note = det.get("Detection_Note") or ""

                substeps.append(
                    {
                        "technique_id": technique_id,
                        "technique_name": technique_name,
                        "tactic_id": tactic.get("Tactic_Id") or "",
                        "tactic_name": tactic.get("Tactic_Name") or "",
                        "best_score": best_score,
                        "detection_type": best_type,
                        "note": best_note,
                    }
                )

    return substeps
|
||||
|
||||
|
||||
def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
|
||||
"""Aggregate substep results per technique — keep best detection score."""
|
||||
by_technique: dict[str, dict] = {}
|
||||
for sub in substeps:
|
||||
tid = sub["technique_id"]
|
||||
if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
|
||||
by_technique[tid] = sub
|
||||
return by_technique
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main import function
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def import_evaluation_round(
    db: Session,
    adversary_name: str,
    adversary_display: str,
    eval_round: int,
    current_user: User,
) -> dict[str, Any]:
    """Import a single ATT&CK Evaluation round for CrowdStrike into the platform.

    Creates one Test per unique technique with the best detection result
    observed across all substeps for that technique. All tests land in
    ``in_review`` state — Blue Leads must confirm before they count as coverage.

    Args:
        db: Session used for every read and write; committed on success.
        adversary_name: Adversary name used for the results lookup and (lower-
            cased) as the idempotency key.
        adversary_display: Human-readable adversary name used in test texts.
        eval_round: Round number used in test names and descriptions.
        current_user: User recorded as creator, red validator and importer.

    Returns:
        Summary dict with ``created``, ``skipped``, ``techniques_covered``,
        ``adversary`` and ``eval_round``.

    Raises:
        ValueError: If a completed import for this adversary already exists
            (idempotency guard), or if the adversary is missing upstream.
        Exception: Any error during the write phase is re-raised after the
            session has been rolled back.
    """
    # Idempotency — refuse duplicate imports
    existing = (
        db.query(EvaluationImport)
        .filter(
            EvaluationImport.adversary_name == adversary_name.lower(),
            EvaluationImport.status == "completed",
        )
        .first()
    )
    if existing:
        raise ValueError(
            f"Round '{adversary_display}' (round {eval_round}) was already imported "
            f"on {existing.imported_at.date()}. Re-import is not allowed."
        )

    # Fetch and aggregate substep results
    substeps = fetch_results_for_adversary(adversary_name)
    by_technique = _aggregate_by_technique(substeps)

    # One timestamp for the whole import so every row it writes agrees.
    # NOTE(review): naive UTC is kept as in the original code; confirm the
    # column types before switching to timezone-aware datetimes.
    now = datetime.utcnow()

    created = 0
    skipped = 0
    # Techniques that received a test, keyed by primary key so each one is
    # recalculated once without being queried a second time.
    touched: dict[Any, Technique] = {}

    try:
        for mitre_id, agg in by_technique.items():
            # Look up the technique in our DB
            technique = (
                db.query(Technique)
                .filter(Technique.mitre_id == mitre_id.upper())
                .first()
            )
            if technique is None:
                skipped += 1
                continue

            detection_result = _score_to_result(agg["best_score"])

            description = (
                f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
                f"Vendor: CrowdStrike Falcon.\n"
                f"Detection type achieved: {agg['detection_type']}.\n\n"
                f"⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
                f"controlled MITRE lab environment against a simulated {adversary_display} "
                f"adversary. They do NOT represent your organisation's actual detection "
                f"capability. Validate in your own environment before approving."
            )
            if agg["note"]:
                description += f"\n\nMITRE note: {agg['note']}"

            red_summary = (
                f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
                f"Vendor: CrowdStrike Falcon\n"
                f"Best detection level: {agg['detection_type']}\n"
                f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
            )

            test = Test(
                technique_id=technique.id,
                name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
                description=description,
                platform=None,
                procedure_text=(
                    f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
                    f"See evaluation report at https://evals.mitre.org for full details."
                ),
                created_by=current_user.id,
                state=TestState.in_review,
                attack_success=True,
                red_summary=red_summary,
                red_validation_status="approved",
                red_validated_by=current_user.id,
                red_validated_at=now,
                detection_result=detection_result,
                blue_validation_status=None,
                execution_date=now,
                created_at=now,
            )
            db.add(test)
            # Flush so ``test.id`` is populated for the audit entry below.
            db.flush()

            log_action(
                db,
                user_id=current_user.id,
                action="eval_import_test",
                entity_type="test",
                entity_id=test.id,
                details={
                    "adversary": adversary_name,
                    "eval_round": eval_round,
                    "mitre_id": mitre_id,
                    "detection_type": agg["detection_type"],
                },
            )

            touched[technique.id] = technique
            created += 1

        # Recalculate coverage for all touched techniques
        for technique in touched.values():
            recalculate_technique_status(db, technique)

        # Record the import
        record = EvaluationImport(
            id=uuid.uuid4(),
            adversary_name=adversary_name.lower(),
            adversary_display=adversary_display,
            eval_round=eval_round,
            imported_at=now,
            imported_by=current_user.id,
            tests_created=created,
            techniques_covered=len(touched),
            status="completed",
            notes=f"Skipped {skipped} techniques not found in local DB.",
        )
        db.add(record)
        db.commit()
    except Exception:
        # Discard pending writes so a half-finished import does not linger
        # in the caller's session, then let the error propagate.
        db.rollback()
        raise

    logger.info(
        "ATT&CK Evaluation import complete — round %d (%s): %d tests created, %d skipped",
        eval_round, adversary_display, created, skipped,
    )
    return {
        "created": created,
        "skipped": skipped,
        "techniques_covered": len(touched),
        "adversary": adversary_display,
        "eval_round": eval_round,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# New-round check (used by the weekly scheduler)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_for_new_round(db: Session) -> dict[str, Any]:
    """Check if a new evaluation round is available that hasn't been imported yet.

    Looks up the latest round via ``get_latest_round()`` and checks whether a
    completed ``EvaluationImport`` exists for its (lower-cased) ``name``.

    Returns:
        ``{"new_round_available": bool, "already_imported": bool,
        "latest_round": dict | None}``.  When the latest round cannot be
        determined, both flags are ``False``, ``latest_round`` is ``None``
        and an additional ``"error"`` key holds the failure message; the
        exception itself is logged as a warning and not raised.
    """
    try:
        latest = get_latest_round()
    except Exception as exc:
        logger.warning("Could not check for new ATT&CK Evaluation round: %s", exc)
        # Same keys as the success path so callers can read
        # ``already_imported`` without special-casing failures.
        return {
            "new_round_available": False,
            "already_imported": False,
            "latest_round": None,
            "error": str(exc),
        }

    already = (
        db.query(EvaluationImport)
        .filter(
            EvaluationImport.adversary_name == latest["name"].lower(),
            EvaluationImport.status == "completed",
        )
        .first()
    )

    return {
        "new_round_available": already is None,
        "already_imported": already is not None,
        "latest_round": {
            "name": latest["name"],
            "display_name": latest.get("display_name", latest["name"]),
            "eval_round": latest["eval_round"],
        },
    }
|
||||
Reference in New Issue
Block a user