feat(evaluations): enrich eval tests with attack path, criteria and data sources

- Capture Step.Description (HTML stripped), step name/number, substep ref, criteria, and data sources from the MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique (multiple substep refs, criteria, step contexts) instead of keeping only the best-scoring one
- New helper functions _build_procedure_text(), _build_description(), _build_red_summary() generate rich narratives from the accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint /system/attck-evaluations/re-enrich to update already-imported tests without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage
2026-06-08 11:42:08 +02:00
parent 467afc334d
commit e2861a08bc
4 changed files with 366 additions and 26 deletions
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
    return {"pending": count}


+@router.post("/attck-evaluations/re-enrich")
+def re_enrich_evaluation_round(
+    payload: dict,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(require_role("admin")),
+):
+    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.
+
+    Updates procedure_text (attack path + criteria), description (data sources +
+    substep references) and red_summary — without changing detection results,
+    state or validation status.
+
+    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }
+
+    ``adversary_display`` is optional and falls back to ``adversary_name`` when
+    it is missing, null or empty. ``eval_round`` must be a positive integer
+    (an integer-like string such as "5" is accepted).
+
+    Useful to upgrade tests that were imported before the enrichment feature
+    was added.
+
+    Raises:
+        HTTPException: 400 when ``adversary_name`` or ``eval_round`` is missing
+            or invalid; 502 when the service call fails for any reason.
+    """
+    from app.services.attck_evaluations_service import re_enrich_evaluation_round as _re_enrich
+
+    adversary_name = payload.get("adversary_name") or ""
+    # `or` (not a .get default) so an explicit null / "" also falls back.
+    adversary_display = payload.get("adversary_display") or adversary_name
+
+    raw_round = payload.get("eval_round")
+    try:
+        # bool is an int subclass; reject it so `true` is not read as round 1.
+        eval_round = 0 if isinstance(raw_round, bool) else int(raw_round)
+    except (TypeError, ValueError, OverflowError):
+        eval_round = 0
+
+    name_ok = isinstance(adversary_name, str) and bool(adversary_name.strip())
+    if not name_ok or eval_round <= 0:
+        raise HTTPException(status_code=400, detail="adversary_name and eval_round are required")
+
+    try:
+        summary = _re_enrich(db, adversary_name, adversary_display, eval_round, current_user)
+    except Exception as exc:
+        # Boundary handler: log the full traceback, surface a 502 to the client.
+        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
+        raise HTTPException(status_code=502, detail=f"Re-enrich failed: {exc}") from exc
+
+    return summary
+
+
+
+
@router.post("/email-test")
 def send_test_email(
    payload: EmailTestRequest,
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
 """

 import logging
+import re
 import uuid
 from datetime import datetime
 from typing import Any
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
    scenarios = target.get("Detections_By_Step", {})
    for _scenario_name, scenario_data in scenarios.items():
        for step in scenario_data.get("Steps", []):
+            step_num = step.get("Step_Num", "")
+            step_name = step.get("Step_Name", "")
+            # Strip HTML tags from the Step.Description narrative
+            step_desc_raw = step.get("Description") or ""
+            step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
+            step_description = re.sub(r"\s+", " ", step_description).strip()
+
            for substep in step.get("Substeps", []):
                # Prefer sub-technique over technique
                sub = substep.get("Subtechnique") or {}
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        best_type = dtype
                        best_note = det.get("Detection_Note", "")

+                # Collect all unique data sources from screenshots across all detections
+                data_sources: list[str] = sorted({
+                    src
+                    for det in detections
+                    for sc in det.get("Screenshots", [])
+                    for src in sc.get("Data_Sources", [])
+                })
+
                substeps.append(
                    {
                        "technique_id": technique_id,
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        "best_score": best_score,
                        "detection_type": best_type,
                        "note": best_note,
+                        # Enrichment fields from the API
+                        "step_num": step_num,
+                        "step_name": step_name,
+                        "step_description": step_description,
+                        "substep_ref": substep.get("Substep", ""),
+                        "criteria": (substep.get("Criteria") or "").strip(),
+                        "data_sources": data_sources,
                    }
                )

@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:


 def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
-    """Aggregate substep results per technique — keep best detection score."""
+    """Aggregate substep results per technique.
+
+    Keeps the best detection score and accumulates ALL occurrences so that
+    the importer can build a rich attack-path narrative in procedure_text.
+
+    Each returned entry starts as a copy of the first substep seen for that
+    technique plus an ``occurrences`` list holding one record per substep.
+    When a later substep has a strictly higher ``best_score``, only the
+    entry's score, detection type, note and tactic fields are replaced; the
+    remaining top-level fields keep the first substep's values.
+
+    Args:
+        substeps: Substep dicts; each must provide every key read below
+            (``technique_id``, ``best_score``, ``substep_ref``, ``criteria``,
+            ``tactic_id``, ...), otherwise a KeyError is raised.
+
+    Returns:
+        Dict keyed by technique id mapping to the aggregated entry.
+    """
     by_technique: dict[str, dict] = {}
     for sub in substeps:
         tid = sub["technique_id"]
-        if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
-            by_technique[tid] = sub
+        if tid not in by_technique:
+            # New dict, so later updates never mutate the caller's substep.
+            by_technique[tid] = {**sub, "occurrences": []}
+
+        # Always record this occurrence for the narrative
+        by_technique[tid]["occurrences"].append({
+            "substep_ref": sub["substep_ref"],
+            "step_num": sub["step_num"],
+            "step_name": sub["step_name"],
+            "step_description": sub["step_description"],
+            "criteria": sub["criteria"],
+            "data_sources": sub["data_sources"],
+            "detection_type": sub["detection_type"],
+            "best_score": sub["best_score"],
+            "note": sub["note"],
+        })
+
+        # Promote to best detection if this substep scored higher
+        # (strict comparison: on a tie the earlier substep's values stay).
+        if sub["best_score"] > by_technique[tid]["best_score"]:
+            by_technique[tid]["best_score"] = sub["best_score"]
+            by_technique[tid]["detection_type"] = sub["detection_type"]
+            by_technique[tid]["note"] = sub["note"]
+            by_technique[tid]["tactic_id"] = sub["tactic_id"]
+            by_technique[tid]["tactic_name"] = sub["tactic_name"]
+
     return by_technique


+def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Build a rich attack-path narrative for the Test.procedure_text field.
+
+    The narrative has a title line, one description block per distinct step
+    number (first occurrence wins, text capped at 500 characters) and a list
+    of the substep criteria recorded for the technique.
+
+    Args:
+        agg: Aggregated technique entry; its ``occurrences`` list is read.
+        adversary_display: Adversary name shown in the text.
+        eval_round: Evaluation round number shown in the title.
+
+    Returns:
+        The narrative, or a generic one-sentence pointer to the evaluation
+        report when the occurrences carry neither a step description nor any
+        criteria (including when there are no occurrences at all).
+    """
+    occurrences = agg.get("occurrences") or []
+
+    # Step description(s) — deduplicated, one per step number
+    step_lines: list[str] = []
+    seen_steps: set[str] = set()
+    for occ in occurrences:
+        step_key = str(occ.get("step_num", ""))
+        step_name = occ.get("step_name", "")
+        step_desc = occ.get("step_description", "")
+        if not step_key or step_key in seen_steps or not step_desc:
+            continue
+        seen_steps.add(step_key)
+        truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
+        step_lines.extend([f"Step {step_key} — {step_name}:", truncated, ""])
+
+    # All attack criteria for this technique
+    criteria_lines: list[str] = []
+    for occ in occurrences:
+        criteria = occ.get("criteria", "")
+        if not criteria:
+            continue
+        ref = occ.get("substep_ref", "")
+        step_name = occ.get("step_name", "")
+        prefix = f"[{ref}]" if ref else "•"
+        criteria_lines.append(f"  {prefix} {criteria}")
+        if step_name:
+            criteria_lines.append(f"       ↳ Step: {step_name}")
+
+    # Nothing usable to narrate: same generic text as for "no occurrences",
+    # rather than a bare title followed by an empty section header.
+    if not step_lines and not criteria_lines:
+        return (
+            f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
+            f"See evaluation report at https://evals.mitre.org for full details."
+        )
+
+    lines: list[str] = [f"ATT&CK Evaluation R{eval_round} — {adversary_display}\n"]
+    lines.extend(step_lines)
+    if criteria_lines:
+        lines.append("Attack steps observed:")
+        lines.extend(criteria_lines)
+    elif lines[-1] == "":
+        # No criteria section follows, so drop the separator after the last step.
+        lines.pop()
+
+    return "\n".join(lines)
+
+
+
+
+def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Compose the Test.description text for an aggregated technique.
+
+    Sections, in order: source/vendor/detection header, the sorted unique data
+    sources (if any), the per-substep criteria (if any), the lab-environment
+    warning, and the MITRE note (if the aggregate carries one).
+    """
+    occs = agg.get("occurrences", [])
+
+    # Union of the data sources reported by every occurrence
+    unique_sources = set()
+    for occ in occs:
+        unique_sources.update(occ.get("data_sources", []))
+
+    sections = [
+        f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
+        f"Vendor: CrowdStrike Falcon.\n"
+        f"Detection type achieved: {agg['detection_type']}."
+    ]
+
+    if unique_sources:
+        bullets = [f"  • {name}" for name in sorted(unique_sources)]
+        sections.append("\n\nData sources observed:\n" + "\n".join(bullets))
+
+    # One "label: / criteria" pair per occurrence that has criteria text
+    criteria_block: list[str] = []
+    for occ in occs:
+        text = occ.get("criteria", "")
+        if not text:
+            continue
+        ref = occ.get("substep_ref", "")
+        step = occ.get("step_name", "")
+        kind = occ.get("detection_type", "")
+        heading = f"[{ref}]" if ref else "•"
+        if step:
+            heading += f" ({step})"
+        if kind and kind.lower() != "none":
+            heading += f" — {kind}"
+        criteria_block.append(f"  {heading}:")
+        criteria_block.append(f"    {text}")
+    if criteria_block:
+        sections.append("\n\nAttack path — substep criteria:\n" + "\n".join(criteria_block))
+
+    sections.append(
+        f"\n\n⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
+        f"controlled MITRE lab environment against a simulated {adversary_display} "
+        f"adversary. They do NOT represent your organisation's actual detection "
+        f"capability. Validate in your own environment before approving."
+    )
+
+    if agg.get("note"):
+        sections.append(f"\n\nMITRE note: {agg['note']}")
+
+    return "".join(sections)
+
+
+
+
+def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Compose the Test.red_summary text for an aggregated technique.
+
+    Emits four header lines (round/adversary, vendor, best detection level,
+    tactic). When the aggregate has occurrences, a "Substeps:" section follows
+    with one line per occurrence that carries criteria text.
+    """
+    occs = agg.get("occurrences", [])
+
+    summary = [
+        f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
+        "Vendor: CrowdStrike Falcon",
+        f"Best detection level: {agg['detection_type']}",
+        f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
+    ]
+    if not occs:
+        return "\n".join(summary)
+
+    summary += ["", "Substeps:"]
+    for occ in occs:
+        text = occ.get("criteria", "")
+        if not text:
+            continue
+        ref = occ.get("substep_ref", "")
+        step = occ.get("step_name", "")
+        level = occ.get("detection_type", "")
+        entry = f"  [{ref}]" if ref else "  •"
+        if step:
+            entry += f" {step}:"
+        if level and level.lower() != "none":
+            entry += f" [{level}]"
+        summary.append(f"{entry} {text}")
+
+    return "\n".join(summary)
+
+
+
+
 # ---------------------------------------------------------------------------
 # Main import function
 # ---------------------------------------------------------------------------
@@ -387,34 +559,16 @@ def import_evaluation_round(

        detection_result = _score_to_result(agg["best_score"])

-        description = (
-            f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
-            f"Vendor: CrowdStrike Falcon.\n"
-            f"Detection type achieved: {agg['detection_type']}.\n\n"
-            f"⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
-            f"controlled MITRE lab environment against a simulated {adversary_display} "
-            f"adversary. They do NOT represent your organisation's actual detection "
-            f"capability. Validate in your own environment before approving."
-        )
-        if agg["note"]:
-            description += f"\n\nMITRE note: {agg['note']}"
-
-        red_summary = (
-            f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
-            f"Vendor: CrowdStrike Falcon\n"
-            f"Best detection level: {agg['detection_type']}\n"
-            f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
-        )
+        description = _build_description(agg, adversary_display, eval_round)
+        red_summary = _build_red_summary(agg, adversary_display, eval_round)
+        procedure_text = _build_procedure_text(agg, adversary_display, eval_round)

        test = Test(
            technique_id=technique.id,
            name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
            description=description,
            platform=None,
-            procedure_text=(
-                f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
-                f"See evaluation report at https://evals.mitre.org for full details."
-            ),
+            procedure_text=procedure_text,
            created_by=current_user.id,
            state=TestState.in_review,
            attack_success=True,
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
            "eval_round": latest["eval_round"],
        },
    }
+
+
+# ---------------------------------------------------------------------------
+# Re-enrich existing tests with richer API data
+# ---------------------------------------------------------------------------
+
+
+def re_enrich_evaluation_round(
+    db: Session,
+    adversary_name: str,
+    adversary_display: str,
+    eval_round: int,
+    current_user: User,
+) -> dict[str, Any]:
+    """Update procedure_text / description / red_summary on already-imported tests
+    for a given round using the enriched API data (attack path, criteria, data sources).
+
+    This is non-destructive — it only updates the three narrative fields and does
+    not change detection results, state, or validation status.
+
+    Args:
+        db: Session used to look up techniques/tests and commit the updates.
+        adversary_name: Adversary identifier passed to the results fetch.
+        adversary_display: Display name used in the generated text and to
+            locate the imported tests by name (compared case-insensitively).
+        eval_round: Evaluation round number.
+        current_user: Not read by this function; kept in the signature for
+            callers that pass it.
+
+    Returns:
+        Summary dict with ``updated``, ``skipped``, ``adversary``,
+        ``eval_round`` and a human-readable ``message``.
+    """
+    # Fetch & aggregate (same flow as import)
+    substeps = fetch_results_for_adversary(adversary_name)
+    by_technique = _aggregate_by_technique(substeps)
+
+    # Imported tests are named "[EVAL R<round>] <adversary> — <technique>".
+    # Match the whole prefix, not just the round: if a round holds tests for
+    # more than one adversary, a round-only match could pick (and overwrite)
+    # another adversary's test for the same technique.
+    # LIKE wildcards in the display name are escaped so they match literally.
+    escaped_display = (
+        adversary_display.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+    )
+    name_pattern = f"[EVAL R{eval_round}] {escaped_display} — %"
+
+    updated = 0
+    skipped = 0
+
+    for mitre_id, agg in by_technique.items():
+        technique = (
+            db.query(Technique)
+            .filter(Technique.mitre_id == mitre_id.upper())
+            .first()
+        )
+        if technique is None:
+            skipped += 1
+            continue
+
+        # Find the existing test for this round + adversary + technique
+        existing_test = (
+            db.query(Test)
+            .filter(
+                Test.technique_id == technique.id,
+                Test.name.ilike(name_pattern, escape="\\"),
+            )
+            .first()
+        )
+        if not existing_test:
+            skipped += 1
+            continue
+
+        existing_test.description = _build_description(agg, adversary_display, eval_round)
+        existing_test.red_summary = _build_red_summary(agg, adversary_display, eval_round)
+        existing_test.procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
+        updated += 1
+
+    db.commit()
+
+    logger.info(
+        "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
+        eval_round, adversary_display, updated, skipped,
+    )
+    return {
+        "updated": updated,
+        "skipped": skipped,
+        "adversary": adversary_display,
+        "eval_round": eval_round,
+        "message": (
+            f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
+            f"with attack path, criteria and data sources from MITRE API."
+        ),
+    }