feat(evaluations): enrich eval tests with attack path, criteria and data sources

- Capture Step.Description (HTML stripped), step name/number, substep ref, criteria, and data sources from the MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique (multiple substep refs, criteria, step contexts) instead of keeping only the best-scoring one
- New helper functions _build_procedure_text(), _build_description(), _build_red_summary() generate rich narratives from accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint /system/attck-evaluations/re-enrich to update already-imported tests without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 11:42:08 +02:00
parent 72983a022b
commit 7703c36ed7
4 changed files with 366 additions and 26 deletions
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
    return {"pending": count}


+@router.post("/attck-evaluations/re-enrich")
+def re_enrich_evaluation_round(
+    payload: dict,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(require_role("admin")),
+):
+    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.
+
+    Updates procedure_text (attack path + criteria), description (data sources +
+    substep references) and red_summary — without changing detection results,
+    state or validation status.
+
+    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }
+
+    ``adversary_display`` falls back to ``adversary_name`` when it is missing,
+    null or empty. ``eval_round`` must be a positive integer (a decimal-digit
+    string is accepted and converted).
+
+    Useful to upgrade tests that were imported before the enrichment feature
+    was added.
+
+    Raises:
+        HTTPException: 400 when a required field is missing or malformed,
+            502 when the service call fails.
+    """
+    from app.services.attck_evaluations_service import re_enrich_evaluation_round as _re_enrich
+
+    adversary_name = payload.get("adversary_name", "")
+    # Use ``or`` rather than a .get() default so an explicit null/empty display
+    # name cannot end up rendered as "None" or "" in the generated narratives.
+    adversary_display = payload.get("adversary_display") or adversary_name
+    eval_round = payload.get("eval_round", 0)
+
+    if not adversary_name or not eval_round:
+        raise HTTPException(status_code=400, detail="adversary_name and eval_round are required")
+
+    if not isinstance(adversary_name, str) or not isinstance(adversary_display, str):
+        raise HTTPException(
+            status_code=400,
+            detail="adversary_name and adversary_display must be strings",
+        )
+
+    # The service formats eval_round with %d and embeds it in a test-name
+    # pattern, so reject anything that is not a real positive integer.
+    if isinstance(eval_round, str) and eval_round.strip().isdecimal():
+        eval_round = int(eval_round)
+    if isinstance(eval_round, bool) or not isinstance(eval_round, int) or eval_round <= 0:
+        raise HTTPException(status_code=400, detail="eval_round must be a positive integer")
+
+    try:
+        summary = _re_enrich(db, adversary_name, adversary_display, eval_round, current_user)
+    except Exception as exc:
+        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
+        # Chain the cause so the original traceback stays attached.
+        raise HTTPException(status_code=502, detail=f"Re-enrich failed: {exc}") from exc
+
+    return summary
+
+
+
@router.post("/email-test")
 def send_test_email(
    payload: EmailTestRequest,
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
 """

 import logging
+import re
 import uuid
 from datetime import datetime
 from typing import Any
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
    scenarios = target.get("Detections_By_Step", {})
    for _scenario_name, scenario_data in scenarios.items():
        for step in scenario_data.get("Steps", []):
+            step_num = step.get("Step_Num", "")
+            step_name = step.get("Step_Name", "")
+            # Strip HTML tags from the Step.Description narrative
+            step_desc_raw = step.get("Description") or ""
+            step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
+            step_description = re.sub(r"\s+", " ", step_description).strip()
+
            for substep in step.get("Substeps", []):
                # Prefer sub-technique over technique
                sub = substep.get("Subtechnique") or {}
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        best_type = dtype
                        best_note = det.get("Detection_Note", "")

+                # Collect all unique data sources from screenshots across all detections
+                data_sources: list[str] = sorted({
+                    src
+                    for det in detections
+                    for sc in det.get("Screenshots", [])
+                    for src in sc.get("Data_Sources", [])
+                })
+
                substeps.append(
                    {
                        "technique_id": technique_id,
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        "best_score": best_score,
                        "detection_type": best_type,
                        "note": best_note,
+                        # Enrichment fields from the API
+                        "step_num": step_num,
+                        "step_name": step_name,
+                        "step_description": step_description,
+                        "substep_ref": substep.get("Substep", ""),
+                        "criteria": (substep.get("Criteria") or "").strip(),
+                        "data_sources": data_sources,
                    }
                )

@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:


 def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
-    """Aggregate substep results per technique — keep best detection score."""
+    """Aggregate substep results per technique.
+
+    Keeps the best detection score and accumulates ALL occurrences so that
+    the importer can build a rich attack-path narrative in procedure_text.
+
+    Returns a dict keyed by technique id. Each value is a copy of the first
+    substep seen for that technique plus an ``occurrences`` list holding one
+    entry per substep. Only ``best_score``, ``detection_type``, ``note``,
+    ``tactic_id`` and ``tactic_name`` are overwritten when a later substep
+    scores higher; the remaining top-level fields stay those of the first
+    substep.
+    """
    by_technique: dict[str, dict] = {}
    for sub in substeps:
        tid = sub["technique_id"]
-        if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
-            by_technique[tid] = sub
+        if tid not in by_technique:
+            # Seed with a shallow copy so the caller's substep dict is not mutated
+            by_technique[tid] = {**sub, "occurrences": []}
+
+        # Always record this occurrence for the narrative
+        by_technique[tid]["occurrences"].append({
+            "substep_ref": sub["substep_ref"],
+            "step_num": sub["step_num"],
+            "step_name": sub["step_name"],
+            "step_description": sub["step_description"],
+            "criteria": sub["criteria"],
+            "data_sources": sub["data_sources"],
+            "detection_type": sub["detection_type"],
+            "best_score": sub["best_score"],
+            "note": sub["note"],
+        })
+
+        # Promote to best detection if this substep scored higher
+        # (strictly greater: on a tie the earlier substep's values are kept)
+        if sub["best_score"] > by_technique[tid]["best_score"]:
+            by_technique[tid]["best_score"] = sub["best_score"]
+            by_technique[tid]["detection_type"] = sub["detection_type"]
+            by_technique[tid]["note"] = sub["note"]
+            by_technique[tid]["tactic_id"] = sub["tactic_id"]
+            by_technique[tid]["tactic_name"] = sub["tactic_name"]
+
    return by_technique


+def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Build a rich attack-path narrative for the Test.procedure_text field.
+
+    Args:
+        agg: Aggregated technique entry; its ``occurrences`` list supplies the
+            step descriptions and substep criteria.
+        adversary_display: Human-readable adversary name used in the text.
+        eval_round: Evaluation round number used in the heading.
+
+    Returns:
+        A multi-line narrative made of a heading, one block per distinct step
+        that has a description (truncated to 500 characters) and an
+        "Attack steps observed:" list of substep criteria. When the
+        occurrences carry neither step descriptions nor criteria, the generic
+        one-sentence fallback is returned instead of an empty skeleton.
+    """
+    fallback = (
+        f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
+        f"See evaluation report at https://evals.mitre.org for full details."
+    )
+    occurrences = agg.get("occurrences", [])
+    if not occurrences:
+        return fallback
+
+    # Step description(s) — deduplicated, one block per step number
+    step_lines: list[str] = []
+    seen_steps: set[str] = set()
+    for occ in occurrences:
+        step_key = str(occ.get("step_num", ""))
+        step_name = occ.get("step_name", "")
+        step_desc = occ.get("step_description", "")
+        if step_key and step_key not in seen_steps and step_desc:
+            seen_steps.add(step_key)
+            truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
+            step_lines.append(f"Step {step_key} — {step_name}:")
+            step_lines.append(truncated)
+            step_lines.append("")
+
+    # All attack criteria recorded for this technique
+    criteria_lines: list[str] = []
+    for occ in occurrences:
+        criteria = occ.get("criteria", "")
+        if not criteria:
+            continue
+        ref = occ.get("substep_ref", "")
+        step_name = occ.get("step_name", "")
+        prefix = f"[{ref}]" if ref else "•"
+        criteria_lines.append(f"  {prefix} {criteria}")
+        if step_name:
+            criteria_lines.append(f"       ↳ Step: {step_name}")
+
+    # Nothing useful to narrate: fall back rather than emit a bare heading.
+    if not step_lines and not criteria_lines:
+        return fallback
+
+    lines: list[str] = [f"ATT&CK Evaluation R{eval_round} — {adversary_display}\n"]
+    lines.extend(step_lines)
+    # Only announce the criteria list when it has entries, mirroring the
+    # guarded sections in _build_description.
+    if criteria_lines:
+        lines.append("Attack steps observed:")
+        lines.extend(criteria_lines)
+
+    return "\n".join(lines)
+
+
+
+def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Assemble the Test.description text for one aggregated technique.
+
+    The result is the concatenation, in order, of: a source/vendor/detection
+    header, an optional sorted list of the distinct data sources seen across
+    all occurrences, an optional list of substep criteria, the lab-environment
+    warning, and an optional MITRE note taken from ``agg["note"]``.
+    """
+    occurrences = agg.get("occurrences", [])
+
+    # Distinct data sources over every occurrence of the technique
+    unique_sources: set = set()
+    for entry in occurrences:
+        unique_sources.update(entry.get("data_sources", []))
+
+    sections: list[str] = [
+        f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
+        f"Vendor: CrowdStrike Falcon.\n"
+        f"Detection type achieved: {agg['detection_type']}."
+    ]
+
+    if unique_sources:
+        bullets = [f"  • {name}" for name in sorted(unique_sources)]
+        sections.append("\n\nData sources observed:\n" + "\n".join(bullets))
+
+    # One two-line entry per occurrence that carries criteria
+    criteria_block: list[str] = []
+    for entry in occurrences:
+        criteria = entry.get("criteria", "")
+        if not criteria:
+            continue
+        ref = entry.get("substep_ref", "")
+        step_name = entry.get("step_name", "")
+        det_type = entry.get("detection_type", "")
+
+        label = "•" if not ref else f"[{ref}]"
+        step_label = "" if not step_name else f" ({step_name})"
+        # A detection type of "none" (any casing) is treated as no detection
+        if det_type and det_type.lower() != "none":
+            det_label = f" — {det_type}"
+        else:
+            det_label = ""
+        criteria_block += [f"  {label}{step_label}{det_label}:", f"    {criteria}"]
+
+    if criteria_block:
+        sections.append("\n\nAttack path — substep criteria:\n" + "\n".join(criteria_block))
+
+    sections.append(
+        f"\n\n⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
+        f"controlled MITRE lab environment against a simulated {adversary_display} "
+        f"adversary. They do NOT represent your organisation's actual detection "
+        f"capability. Validate in your own environment before approving."
+    )
+
+    if agg.get("note"):
+        sections.append(f"\n\nMITRE note: {agg['note']}")
+
+    return "".join(sections)
+
+
+
+def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
+    """Render the Red Team summary stored in Test.red_summary.
+
+    Produces four header lines (evaluation, vendor, best detection level,
+    tactic). When the aggregate has occurrences, a blank line and a
+    "Substeps:" list follow, with one line per occurrence that has criteria.
+    """
+    summary = [
+        f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
+        f"Vendor: CrowdStrike Falcon",
+        f"Best detection level: {agg['detection_type']}",
+        f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
+    ]
+
+    occurrences = agg.get("occurrences", [])
+    if not occurrences:
+        return "\n".join(summary)
+
+    summary += ["", "Substeps:"]
+    for entry in occurrences:
+        criteria = entry.get("criteria", "")
+        if not criteria:
+            continue
+        ref = entry.get("substep_ref", "")
+        step_name = entry.get("step_name", "")
+        det = entry.get("detection_type", "")
+
+        tag = "•" if not ref else f"[{ref}]"
+        step_tag = "" if not step_name else f" {step_name}:"
+        # A detection type of "none" (any casing) gets no detection tag
+        if det and det.lower() != "none":
+            det_tag = f" [{det}]"
+        else:
+            det_tag = ""
+        summary.append(f"  {tag}{step_tag}{det_tag} {criteria}")
+
+    return "\n".join(summary)
+
+
+
 # ---------------------------------------------------------------------------
 # Main import function
 # ---------------------------------------------------------------------------
@@ -387,34 +559,16 @@ def import_evaluation_round(

        detection_result = _score_to_result(agg["best_score"])

-        description = (
-            f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
-            f"Vendor: CrowdStrike Falcon.\n"
-            f"Detection type achieved: {agg['detection_type']}.\n\n"
-            f"⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
-            f"controlled MITRE lab environment against a simulated {adversary_display} "
-            f"adversary. They do NOT represent your organisation's actual detection "
-            f"capability. Validate in your own environment before approving."
-        )
-        if agg["note"]:
-            description += f"\n\nMITRE note: {agg['note']}"
-
-        red_summary = (
-            f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
-            f"Vendor: CrowdStrike Falcon\n"
-            f"Best detection level: {agg['detection_type']}\n"
-            f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
-        )
+        description = _build_description(agg, adversary_display, eval_round)
+        red_summary = _build_red_summary(agg, adversary_display, eval_round)
+        procedure_text = _build_procedure_text(agg, adversary_display, eval_round)

        test = Test(
            technique_id=technique.id,
            name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
            description=description,
            platform=None,
-            procedure_text=(
-                f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
-                f"See evaluation report at https://evals.mitre.org for full details."
-            ),
+            procedure_text=procedure_text,
            created_by=current_user.id,
            state=TestState.in_review,
            attack_success=True,
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
            "eval_round": latest["eval_round"],
        },
    }
+
+
+# ---------------------------------------------------------------------------
+# Re-enrich existing tests with richer API data
+# ---------------------------------------------------------------------------
+
+
+def re_enrich_evaluation_round(
+    db: Session,
+    adversary_name: str,
+    adversary_display: str,
+    eval_round: int,
+    current_user: User,
+) -> dict[str, Any]:
+    """Update procedure_text / description / red_summary on already-imported tests
+    for a given round using the enriched API data (attack path, criteria, data sources).
+
+    This is non-destructive — it only updates the three narrative fields and does
+    not change detection results, state, or validation status.
+
+    Args:
+        db: Open database session; committed once after all tests are updated.
+        adversary_name: Adversary identifier passed to fetch_results_for_adversary().
+        adversary_display: Display name; used in the generated text and to
+            locate the tests created for this adversary.
+        eval_round: Evaluation round number.
+        current_user: Accepted for signature parity with the caller; not read here.
+
+    Returns:
+        A summary dict with ``updated``, ``skipped``, ``adversary``,
+        ``eval_round`` and a human-readable ``message``.
+    """
+    # Fetch & aggregate (same flow as import)
+    substeps = fetch_results_for_adversary(adversary_name)
+    by_technique = _aggregate_by_technique(substeps)
+
+    # Imported tests are named "[EVAL R<n>] <adversary> — <technique>". Match on
+    # round AND adversary so another adversary's test for the same technique in
+    # the same round number is never overwritten.
+    name_prefix = f"[EVAL R{eval_round}] {adversary_display} — "
+
+    updated = 0
+    skipped = 0
+
+    for mitre_id, agg in by_technique.items():
+        technique = (
+            db.query(Technique)
+            .filter(Technique.mitre_id == mitre_id.upper())
+            .first()
+        )
+        if technique is None:
+            skipped += 1
+            continue
+
+        # Find the existing test for this round + adversary + technique.
+        # autoescape keeps any "%" / "_" in the display name literal.
+        existing_test = (
+            db.query(Test)
+            .filter(
+                Test.technique_id == technique.id,
+                Test.name.startswith(name_prefix, autoescape=True),
+            )
+            .first()
+        )
+        if not existing_test:
+            skipped += 1
+            continue
+
+        existing_test.description = _build_description(agg, adversary_display, eval_round)
+        existing_test.red_summary = _build_red_summary(agg, adversary_display, eval_round)
+        existing_test.procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
+        updated += 1
+
+    db.commit()
+
+    logger.info(
+        "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
+        eval_round, adversary_display, updated, skipped,
+    )
+    return {
+        "updated": updated,
+        "skipped": skipped,
+        "adversary": adversary_display,
+        "eval_round": eval_round,
+        "message": (
+            f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
+            f"with attack path, criteria and data sources from MITRE API."
+        ),
+    }
@@ -109,6 +109,14 @@ export interface BulkApproveResult {
  message: string;
 }

+/** Summary returned by POST /system/attck-evaluations/re-enrich. */
+export interface ReEnrichResult {
+  /** Number of existing tests whose narrative fields were rewritten. */
+  updated: number;
+  /** Number of techniques skipped (no matching technique or no existing test). */
+  skipped: number;
+  /** Display name of the adversary that was re-enriched. */
+  adversary: string;
+  /** Evaluation round number that was re-enriched. */
+  eval_round: number;
+  /** Human-readable summary sentence built by the backend. */
+  message: string;
+}
+
 /** Bulk-approve all in-review evaluation tests (Blue Team side). */
 export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
  const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve");
@@ -120,3 +128,13 @@ export async function getEvalPendingCount(): Promise<{ pending: number }> {
  const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count");
  return data;
 }
+
+/**
+ * Ask the backend to rebuild the narrative fields (attack path, criteria,
+ * data sources) of the tests already imported for one evaluation round.
+ */
+export async function reEnrichEvaluationRound(payload: {
+  adversary_name: string;
+  adversary_display: string;
+  eval_round: number;
+}): Promise<ReEnrichResult> {
+  const response = await client.post<ReEnrichResult>(
+    "/system/attck-evaluations/re-enrich",
+    payload,
+  );
+  return response.data;
+}
@@ -39,6 +39,7 @@ import {
  checkNewEvaluationRound,
  bulkApproveEvaluationTests,
  getEvalPendingCount,
+  reEnrichEvaluationRound,
  type SyncMitreResponse,
  type IntelScanResponse,
  type EvaluationRound,
@@ -46,6 +47,7 @@ import {
  type EvaluationImportResult,
  type NewRoundCheckResult,
  type BulkApproveResult,
+  type ReEnrichResult,
 } from "../api/system";
 import {
  getTemplateStats,
@@ -74,6 +76,8 @@ export default function SystemPage() {
  const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
  const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
  const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
+  const [reEnrichingRound, setReEnrichingRound] = useState<string | null>(null);
+  const [reEnrichResult, setReEnrichResult] = useState<ReEnrichResult | null>(null);

  // ── Existing queries ─────────────────────────────────────────────
  const {
@@ -241,6 +245,18 @@ export default function SystemPage() {
    },
  });

+  const reEnrichMutation = useMutation({
+    mutationFn: (payload: { adversary_name: string; adversary_display: string; eval_round: number }) =>
+      reEnrichEvaluationRound(payload),
+    onSuccess: (data) => {
+      setReEnrichResult(data);
+      setReEnrichingRound(null);
+    },
+    onError: () => {
+      setReEnrichingRound(null);
+    },
+  });
+
  const formatNextRun = (dateStr: string | null) => {
    if (!dateStr) return "Not scheduled";
    const date = new Date(dateStr);
@@ -561,6 +577,33 @@ export default function SystemPage() {
          </div>
        )}

+        {/* Re-enrich result */}
+        {reEnrichResult && (
+          <div className="mb-4 rounded-lg border border-blue-500/30 bg-blue-900/20 p-4">
+            <div className="flex items-center gap-2 mb-2">
+              <Sparkles className="h-4 w-4 text-blue-400" />
+              <span className="text-sm font-medium text-blue-400">Re-enrichment complete</span>
+            </div>
+            <div className="grid grid-cols-2 gap-3 text-center text-sm">
+              <div>
+                <p className="text-xl font-bold text-white">{reEnrichResult.updated}</p>
+                <p className="text-xs text-gray-400">Tests enriched</p>
+              </div>
+              <div>
+                <p className="text-sm font-medium text-blue-400 truncate">{reEnrichResult.adversary}</p>
+                <p className="text-xs text-gray-400">Round {reEnrichResult.eval_round}</p>
+              </div>
+            </div>
+            <p className="mt-2 text-xs text-gray-400">{reEnrichResult.message}</p>
+            <button
+              onClick={() => setReEnrichResult(null)}
+              className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
+            >
+              Dismiss
+            </button>
+          </div>
+        )}
+
        {/* Import result feedback */}
        {evalImportResult && (
          <div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
@@ -685,7 +728,26 @@ export default function SystemPage() {
                      </td>
                      <td className="py-3 pl-4">
                        {round.imported ? (
-                          <span className="text-xs text-gray-600 italic">Already imported</span>
+                          <button
+                            onClick={() => {
+                              setReEnrichingRound(round.name);
+                              reEnrichMutation.mutate({
+                                adversary_name: round.name,
+                                adversary_display: round.display_name,
+                                eval_round: round.eval_round,
+                              });
+                            }}
+                            disabled={reEnrichMutation.isPending || importRoundMutation.isPending}
+                            className="flex items-center gap-1.5 rounded-lg border border-blue-500/30 bg-blue-900/20 px-3 py-1.5 text-xs font-medium text-blue-400 hover:bg-blue-900/40 disabled:opacity-50 transition-colors"
+                            title="Update existing tests with attack path, criteria and data sources from MITRE API"
+                          >
+                            {reEnrichMutation.isPending && reEnrichingRound === round.name ? (
+                              <Loader2 className="h-3.5 w-3.5 animate-spin" />
+                            ) : (
+                              <Sparkles className="h-3.5 w-3.5" />
+                            )}
+                            Re-enrich
+                          </button>
                        ) : (
                          <button
                            onClick={() => {