feat(evaluations): enrich eval tests with attack path, criteria and data sources

- Capture Step.Description (HTML stripped), step name/number, substep ref, criteria, and data sources from the MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique (multiple substep refs, criteria, step contexts) instead of keeping only the best-scoring one
- New helper functions _build_procedure_text(), _build_description(), _build_red_summary() generate rich narratives from the accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint /system/attck-evaluations/re-enrich to update already-imported tests without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 11:42:08 +02:00
parent 72983a022b
commit 7703c36ed7
4 changed files with 366 additions and 26 deletions
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
    return {"pending": count}
@router.post("/attck-evaluations/re-enrich")
def re_enrich_evaluation_round(
    payload: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.

    Updates procedure_text (attack path + criteria), description (data sources +
    substep references) and red_summary — without changing detection results,
    state or validation status.

    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }

    Useful to upgrade tests that were imported before the enrichment feature
    was added.

    Returns:
        The summary dict produced by the service function.

    Raises:
        HTTPException: 400 when ``adversary_name`` / ``eval_round`` are missing
            or have the wrong type; 502 when the re-enrichment itself fails.
    """
    from app.services.attck_evaluations_service import re_enrich_evaluation_round as _re_enrich

    adversary_name = payload.get("adversary_name", "")
    # Fall back to the API name when the display name is absent, null or empty.
    adversary_display = payload.get("adversary_display") or adversary_name
    eval_round = payload.get("eval_round", 0)

    if not adversary_name or not eval_round:
        raise HTTPException(status_code=400, detail="adversary_name and eval_round are required")

    if not isinstance(adversary_name, str) or not isinstance(adversary_display, str):
        raise HTTPException(
            status_code=400,
            detail="adversary_name and adversary_display must be strings",
        )

    # The round number ends up inside a name pattern used to look up tests, so
    # it must be a real positive integer (numeric strings are tolerated).
    if isinstance(eval_round, str) and eval_round.strip().isdecimal():
        eval_round = int(eval_round.strip())
    if isinstance(eval_round, bool) or not isinstance(eval_round, int) or eval_round < 1:
        raise HTTPException(status_code=400, detail="eval_round must be a positive integer")

    try:
        summary = _re_enrich(db, adversary_name, adversary_display, eval_round, current_user)
    except HTTPException:
        # Deliberate HTTP errors keep their own status code.
        raise
    except Exception as exc:
        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=502, detail=f"Re-enrich failed: {exc}") from exc

    return summary
@router.post("/email-test")
 def send_test_email(
    payload: EmailTestRequest,
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
 """
 import logging
 import re
 import uuid
 from datetime import datetime
 from typing import Any
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
    scenarios = target.get("Detections_By_Step", {})
    for _scenario_name, scenario_data in scenarios.items():
        for step in scenario_data.get("Steps", []):
            step_num = step.get("Step_Num", "")
            step_name = step.get("Step_Name", "")
            # Strip HTML tags from the Step.Description narrative
            step_desc_raw = step.get("Description") or ""
            step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
            step_description = re.sub(r"\s+", " ", step_description).strip()
            for substep in step.get("Substeps", []):
                # Prefer sub-technique over technique
                sub = substep.get("Subtechnique") or {}
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        best_type = dtype
                        best_note = det.get("Detection_Note", "")
                # Collect all unique data sources from screenshots across all detections
                data_sources: list[str] = sorted({
                    src
                    for det in detections
                    for sc in det.get("Screenshots", [])
                    for src in sc.get("Data_Sources", [])
                })
                substeps.append(
                    {
                        "technique_id": technique_id,
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
                        "best_score": best_score,
                        "detection_type": best_type,
                        "note": best_note,
                        # Enrichment fields from the API
                        "step_num": step_num,
                        "step_name": step_name,
                        "step_description": step_description,
                        "substep_ref": substep.get("Substep", ""),
                        "criteria": (substep.get("Criteria") or "").strip(),
                        "data_sources": data_sources,
                    }
                )
@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
 def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
-    """Aggregate substep results per technique — keep best detection score."""
+    """Aggregate substep results per technique.
    Keeps the best detection score and accumulates ALL occurrences so that
    the importer can build a rich attack-path narrative in procedure_text.
    """
    by_technique: dict[str, dict] = {}
    for sub in substeps:
        tid = sub["technique_id"]
-        if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
+        if tid not in by_technique:
-            by_technique[tid] = sub
+            by_technique[tid] = {**sub, "occurrences": []}
        # Always record this occurrence for the narrative
        by_technique[tid]["occurrences"].append({
            "substep_ref": sub["substep_ref"],
            "step_num": sub["step_num"],
            "step_name": sub["step_name"],
            "step_description": sub["step_description"],
            "criteria": sub["criteria"],
            "data_sources": sub["data_sources"],
            "detection_type": sub["detection_type"],
            "best_score": sub["best_score"],
            "note": sub["note"],
        })
        # Promote to best detection if this substep scored higher
        if sub["best_score"] > by_technique[tid]["best_score"]:
            by_technique[tid]["best_score"] = sub["best_score"]
            by_technique[tid]["detection_type"] = sub["detection_type"]
            by_technique[tid]["note"] = sub["note"]
            by_technique[tid]["tactic_id"] = sub["tactic_id"]
            by_technique[tid]["tactic_name"] = sub["tactic_name"]
    return by_technique
 def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
    """Build a rich attack-path narrative for the Test.procedure_text field."""
    occurrences = agg.get("occurrences", [])
    if not occurrences:
        return (
            f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
            f"See evaluation report at https://evals.mitre.org for full details."
        )
    lines: list[str] = []
    lines.append(f"ATT&CK Evaluation R{eval_round} — {adversary_display}\n")
    # Include step description(s) — deduplicated, one per step
    seen_steps: set = set()
    for occ in occurrences:
        step_key = str(occ.get("step_num", ""))
        step_name = occ.get("step_name", "")
        step_desc = occ.get("step_description", "")
        if step_key and step_key not in seen_steps and step_desc:
            seen_steps.add(step_key)
            truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
            lines.append(f"Step {step_key} — {step_name}:")
            lines.append(truncated)
            lines.append("")
    # List all attack criteria for this technique
    lines.append("Attack steps observed:")
    for occ in occurrences:
        ref = occ.get("substep_ref", "")
        criteria = occ.get("criteria", "")
        step_name = occ.get("step_name", "")
        if criteria:
            prefix = f"[{ref}]" if ref else "•"
            lines.append(f"  {prefix} {criteria}")
            if step_name:
                lines.append(f"       ↳ Step: {step_name}")
    return "\n".join(lines)
 def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
    """Build the full Test.description with detection details and attack path."""
    occurrences = agg.get("occurrences", [])
    # Collect all unique data sources across every occurrence of this technique
    all_data_sources: list[str] = sorted({
        src
        for occ in occurrences
        for src in occ.get("data_sources", [])
    })
    header = (
        f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
        f"Vendor: CrowdStrike Falcon.\n"
        f"Detection type achieved: {agg['detection_type']}."
    )
    ds_section = ""
    if all_data_sources:
        ds_section = "\n\nData sources observed:\n" + "\n".join(
            f"  • {ds}" for ds in all_data_sources
        )
    # Attack path / substep criteria section
    path_lines: list[str] = []
    for occ in occurrences:
        ref = occ.get("substep_ref", "")
        criteria = occ.get("criteria", "")
        step_name = occ.get("step_name", "")
        det_type = occ.get("detection_type", "")
        if criteria:
            label = f"[{ref}]" if ref else "•"
            step_label = f" ({step_name})" if step_name else ""
            det_label = f" — {det_type}" if det_type and det_type.lower() != "none" else ""
            path_lines.append(f"  {label}{step_label}{det_label}:")
            path_lines.append(f"    {criteria}")
    path_section = ""
    if path_lines:
        path_section = "\n\nAttack path — substep criteria:\n" + "\n".join(path_lines)
    warning = (
        f"\n\n⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
        f"controlled MITRE lab environment against a simulated {adversary_display} "
        f"adversary. They do NOT represent your organisation's actual detection "
        f"capability. Validate in your own environment before approving."
    )
    note_section = ""
    if agg.get("note"):
        note_section = f"\n\nMITRE note: {agg['note']}"
    return header + ds_section + path_section + warning + note_section
 def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
    """Build the Red Team summary for the Test.red_summary field."""
    occurrences = agg.get("occurrences", [])
    lines = [
        f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
        f"Vendor: CrowdStrike Falcon",
        f"Best detection level: {agg['detection_type']}",
        f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
    ]
    if occurrences:
        lines.append("")
        lines.append("Substeps:")
        for occ in occurrences:
            ref = occ.get("substep_ref", "")
            criteria = occ.get("criteria", "")
            step_name = occ.get("step_name", "")
            det = occ.get("detection_type", "")
            if criteria:
                tag = f"[{ref}]" if ref else "•"
                step_tag = f" {step_name}:" if step_name else ""
                det_tag = f" [{det}]" if det and det.lower() != "none" else ""
                lines.append(f"  {tag}{step_tag}{det_tag} {criteria}")
    return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # Main import function
 # ---------------------------------------------------------------------------
@@ -387,34 +559,16 @@ def import_evaluation_round(
        detection_result = _score_to_result(agg["best_score"])
-        description = (
+        description = _build_description(agg, adversary_display, eval_round)
-            f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
+        red_summary = _build_red_summary(agg, adversary_display, eval_round)
-            f"Vendor: CrowdStrike Falcon.\n"
+        procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
            f"Detection type achieved: {agg['detection_type']}.\n\n"
            f"⚠️  IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
            f"controlled MITRE lab environment against a simulated {adversary_display} "
            f"adversary. They do NOT represent your organisation's actual detection "
            f"capability. Validate in your own environment before approving."
        )
        if agg["note"]:
            description += f"\n\nMITRE note: {agg['note']}"
        red_summary = (
            f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
            f"Vendor: CrowdStrike Falcon\n"
            f"Best detection level: {agg['detection_type']}\n"
            f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
        )
        test = Test(
            technique_id=technique.id,
            name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
            description=description,
            platform=None,
-            procedure_text=(
+            procedure_text=procedure_text,
                f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
                f"See evaluation report at https://evals.mitre.org for full details."
            ),
            created_by=current_user.id,
            state=TestState.in_review,
            attack_success=True,
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
            "eval_round": latest["eval_round"],
        },
    }
 # ---------------------------------------------------------------------------
 # Re-enrich existing tests with richer API data
 # ---------------------------------------------------------------------------
 def re_enrich_evaluation_round(
     db: Session,
     adversary_name: str,
     adversary_display: str,
     eval_round: int,
     current_user: User,
 ) -> dict[str, Any]:
     """Update procedure_text / description / red_summary on already-imported tests
     for a given round using the enriched API data (attack path, criteria, data sources).

     This is non-destructive — it only updates the three narrative fields and does
     not change detection results, state, or validation status.

     Args:
         db: Database session used for lookups and the final commit.
         adversary_name: Adversary identifier passed to the results fetcher.
         adversary_display: Human-readable adversary name; used in the generated
             text and to locate the tests by their name prefix.
         eval_round: Evaluation round number.
         current_user: Not read by this function; kept for call compatibility.

     Returns:
         Summary dict with ``updated``, ``skipped``, ``adversary``,
         ``eval_round`` and a human-readable ``message``.
     """
     # Fetch & aggregate (same flow as import)
     substeps = fetch_results_for_adversary(adversary_name)
     by_technique = _aggregate_by_technique(substeps)

     # Imported tests are named "[EVAL R<round>] <adversary> — <technique>".
     # Matching the adversary as well as the round keeps this from overwriting
     # a test that belongs to a different adversary evaluated in the same round.
     # NOTE(review): assumes the display name passed here equals the one used
     # at import time; otherwise the tests are reported as skipped.
     name_prefix = f"[EVAL R{eval_round}] {adversary_display} — "

     updated = 0
     skipped = 0

     for mitre_id, agg in by_technique.items():
         technique = (
             db.query(Technique)
             .filter(Technique.mitre_id == mitre_id.upper())
             .first()
         )
         if technique is None:
             skipped += 1
             continue

         # Find the existing test for this round + adversary + technique.
         # autoescape keeps any "%" / "_" in the prefix literal.
         existing_test = (
             db.query(Test)
             .filter(
                 Test.technique_id == technique.id,
                 Test.name.startswith(name_prefix, autoescape=True),
             )
             .first()
         )
         if not existing_test:
             skipped += 1
             continue

         existing_test.description = _build_description(agg, adversary_display, eval_round)
         existing_test.red_summary = _build_red_summary(agg, adversary_display, eval_round)
         existing_test.procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
         updated += 1

     try:
         db.commit()
     except Exception:
         # Leave the session usable for the caller if the write fails.
         db.rollback()
         raise

     logger.info(
         "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
         eval_round, adversary_display, updated, skipped,
     )

     return {
         "updated": updated,
         "skipped": skipped,
         "adversary": adversary_display,
         "eval_round": eval_round,
         "message": (
             f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
             f"with attack path, criteria and data sources from MITRE API."
         ),
     }
@@ -109,6 +109,14 @@ export interface BulkApproveResult {
  message: string;
 }
 /** Summary returned by the re-enrich endpoint for one evaluation round. */
 export interface ReEnrichResult {
  /** Number of existing tests whose narrative fields were rewritten. */
  updated: number;
  /** Number of techniques skipped because no technique record or no existing test was found. */
  skipped: number;
  /** Display name of the adversary that was re-enriched. */
  adversary: string;
  /** Evaluation round number that was re-enriched. */
  eval_round: number;
  /** Human-readable summary of the operation. */
  message: string;
 }
 /** Bulk-approve all in-review evaluation tests (Blue Team side). */
 export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
  const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve");
@@ -120,3 +128,13 @@ export async function getEvalPendingCount(): Promise<{ pending: number }> {
  const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count");
  return data;
 }
 /**
  * Ask the backend to refresh the narrative fields (attack path, criteria,
  * data sources) of the tests belonging to an already-imported round.
  */
 export async function reEnrichEvaluationRound(payload: {
  adversary_name: string;
  adversary_display: string;
  eval_round: number;
 }): Promise<ReEnrichResult> {
  const response = await client.post<ReEnrichResult>(
    "/system/attck-evaluations/re-enrich",
    payload,
  );
  return response.data;
 }
@@ -39,6 +39,7 @@ import {
  checkNewEvaluationRound,
  bulkApproveEvaluationTests,
  getEvalPendingCount,
  reEnrichEvaluationRound,
  type SyncMitreResponse,
  type IntelScanResponse,
  type EvaluationRound,
@@ -46,6 +47,7 @@ import {
  type EvaluationImportResult,
  type NewRoundCheckResult,
  type BulkApproveResult,
  type ReEnrichResult,
 } from "../api/system";
 import {
  getTemplateStats,
@@ -74,6 +76,8 @@ export default function SystemPage() {
  const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
  const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
  const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
  const [reEnrichingRound, setReEnrichingRound] = useState<string | null>(null);
  const [reEnrichResult, setReEnrichResult] = useState<ReEnrichResult | null>(null);
  // ── Existing queries ─────────────────────────────────────────────
  const {
@@ -241,6 +245,18 @@ export default function SystemPage() {
    },
  });
  // Mutation that re-enriches one already-imported evaluation round.
  const reEnrichMutation = useMutation({
    mutationFn: (payload: { adversary_name: string; adversary_display: string; eval_round: number }) =>
      reEnrichEvaluationRound(payload),
    onSuccess: (data) => {
      // Keep the summary for the result banner and clear the in-progress round marker.
      setReEnrichResult(data);
      setReEnrichingRound(null);
    },
    onError: () => {
      // NOTE(review): the error itself is not stored or shown here; only the
      // in-progress round marker is cleared.
      setReEnrichingRound(null);
    },
  });
  const formatNextRun = (dateStr: string | null) => {
    if (!dateStr) return "Not scheduled";
    const date = new Date(dateStr);
@@ -561,6 +577,33 @@ export default function SystemPage() {
          </div>
        )}
        {/* Re-enrich result */}
        {reEnrichResult && (
          <div className="mb-4 rounded-lg border border-blue-500/30 bg-blue-900/20 p-4">
            <div className="flex items-center gap-2 mb-2">
              <Sparkles className="h-4 w-4 text-blue-400" />
              <span className="text-sm font-medium text-blue-400">Re-enrichment complete</span>
            </div>
            <div className="grid grid-cols-2 gap-3 text-center text-sm">
              <div>
                <p className="text-xl font-bold text-white">{reEnrichResult.updated}</p>
                <p className="text-xs text-gray-400">Tests enriched</p>
              </div>
              <div>
                <p className="text-sm font-medium text-blue-400 truncate">{reEnrichResult.adversary}</p>
                <p className="text-xs text-gray-400">Round {reEnrichResult.eval_round}</p>
              </div>
            </div>
            <p className="mt-2 text-xs text-gray-400">{reEnrichResult.message}</p>
            <button
              onClick={() => setReEnrichResult(null)}
              className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
            >
              Dismiss
            </button>
          </div>
        )}
        {/* Import result feedback */}
        {evalImportResult && (
          <div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
@@ -685,7 +728,26 @@ export default function SystemPage() {
                      </td>
                      <td className="py-3 pl-4">
                        {round.imported ? (
-                          <span className="text-xs text-gray-600 italic">Already imported</span>
+                          <button
                            onClick={() => {
                              setReEnrichingRound(round.name);
                              reEnrichMutation.mutate({
                                adversary_name: round.name,
                                adversary_display: round.display_name,
                                eval_round: round.eval_round,
                              });
                            }}
                            disabled={reEnrichMutation.isPending || importRoundMutation.isPending}
                            className="flex items-center gap-1.5 rounded-lg border border-blue-500/30 bg-blue-900/20 px-3 py-1.5 text-xs font-medium text-blue-400 hover:bg-blue-900/40 disabled:opacity-50 transition-colors"
                            title="Update existing tests with attack path, criteria and data sources from MITRE API"
                          >
                            {reEnrichMutation.isPending && reEnrichingRound === round.name ? (
                              <Loader2 className="h-3.5 w-3.5 animate-spin" />
                            ) : (
                              <Sparkles className="h-3.5 w-3.5" />
                            )}
                            Re-enrich
                          </button>
                        ) : (
                          <button
                            onClick={() => {