feat(evaluations): enrich eval tests with attack path, criteria and data sources

- Capture Step.Description (HTML stripped), step name/number, substep ref,
  criteria, and data sources from MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique
  (multiple substep refs, criteria, step contexts) instead of keeping only
  the best-scoring one
- New helper functions _build_procedure_text(), _build_description(),
  _build_red_summary() generate rich narratives from accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint
  /system/attck-evaluations/re-enrich to update already-imported tests
  without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage
This commit is contained in:
kitos
2026-06-08 11:42:08 +02:00
parent 467afc334d
commit e2861a08bc
4 changed files with 366 additions and 26 deletions
+35
View File
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
return {"pending": count}
@router.post("/attck-evaluations/re-enrich")
def re_enrich_evaluation_round(
    payload: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.

    Updates procedure_text (attack path + criteria), description (data sources +
    substep references) and red_summary — without changing detection results,
    state or validation status.

    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }

    Useful to upgrade tests that were imported before the enrichment feature
    was added.

    Returns:
        The summary dict produced by the service function.

    Raises:
        HTTPException: 400 when ``adversary_name`` is missing/blank or
            ``eval_round`` is not a positive integer; 502 when the service
            call fails for any other reason.
    """
    # Imported locally under an alias: this endpoint has the same name as the
    # service function it delegates to.
    from app.services.attck_evaluations_service import re_enrich_evaluation_round as _re_enrich

    adversary_name = payload.get("adversary_name")
    raw_round = payload.get("eval_round")
    if not isinstance(adversary_name, str) or not adversary_name.strip() or not raw_round:
        raise HTTPException(status_code=400, detail="adversary_name and eval_round are required")
    adversary_name = adversary_name.strip()

    # bool is an int subclass, so reject it explicitly; numeric strings are
    # still accepted so that previously working clients keep working.
    eval_round = 0
    if not isinstance(raw_round, bool) and isinstance(raw_round, (int, str)):
        try:
            eval_round = int(raw_round)
        except ValueError:
            eval_round = 0
    if eval_round <= 0:
        raise HTTPException(status_code=400, detail="eval_round must be a positive integer")

    # Fall back to the API name when the display name is absent, null or blank.
    display = payload.get("adversary_display")
    adversary_display = display if isinstance(display, str) and display.strip() else adversary_name

    try:
        summary = _re_enrich(db, adversary_name, adversary_display, eval_round, current_user)
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 502.
        raise
    except Exception as exc:
        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=502, detail=f"Re-enrich failed: {exc}") from exc
    return summary
@router.post("/email-test")
def send_test_email(
payload: EmailTestRequest,
+250 -25
View File
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
"""
import logging
import re
import uuid
from datetime import datetime
from typing import Any
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
scenarios = target.get("Detections_By_Step", {})
for _scenario_name, scenario_data in scenarios.items():
for step in scenario_data.get("Steps", []):
step_num = step.get("Step_Num", "")
step_name = step.get("Step_Name", "")
# Strip HTML tags from the Step.Description narrative
step_desc_raw = step.get("Description") or ""
step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
step_description = re.sub(r"\s+", " ", step_description).strip()
for substep in step.get("Substeps", []):
# Prefer sub-technique over technique
sub = substep.get("Subtechnique") or {}
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
best_type = dtype
best_note = det.get("Detection_Note", "")
# Collect all unique data sources from screenshots across all detections
data_sources: list[str] = sorted({
src
for det in detections
for sc in det.get("Screenshots", [])
for src in sc.get("Data_Sources", [])
})
substeps.append(
{
"technique_id": technique_id,
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
"best_score": best_score,
"detection_type": best_type,
"note": best_note,
# Enrichment fields from the API
"step_num": step_num,
"step_name": step_name,
"step_description": step_description,
"substep_ref": substep.get("Substep", ""),
"criteria": (substep.get("Criteria") or "").strip(),
"data_sources": data_sources,
}
)
@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
"""Aggregate substep results per technique — keep best detection score."""
"""Aggregate substep results per technique.
Keeps the best detection score and accumulates ALL occurrences so that
the importer can build a rich attack-path narrative in procedure_text.
"""
by_technique: dict[str, dict] = {}
for sub in substeps:
tid = sub["technique_id"]
if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
by_technique[tid] = sub
if tid not in by_technique:
by_technique[tid] = {**sub, "occurrences": []}
# Always record this occurrence for the narrative
by_technique[tid]["occurrences"].append({
"substep_ref": sub["substep_ref"],
"step_num": sub["step_num"],
"step_name": sub["step_name"],
"step_description": sub["step_description"],
"criteria": sub["criteria"],
"data_sources": sub["data_sources"],
"detection_type": sub["detection_type"],
"best_score": sub["best_score"],
"note": sub["note"],
})
# Promote to best detection if this substep scored higher
if sub["best_score"] > by_technique[tid]["best_score"]:
by_technique[tid]["best_score"] = sub["best_score"]
by_technique[tid]["detection_type"] = sub["detection_type"]
by_technique[tid]["note"] = sub["note"]
by_technique[tid]["tactic_id"] = sub["tactic_id"]
by_technique[tid]["tactic_name"] = sub["tactic_name"]
return by_technique
def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build a rich attack-path narrative for the Test.procedure_text field."""
occurrences = agg.get("occurrences", [])
if not occurrences:
return (
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
f"See evaluation report at https://evals.mitre.org for full details."
)
lines: list[str] = []
lines.append(f"ATT&CK Evaluation R{eval_round}{adversary_display}\n")
# Include step description(s) — deduplicated, one per step
seen_steps: set = set()
for occ in occurrences:
step_key = str(occ.get("step_num", ""))
step_name = occ.get("step_name", "")
step_desc = occ.get("step_description", "")
if step_key and step_key not in seen_steps and step_desc:
seen_steps.add(step_key)
truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
lines.append(f"Step {step_key}{step_name}:")
lines.append(truncated)
lines.append("")
# List all attack criteria for this technique
lines.append("Attack steps observed:")
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
if criteria:
prefix = f"[{ref}]" if ref else ""
lines.append(f" {prefix} {criteria}")
if step_name:
lines.append(f" ↳ Step: {step_name}")
return "\n".join(lines)
def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build the full Test.description with detection details and attack path."""
occurrences = agg.get("occurrences", [])
# Collect all unique data sources across every occurrence of this technique
all_data_sources: list[str] = sorted({
src
for occ in occurrences
for src in occ.get("data_sources", [])
})
header = (
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
f"Vendor: CrowdStrike Falcon.\n"
f"Detection type achieved: {agg['detection_type']}."
)
ds_section = ""
if all_data_sources:
ds_section = "\n\nData sources observed:\n" + "\n".join(
f"{ds}" for ds in all_data_sources
)
# Attack path / substep criteria section
path_lines: list[str] = []
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
det_type = occ.get("detection_type", "")
if criteria:
label = f"[{ref}]" if ref else ""
step_label = f" ({step_name})" if step_name else ""
det_label = f"{det_type}" if det_type and det_type.lower() != "none" else ""
path_lines.append(f" {label}{step_label}{det_label}:")
path_lines.append(f" {criteria}")
path_section = ""
if path_lines:
path_section = "\n\nAttack path — substep criteria:\n" + "\n".join(path_lines)
warning = (
f"\n\n⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
f"controlled MITRE lab environment against a simulated {adversary_display} "
f"adversary. They do NOT represent your organisation's actual detection "
f"capability. Validate in your own environment before approving."
)
note_section = ""
if agg.get("note"):
note_section = f"\n\nMITRE note: {agg['note']}"
return header + ds_section + path_section + warning + note_section
def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build the Red Team summary for the Test.red_summary field."""
occurrences = agg.get("occurrences", [])
lines = [
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
f"Vendor: CrowdStrike Falcon",
f"Best detection level: {agg['detection_type']}",
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
]
if occurrences:
lines.append("")
lines.append("Substeps:")
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
det = occ.get("detection_type", "")
if criteria:
tag = f"[{ref}]" if ref else ""
step_tag = f" {step_name}:" if step_name else ""
det_tag = f" [{det}]" if det and det.lower() != "none" else ""
lines.append(f" {tag}{step_tag}{det_tag} {criteria}")
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main import function
# ---------------------------------------------------------------------------
@@ -387,34 +559,16 @@ def import_evaluation_round(
detection_result = _score_to_result(agg["best_score"])
description = (
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
f"Vendor: CrowdStrike Falcon.\n"
f"Detection type achieved: {agg['detection_type']}.\n\n"
f"⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
f"controlled MITRE lab environment against a simulated {adversary_display} "
f"adversary. They do NOT represent your organisation's actual detection "
f"capability. Validate in your own environment before approving."
)
if agg["note"]:
description += f"\n\nMITRE note: {agg['note']}"
red_summary = (
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
f"Vendor: CrowdStrike Falcon\n"
f"Best detection level: {agg['detection_type']}\n"
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
)
description = _build_description(agg, adversary_display, eval_round)
red_summary = _build_red_summary(agg, adversary_display, eval_round)
procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
test = Test(
technique_id=technique.id,
name=f"[EVAL R{eval_round}] {adversary_display}{technique.name}",
description=description,
platform=None,
procedure_text=(
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
f"See evaluation report at https://evals.mitre.org for full details."
),
procedure_text=procedure_text,
created_by=current_user.id,
state=TestState.in_review,
attack_success=True,
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
"eval_round": latest["eval_round"],
},
}
# ---------------------------------------------------------------------------
# Re-enrich existing tests with richer API data
# ---------------------------------------------------------------------------
def re_enrich_evaluation_round(
    db: Session,
    adversary_name: str,
    adversary_display: str,
    eval_round: int,
    current_user: User,
) -> dict[str, Any]:
    """Update procedure_text / description / red_summary on already-imported tests
    for a given round using the enriched API data (attack path, criteria, data sources).

    This is non-destructive — it only updates the three narrative fields and does
    not change detection results, state, or validation status.

    Args:
        db: Database session used for the lookups and the final commit.
        adversary_name: Adversary identifier passed to the results fetcher.
        adversary_display: Human-readable adversary name; used in the
            generated text and to locate the imported tests by name.
        eval_round: Evaluation round number.
        current_user: Not read by this function; kept so the signature matches
            what callers pass.

    Returns:
        A summary dict with ``updated``, ``skipped``, ``adversary``,
        ``eval_round`` and a human-readable ``message``. A technique counts as
        skipped when it is unknown to the database or has no imported test for
        this round and adversary.

    Raises:
        Exception: Whatever the fetch or the database raises; pending changes
            are rolled back before a database-phase error propagates.
    """
    # Fetch & aggregate (same flow as import)
    substeps = fetch_results_for_adversary(adversary_name)
    by_technique = _aggregate_by_technique(substeps)

    # Imported tests are named "[EVAL R<round>] <adversary display>...".
    # Matching on the round alone could pick up — and overwrite — a test that
    # belongs to a different adversary evaluated in the same round.
    name_prefix = f"[EVAL R{eval_round}] {adversary_display}"

    updated = 0
    skipped = 0

    try:
        for mitre_id, agg in by_technique.items():
            technique = (
                db.query(Technique)
                .filter(Technique.mitre_id == mitre_id.upper())
                .first()
            )
            if technique is None:
                skipped += 1
                continue

            # Find the existing test for this round + adversary + technique.
            # autoescape keeps any "%" or "_" in the display name literal.
            existing_test = (
                db.query(Test)
                .filter(
                    Test.technique_id == technique.id,
                    Test.name.startswith(name_prefix, autoescape=True),
                )
                .first()
            )
            if existing_test is None:
                skipped += 1
                continue

            existing_test.description = _build_description(agg, adversary_display, eval_round)
            existing_test.red_summary = _build_red_summary(agg, adversary_display, eval_round)
            existing_test.procedure_text = _build_procedure_text(
                agg, adversary_display, eval_round
            )
            updated += 1

        db.commit()
    except Exception:
        # Never leave a half-applied enrichment pending on the session.
        db.rollback()
        raise

    logger.info(
        "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
        eval_round, adversary_display, updated, skipped,
    )

    return {
        "updated": updated,
        "skipped": skipped,
        "adversary": adversary_display,
        "eval_round": eval_round,
        "message": (
            f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
            f"with attack path, criteria and data sources from MITRE API."
        ),
    }
+18
View File
@@ -109,6 +109,14 @@ export interface BulkApproveResult {
message: string;
}
/** Summary returned by the re-enrich endpoint for one evaluation round. */
export interface ReEnrichResult {
/** Number of existing tests whose narrative fields were rewritten. */
updated: number;
/** Number of techniques skipped (unknown technique or no imported test found). */
skipped: number;
/** Display name of the adversary that was re-enriched. */
adversary: string;
/** Evaluation round number that was re-enriched. */
eval_round: number;
/** Human-readable summary of the operation. */
message: string;
}
/** Bulk-approve all in-review evaluation tests (Blue Team side). */
export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve");
@@ -120,3 +128,13 @@ export async function getEvalPendingCount(): Promise<{ pending: number }> {
const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count");
return data;
}
/**
 * Ask the backend to refresh an already-imported round's tests with the
 * attack path, criteria and data sources.
 *
 * @param payload Adversary API name, adversary display name and round number.
 * @returns The re-enrichment summary sent back by the server.
 */
export async function reEnrichEvaluationRound(payload: {
  adversary_name: string;
  adversary_display: string;
  eval_round: number;
}): Promise<ReEnrichResult> {
  const response = await client.post<ReEnrichResult>(
    "/system/attck-evaluations/re-enrich",
    payload,
  );
  return response.data;
}
+63 -1
View File
@@ -39,6 +39,7 @@ import {
checkNewEvaluationRound,
bulkApproveEvaluationTests,
getEvalPendingCount,
reEnrichEvaluationRound,
type SyncMitreResponse,
type IntelScanResponse,
type EvaluationRound,
@@ -46,6 +47,7 @@ import {
type EvaluationImportResult,
type NewRoundCheckResult,
type BulkApproveResult,
type ReEnrichResult,
} from "../api/system";
import {
getTemplateStats,
@@ -74,6 +76,8 @@ export default function SystemPage() {
const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
const [reEnrichingRound, setReEnrichingRound] = useState<string | null>(null);
const [reEnrichResult, setReEnrichResult] = useState<ReEnrichResult | null>(null);
// ── Existing queries ─────────────────────────────────────────────
const {
@@ -241,6 +245,18 @@ export default function SystemPage() {
},
});
// Mutation that asks the backend to re-enrich one already-imported round.
const reEnrichMutation = useMutation({
mutationFn: (payload: { adversary_name: string; adversary_display: string; eval_round: number }) =>
reEnrichEvaluationRound(payload),
onSuccess: (data) => {
// Keep the summary so the result banner renders, then clear the per-row spinner marker.
setReEnrichResult(data);
setReEnrichingRound(null);
},
onError: () => {
// NOTE(review): a failure only clears the spinner marker; no error state is set here.
setReEnrichingRound(null);
},
});
const formatNextRun = (dateStr: string | null) => {
if (!dateStr) return "Not scheduled";
const date = new Date(dateStr);
@@ -561,6 +577,33 @@ export default function SystemPage() {
</div>
)}
{/* Re-enrich result */}
{reEnrichResult && (
<div className="mb-4 rounded-lg border border-blue-500/30 bg-blue-900/20 p-4">
<div className="flex items-center gap-2 mb-2">
<Sparkles className="h-4 w-4 text-blue-400" />
<span className="text-sm font-medium text-blue-400">Re-enrichment complete</span>
</div>
<div className="grid grid-cols-2 gap-3 text-center text-sm">
<div>
<p className="text-xl font-bold text-white">{reEnrichResult.updated}</p>
<p className="text-xs text-gray-400">Tests enriched</p>
</div>
<div>
<p className="text-sm font-medium text-blue-400 truncate">{reEnrichResult.adversary}</p>
<p className="text-xs text-gray-400">Round {reEnrichResult.eval_round}</p>
</div>
</div>
<p className="mt-2 text-xs text-gray-400">{reEnrichResult.message}</p>
<button
onClick={() => setReEnrichResult(null)}
className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
>
Dismiss
</button>
</div>
)}
{/* Import result feedback */}
{evalImportResult && (
<div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
@@ -685,7 +728,26 @@ export default function SystemPage() {
</td>
<td className="py-3 pl-4">
{round.imported ? (
<span className="text-xs text-gray-600 italic">Already imported</span>
<button
onClick={() => {
setReEnrichingRound(round.name);
reEnrichMutation.mutate({
adversary_name: round.name,
adversary_display: round.display_name,
eval_round: round.eval_round,
});
}}
disabled={reEnrichMutation.isPending || importRoundMutation.isPending}
className="flex items-center gap-1.5 rounded-lg border border-blue-500/30 bg-blue-900/20 px-3 py-1.5 text-xs font-medium text-blue-400 hover:bg-blue-900/40 disabled:opacity-50 transition-colors"
title="Update existing tests with attack path, criteria and data sources from MITRE API"
>
{reEnrichMutation.isPending && reEnrichingRound === round.name ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<Sparkles className="h-3.5 w-3.5" />
)}
Re-enrich
</button>
) : (
<button
onClick={() => {