feat(evaluations): enrich eval tests with attack path, criteria and data sources
Aegis CI / lint-and-test (push) Has been cancelled

- Capture Step.Description (HTML stripped), step name/number, substep ref,
  criteria, and data sources from MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique
  (multiple substep refs, criteria, step contexts) instead of keeping only
  the best-scoring one
- New helper functions _build_procedure_text(), _build_description(),
  _build_red_summary() generate rich narratives from accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint
  /system/attck-evaluations/re-enrich to update already-imported tests
  without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
kitos
2026-06-08 11:42:08 +02:00
parent 72983a022b
commit 7703c36ed7
4 changed files with 366 additions and 26 deletions
+35
View File
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
return {"pending": count} return {"pending": count}
@router.post("/attck-evaluations/re-enrich")
def re_enrich_evaluation_round(
    payload: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.

    Updates procedure_text (attack path + criteria), description (data sources +
    substep references) and red_summary — without changing detection results,
    state or validation status.

    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }

    Useful to upgrade tests that were imported before the enrichment feature
    was added.

    Raises:
        HTTPException(400): ``adversary_name`` is missing/blank, or ``eval_round``
            is missing, not an integer, or not positive.
        HTTPException(502): the service call failed for any reason.
    """
    # Imported locally under an alias because this endpoint shares the
    # service function's name.
    from app.services.attck_evaluations_service import (
        re_enrich_evaluation_round as _re_enrich,
    )

    adversary_name = str(payload.get("adversary_name") or "").strip()
    # Fall back to the slug when the display name is absent, null or blank;
    # a plain .get() default only covers the "absent" case.
    adversary_display = (
        str(payload.get("adversary_display") or "").strip() or adversary_name
    )

    # Accept 5 as well as "5", but reject anything that is not a number.
    try:
        eval_round = int(payload.get("eval_round") or 0)
    except (TypeError, ValueError):
        raise HTTPException(
            status_code=400, detail="eval_round must be an integer"
        ) from None

    if not adversary_name or eval_round <= 0:
        raise HTTPException(
            status_code=400, detail="adversary_name and eval_round are required"
        )

    try:
        summary = _re_enrich(
            db, adversary_name, adversary_display, eval_round, current_user
        )
    except Exception as exc:
        # Top-level boundary: log the full traceback, then surface a gateway
        # error while keeping the original exception chained for debugging.
        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
        raise HTTPException(
            status_code=502, detail=f"Re-enrich failed: {exc}"
        ) from exc

    return summary
@router.post("/email-test") @router.post("/email-test")
def send_test_email( def send_test_email(
payload: EmailTestRequest, payload: EmailTestRequest,
+250 -25
View File
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
""" """
import logging import logging
import re
import uuid import uuid
from datetime import datetime from datetime import datetime
from typing import Any from typing import Any
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
scenarios = target.get("Detections_By_Step", {}) scenarios = target.get("Detections_By_Step", {})
for _scenario_name, scenario_data in scenarios.items(): for _scenario_name, scenario_data in scenarios.items():
for step in scenario_data.get("Steps", []): for step in scenario_data.get("Steps", []):
step_num = step.get("Step_Num", "")
step_name = step.get("Step_Name", "")
# Strip HTML tags from the Step.Description narrative
step_desc_raw = step.get("Description") or ""
step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
step_description = re.sub(r"\s+", " ", step_description).strip()
for substep in step.get("Substeps", []): for substep in step.get("Substeps", []):
# Prefer sub-technique over technique # Prefer sub-technique over technique
sub = substep.get("Subtechnique") or {} sub = substep.get("Subtechnique") or {}
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
best_type = dtype best_type = dtype
best_note = det.get("Detection_Note", "") best_note = det.get("Detection_Note", "")
# Collect all unique data sources from screenshots across all detections
data_sources: list[str] = sorted({
src
for det in detections
for sc in det.get("Screenshots", [])
for src in sc.get("Data_Sources", [])
})
substeps.append( substeps.append(
{ {
"technique_id": technique_id, "technique_id": technique_id,
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
"best_score": best_score, "best_score": best_score,
"detection_type": best_type, "detection_type": best_type,
"note": best_note, "note": best_note,
# Enrichment fields from the API
"step_num": step_num,
"step_name": step_name,
"step_description": step_description,
"substep_ref": substep.get("Substep", ""),
"criteria": (substep.get("Criteria") or "").strip(),
"data_sources": data_sources,
} }
) )
@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]: def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
"""Aggregate substep results per technique — keep best detection score.""" """Aggregate substep results per technique.
Keeps the best detection score and accumulates ALL occurrences so that
the importer can build a rich attack-path narrative in procedure_text.
"""
by_technique: dict[str, dict] = {} by_technique: dict[str, dict] = {}
for sub in substeps: for sub in substeps:
tid = sub["technique_id"] tid = sub["technique_id"]
if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]: if tid not in by_technique:
by_technique[tid] = sub by_technique[tid] = {**sub, "occurrences": []}
# Always record this occurrence for the narrative
by_technique[tid]["occurrences"].append({
"substep_ref": sub["substep_ref"],
"step_num": sub["step_num"],
"step_name": sub["step_name"],
"step_description": sub["step_description"],
"criteria": sub["criteria"],
"data_sources": sub["data_sources"],
"detection_type": sub["detection_type"],
"best_score": sub["best_score"],
"note": sub["note"],
})
# Promote to best detection if this substep scored higher
if sub["best_score"] > by_technique[tid]["best_score"]:
by_technique[tid]["best_score"] = sub["best_score"]
by_technique[tid]["detection_type"] = sub["detection_type"]
by_technique[tid]["note"] = sub["note"]
by_technique[tid]["tactic_id"] = sub["tactic_id"]
by_technique[tid]["tactic_name"] = sub["tactic_name"]
return by_technique return by_technique
def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build a rich attack-path narrative for the Test.procedure_text field."""
occurrences = agg.get("occurrences", [])
if not occurrences:
return (
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
f"See evaluation report at https://evals.mitre.org for full details."
)
lines: list[str] = []
lines.append(f"ATT&CK Evaluation R{eval_round}{adversary_display}\n")
# Include step description(s) — deduplicated, one per step
seen_steps: set = set()
for occ in occurrences:
step_key = str(occ.get("step_num", ""))
step_name = occ.get("step_name", "")
step_desc = occ.get("step_description", "")
if step_key and step_key not in seen_steps and step_desc:
seen_steps.add(step_key)
truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
lines.append(f"Step {step_key}{step_name}:")
lines.append(truncated)
lines.append("")
# List all attack criteria for this technique
lines.append("Attack steps observed:")
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
if criteria:
prefix = f"[{ref}]" if ref else ""
lines.append(f" {prefix} {criteria}")
if step_name:
lines.append(f" ↳ Step: {step_name}")
return "\n".join(lines)
def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build the full Test.description with detection details and attack path."""
occurrences = agg.get("occurrences", [])
# Collect all unique data sources across every occurrence of this technique
all_data_sources: list[str] = sorted({
src
for occ in occurrences
for src in occ.get("data_sources", [])
})
header = (
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
f"Vendor: CrowdStrike Falcon.\n"
f"Detection type achieved: {agg['detection_type']}."
)
ds_section = ""
if all_data_sources:
ds_section = "\n\nData sources observed:\n" + "\n".join(
f"{ds}" for ds in all_data_sources
)
# Attack path / substep criteria section
path_lines: list[str] = []
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
det_type = occ.get("detection_type", "")
if criteria:
label = f"[{ref}]" if ref else ""
step_label = f" ({step_name})" if step_name else ""
det_label = f"{det_type}" if det_type and det_type.lower() != "none" else ""
path_lines.append(f" {label}{step_label}{det_label}:")
path_lines.append(f" {criteria}")
path_section = ""
if path_lines:
path_section = "\n\nAttack path — substep criteria:\n" + "\n".join(path_lines)
warning = (
f"\n\n⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
f"controlled MITRE lab environment against a simulated {adversary_display} "
f"adversary. They do NOT represent your organisation's actual detection "
f"capability. Validate in your own environment before approving."
)
note_section = ""
if agg.get("note"):
note_section = f"\n\nMITRE note: {agg['note']}"
return header + ds_section + path_section + warning + note_section
def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
"""Build the Red Team summary for the Test.red_summary field."""
occurrences = agg.get("occurrences", [])
lines = [
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
f"Vendor: CrowdStrike Falcon",
f"Best detection level: {agg['detection_type']}",
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
]
if occurrences:
lines.append("")
lines.append("Substeps:")
for occ in occurrences:
ref = occ.get("substep_ref", "")
criteria = occ.get("criteria", "")
step_name = occ.get("step_name", "")
det = occ.get("detection_type", "")
if criteria:
tag = f"[{ref}]" if ref else ""
step_tag = f" {step_name}:" if step_name else ""
det_tag = f" [{det}]" if det and det.lower() != "none" else ""
lines.append(f" {tag}{step_tag}{det_tag} {criteria}")
return "\n".join(lines)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Main import function # Main import function
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -387,34 +559,16 @@ def import_evaluation_round(
detection_result = _score_to_result(agg["best_score"]) detection_result = _score_to_result(agg["best_score"])
description = ( description = _build_description(agg, adversary_display, eval_round)
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n" red_summary = _build_red_summary(agg, adversary_display, eval_round)
f"Vendor: CrowdStrike Falcon.\n" procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
f"Detection type achieved: {agg['detection_type']}.\n\n"
f"⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
f"controlled MITRE lab environment against a simulated {adversary_display} "
f"adversary. They do NOT represent your organisation's actual detection "
f"capability. Validate in your own environment before approving."
)
if agg["note"]:
description += f"\n\nMITRE note: {agg['note']}"
red_summary = (
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
f"Vendor: CrowdStrike Falcon\n"
f"Best detection level: {agg['detection_type']}\n"
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
)
test = Test( test = Test(
technique_id=technique.id, technique_id=technique.id,
name=f"[EVAL R{eval_round}] {adversary_display}{technique.name}", name=f"[EVAL R{eval_round}] {adversary_display}{technique.name}",
description=description, description=description,
platform=None, platform=None,
procedure_text=( procedure_text=procedure_text,
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
f"See evaluation report at https://evals.mitre.org for full details."
),
created_by=current_user.id, created_by=current_user.id,
state=TestState.in_review, state=TestState.in_review,
attack_success=True, attack_success=True,
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
"eval_round": latest["eval_round"], "eval_round": latest["eval_round"],
}, },
} }
# ---------------------------------------------------------------------------
# Re-enrich existing tests with richer API data
# ---------------------------------------------------------------------------
def re_enrich_evaluation_round(
    db: Session,
    adversary_name: str,
    adversary_display: str,
    eval_round: int,
    current_user: User,
) -> dict[str, Any]:
    """Refresh the narrative fields of already-imported tests for one round.

    Fetches and aggregates the adversary's results exactly as the importer
    does, then rewrites description, red_summary and procedure_text on the
    first existing test found for each technique whose name starts with the
    round's ``[EVAL R<n>]`` prefix. No other column is assigned here, so
    detection results, state and validation status are left as they are.

    A technique counts as skipped when it has no Technique row or no
    matching test. All updates are committed in a single transaction.

    Returns:
        A summary dict with ``updated``, ``skipped``, ``adversary``,
        ``eval_round`` and a human-readable ``message``.
    """
    # Fetch & aggregate (same flow as import).
    aggregated = _aggregate_by_technique(fetch_results_for_adversary(adversary_name))

    # Name prefix shared by every test imported for this round.
    name_pattern = f"[EVAL R{eval_round}]%"

    updated = 0
    skipped = 0
    for mitre_id, agg in aggregated.items():
        technique = (
            db.query(Technique)
            .filter(Technique.mitre_id == mitre_id.upper())
            .first()
        )

        # Only look for a test once the technique itself is known.
        target = None
        if technique is not None:
            target = (
                db.query(Test)
                .filter(
                    Test.technique_id == technique.id,
                    Test.name.like(name_pattern),
                )
                .first()
            )

        if not target:
            skipped += 1
            continue

        target.description = _build_description(agg, adversary_display, eval_round)
        target.red_summary = _build_red_summary(agg, adversary_display, eval_round)
        target.procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
        updated += 1

    db.commit()

    logger.info(
        "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
        eval_round, adversary_display, updated, skipped,
    )

    message = (
        f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
        f"with attack path, criteria and data sources from MITRE API."
    )
    return {
        "updated": updated,
        "skipped": skipped,
        "adversary": adversary_display,
        "eval_round": eval_round,
        "message": message,
    }
+18
View File
@@ -109,6 +109,14 @@ export interface BulkApproveResult {
message: string; message: string;
} }
/** Summary returned by POST /system/attck-evaluations/re-enrich. */
export interface ReEnrichResult {
  /** Number of existing tests whose narrative fields were rewritten. */
  updated: number;
  /** Number of techniques with no technique record or no matching test for the round. */
  skipped: number;
  /** Display name of the adversary, echoed back from the request. */
  adversary: string;
  /** Evaluation round number, echoed back from the request. */
  eval_round: number;
  /** Human-readable summary of the operation. */
  message: string;
}
/** Bulk-approve all in-review evaluation tests (Blue Team side). */ /** Bulk-approve all in-review evaluation tests (Blue Team side). */
export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> { export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve"); const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve");
@@ -120,3 +128,13 @@ export async function getEvalPendingCount(): Promise<{ pending: number }> {
const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count"); const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count");
return data; return data;
} }
/**
 * Re-enrich an already-imported round with attack path, criteria and data sources.
 *
 * Posts the adversary slug, display name and round number to the re-enrich
 * endpoint and resolves with the response body.
 */
export async function reEnrichEvaluationRound(payload: {
  adversary_name: string;
  adversary_display: string;
  eval_round: number;
}): Promise<ReEnrichResult> {
  const response = await client.post<ReEnrichResult>(
    "/system/attck-evaluations/re-enrich",
    payload,
  );
  return response.data;
}
+63 -1
View File
@@ -39,6 +39,7 @@ import {
checkNewEvaluationRound, checkNewEvaluationRound,
bulkApproveEvaluationTests, bulkApproveEvaluationTests,
getEvalPendingCount, getEvalPendingCount,
reEnrichEvaluationRound,
type SyncMitreResponse, type SyncMitreResponse,
type IntelScanResponse, type IntelScanResponse,
type EvaluationRound, type EvaluationRound,
@@ -46,6 +47,7 @@ import {
type EvaluationImportResult, type EvaluationImportResult,
type NewRoundCheckResult, type NewRoundCheckResult,
type BulkApproveResult, type BulkApproveResult,
type ReEnrichResult,
} from "../api/system"; } from "../api/system";
import { import {
getTemplateStats, getTemplateStats,
@@ -74,6 +76,8 @@ export default function SystemPage() {
const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null); const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
const [showBulkApproveModal, setShowBulkApproveModal] = useState(false); const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null); const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
const [reEnrichingRound, setReEnrichingRound] = useState<string | null>(null);
const [reEnrichResult, setReEnrichResult] = useState<ReEnrichResult | null>(null);
// ── Existing queries ───────────────────────────────────────────── // ── Existing queries ─────────────────────────────────────────────
const { const {
@@ -241,6 +245,18 @@ export default function SystemPage() {
}, },
}); });
const reEnrichMutation = useMutation({
mutationFn: (payload: { adversary_name: string; adversary_display: string; eval_round: number }) =>
reEnrichEvaluationRound(payload),
onSuccess: (data) => {
setReEnrichResult(data);
setReEnrichingRound(null);
},
onError: () => {
setReEnrichingRound(null);
},
});
const formatNextRun = (dateStr: string | null) => { const formatNextRun = (dateStr: string | null) => {
if (!dateStr) return "Not scheduled"; if (!dateStr) return "Not scheduled";
const date = new Date(dateStr); const date = new Date(dateStr);
@@ -561,6 +577,33 @@ export default function SystemPage() {
</div> </div>
)} )}
{/* Re-enrich result */}
{reEnrichResult && (
<div className="mb-4 rounded-lg border border-blue-500/30 bg-blue-900/20 p-4">
<div className="flex items-center gap-2 mb-2">
<Sparkles className="h-4 w-4 text-blue-400" />
<span className="text-sm font-medium text-blue-400">Re-enrichment complete</span>
</div>
<div className="grid grid-cols-2 gap-3 text-center text-sm">
<div>
<p className="text-xl font-bold text-white">{reEnrichResult.updated}</p>
<p className="text-xs text-gray-400">Tests enriched</p>
</div>
<div>
<p className="text-sm font-medium text-blue-400 truncate">{reEnrichResult.adversary}</p>
<p className="text-xs text-gray-400">Round {reEnrichResult.eval_round}</p>
</div>
</div>
<p className="mt-2 text-xs text-gray-400">{reEnrichResult.message}</p>
<button
onClick={() => setReEnrichResult(null)}
className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
>
Dismiss
</button>
</div>
)}
{/* Import result feedback */} {/* Import result feedback */}
{evalImportResult && ( {evalImportResult && (
<div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4"> <div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
@@ -685,7 +728,26 @@ export default function SystemPage() {
</td> </td>
<td className="py-3 pl-4"> <td className="py-3 pl-4">
{round.imported ? ( {round.imported ? (
<span className="text-xs text-gray-600 italic">Already imported</span> <button
onClick={() => {
setReEnrichingRound(round.name);
reEnrichMutation.mutate({
adversary_name: round.name,
adversary_display: round.display_name,
eval_round: round.eval_round,
});
}}
disabled={reEnrichMutation.isPending || importRoundMutation.isPending}
className="flex items-center gap-1.5 rounded-lg border border-blue-500/30 bg-blue-900/20 px-3 py-1.5 text-xs font-medium text-blue-400 hover:bg-blue-900/40 disabled:opacity-50 transition-colors"
title="Update existing tests with attack path, criteria and data sources from MITRE API"
>
{reEnrichMutation.isPending && reEnrichingRound === round.name ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<Sparkles className="h-3.5 w-3.5" />
)}
Re-enrich
</button>
) : ( ) : (
<button <button
onClick={() => { onClick={() => {