feat(evaluations): enrich eval tests with attack path, criteria and data sources
Aegis CI / lint-and-test (push) Has been cancelled
Aegis CI / lint-and-test (push) Has been cancelled
- Capture Step.Description (HTML stripped), step name/number, substep ref, criteria, and data sources from the MITRE ATT&CK Evaluations API
- _aggregate_by_technique() now accumulates ALL occurrences per technique (multiple substep refs, criteria, step contexts) instead of keeping only the best-scoring one
- New helper functions _build_procedure_text(), _build_description(), _build_red_summary() generate rich narratives from the accumulated occurrences
- New re_enrich_evaluation_round() service function + POST endpoint /system/attck-evaluations/re-enrich to update already-imported tests without changing detection results or validation state
- Frontend: Re-enrich button per imported round + result banner in SystemPage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -747,6 +747,41 @@ def get_pending_evaluation_count(
|
||||
return {"pending": count}
|
||||
|
||||
|
||||
@router.post("/attck-evaluations/re-enrich")
def re_enrich_evaluation_round(
    payload: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Re-enrich already-imported evaluation tests with rich data from the MITRE API.

    Updates procedure_text (attack path + criteria), description (data sources +
    substep references) and red_summary — without changing detection results,
    state or validation status.

    Body: { "adversary_name": "turla", "adversary_display": "Turla", "eval_round": 5 }

    Useful to upgrade tests that were imported before the enrichment feature
    was added.

    Returns:
        The summary produced by the service-layer ``re_enrich_evaluation_round``.

    Raises:
        HTTPException: 400 when ``adversary_name`` is missing/blank or
            ``eval_round`` is not a positive integer; 502 when the service
            call fails for any other reason.
    """
    from app.services.attck_evaluations_service import re_enrich_evaluation_round as _re_enrich

    adversary_name = payload.get("adversary_name") or ""
    # Fall back to the API name when the display name is absent, null or empty.
    adversary_display = payload.get("adversary_display") or adversary_name

    raw_round = payload.get("eval_round")
    try:
        # bool is an int subclass; reject it so `true` is not read as round 1.
        eval_round = 0 if isinstance(raw_round, bool) else int(raw_round)
    except (TypeError, ValueError):
        eval_round = 0

    name_ok = isinstance(adversary_name, str) and bool(adversary_name.strip())
    if not name_ok or eval_round <= 0:
        raise HTTPException(status_code=400, detail="adversary_name and eval_round are required")

    try:
        summary = _re_enrich(db, adversary_name, adversary_display, eval_round, current_user)
    except HTTPException:
        # Already a deliberate HTTP error; do not re-wrap it as a 502.
        raise
    except Exception as exc:
        logger.error("ATT&CK Evaluation re-enrich failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=502, detail=f"Re-enrich failed: {exc}") from exc

    return summary
|
||||
|
||||
|
||||
@router.post("/email-test")
|
||||
def send_test_email(
|
||||
payload: EmailTestRequest,
|
||||
|
||||
@@ -27,6 +27,7 @@ Important caveats stored in every test's description
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
@@ -273,6 +274,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
|
||||
scenarios = target.get("Detections_By_Step", {})
|
||||
for _scenario_name, scenario_data in scenarios.items():
|
||||
for step in scenario_data.get("Steps", []):
|
||||
step_num = step.get("Step_Num", "")
|
||||
step_name = step.get("Step_Name", "")
|
||||
# Strip HTML tags from the Step.Description narrative
|
||||
step_desc_raw = step.get("Description") or ""
|
||||
step_description = re.sub(r"<[^>]+>", " ", step_desc_raw)
|
||||
step_description = re.sub(r"\s+", " ", step_description).strip()
|
||||
|
||||
for substep in step.get("Substeps", []):
|
||||
# Prefer sub-technique over technique
|
||||
sub = substep.get("Subtechnique") or {}
|
||||
@@ -305,6 +313,14 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
|
||||
best_type = dtype
|
||||
best_note = det.get("Detection_Note", "")
|
||||
|
||||
# Collect all unique data sources from screenshots across all detections
|
||||
data_sources: list[str] = sorted({
|
||||
src
|
||||
for det in detections
|
||||
for sc in det.get("Screenshots", [])
|
||||
for src in sc.get("Data_Sources", [])
|
||||
})
|
||||
|
||||
substeps.append(
|
||||
{
|
||||
"technique_id": technique_id,
|
||||
@@ -314,6 +330,13 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
|
||||
"best_score": best_score,
|
||||
"detection_type": best_type,
|
||||
"note": best_note,
|
||||
# Enrichment fields from the API
|
||||
"step_num": step_num,
|
||||
"step_name": step_name,
|
||||
"step_description": step_description,
|
||||
"substep_ref": substep.get("Substep", ""),
|
||||
"criteria": (substep.get("Criteria") or "").strip(),
|
||||
"data_sources": data_sources,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -321,15 +344,164 @@ def fetch_results_for_adversary(adversary_name: str) -> list[dict[str, Any]]:
|
||||
|
||||
|
||||
def _aggregate_by_technique(substeps: list[dict]) -> dict[str, dict]:
|
||||
"""Aggregate substep results per technique — keep best detection score."""
|
||||
"""Aggregate substep results per technique.
|
||||
|
||||
Keeps the best detection score and accumulates ALL occurrences so that
|
||||
the importer can build a rich attack-path narrative in procedure_text.
|
||||
"""
|
||||
by_technique: dict[str, dict] = {}
|
||||
for sub in substeps:
|
||||
tid = sub["technique_id"]
|
||||
if tid not in by_technique or sub["best_score"] > by_technique[tid]["best_score"]:
|
||||
by_technique[tid] = sub
|
||||
if tid not in by_technique:
|
||||
by_technique[tid] = {**sub, "occurrences": []}
|
||||
|
||||
# Always record this occurrence for the narrative
|
||||
by_technique[tid]["occurrences"].append({
|
||||
"substep_ref": sub["substep_ref"],
|
||||
"step_num": sub["step_num"],
|
||||
"step_name": sub["step_name"],
|
||||
"step_description": sub["step_description"],
|
||||
"criteria": sub["criteria"],
|
||||
"data_sources": sub["data_sources"],
|
||||
"detection_type": sub["detection_type"],
|
||||
"best_score": sub["best_score"],
|
||||
"note": sub["note"],
|
||||
})
|
||||
|
||||
# Promote to best detection if this substep scored higher
|
||||
if sub["best_score"] > by_technique[tid]["best_score"]:
|
||||
by_technique[tid]["best_score"] = sub["best_score"]
|
||||
by_technique[tid]["detection_type"] = sub["detection_type"]
|
||||
by_technique[tid]["note"] = sub["note"]
|
||||
by_technique[tid]["tactic_id"] = sub["tactic_id"]
|
||||
by_technique[tid]["tactic_name"] = sub["tactic_name"]
|
||||
|
||||
return by_technique
|
||||
|
||||
|
||||
def _build_procedure_text(agg: dict, adversary_display: str, eval_round: int) -> str:
|
||||
"""Build a rich attack-path narrative for the Test.procedure_text field."""
|
||||
occurrences = agg.get("occurrences", [])
|
||||
if not occurrences:
|
||||
return (
|
||||
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
|
||||
f"See evaluation report at https://evals.mitre.org for full details."
|
||||
)
|
||||
|
||||
lines: list[str] = []
|
||||
lines.append(f"ATT&CK Evaluation R{eval_round} — {adversary_display}\n")
|
||||
|
||||
# Include step description(s) — deduplicated, one per step
|
||||
seen_steps: set = set()
|
||||
for occ in occurrences:
|
||||
step_key = str(occ.get("step_num", ""))
|
||||
step_name = occ.get("step_name", "")
|
||||
step_desc = occ.get("step_description", "")
|
||||
if step_key and step_key not in seen_steps and step_desc:
|
||||
seen_steps.add(step_key)
|
||||
truncated = step_desc[:500] + ("..." if len(step_desc) > 500 else "")
|
||||
lines.append(f"Step {step_key} — {step_name}:")
|
||||
lines.append(truncated)
|
||||
lines.append("")
|
||||
|
||||
# List all attack criteria for this technique
|
||||
lines.append("Attack steps observed:")
|
||||
for occ in occurrences:
|
||||
ref = occ.get("substep_ref", "")
|
||||
criteria = occ.get("criteria", "")
|
||||
step_name = occ.get("step_name", "")
|
||||
if criteria:
|
||||
prefix = f"[{ref}]" if ref else "•"
|
||||
lines.append(f" {prefix} {criteria}")
|
||||
if step_name:
|
||||
lines.append(f" ↳ Step: {step_name}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_description(agg: dict, adversary_display: str, eval_round: int) -> str:
|
||||
"""Build the full Test.description with detection details and attack path."""
|
||||
occurrences = agg.get("occurrences", [])
|
||||
|
||||
# Collect all unique data sources across every occurrence of this technique
|
||||
all_data_sources: list[str] = sorted({
|
||||
src
|
||||
for occ in occurrences
|
||||
for src in occ.get("data_sources", [])
|
||||
})
|
||||
|
||||
header = (
|
||||
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
|
||||
f"Vendor: CrowdStrike Falcon.\n"
|
||||
f"Detection type achieved: {agg['detection_type']}."
|
||||
)
|
||||
|
||||
ds_section = ""
|
||||
if all_data_sources:
|
||||
ds_section = "\n\nData sources observed:\n" + "\n".join(
|
||||
f" • {ds}" for ds in all_data_sources
|
||||
)
|
||||
|
||||
# Attack path / substep criteria section
|
||||
path_lines: list[str] = []
|
||||
for occ in occurrences:
|
||||
ref = occ.get("substep_ref", "")
|
||||
criteria = occ.get("criteria", "")
|
||||
step_name = occ.get("step_name", "")
|
||||
det_type = occ.get("detection_type", "")
|
||||
if criteria:
|
||||
label = f"[{ref}]" if ref else "•"
|
||||
step_label = f" ({step_name})" if step_name else ""
|
||||
det_label = f" — {det_type}" if det_type and det_type.lower() != "none" else ""
|
||||
path_lines.append(f" {label}{step_label}{det_label}:")
|
||||
path_lines.append(f" {criteria}")
|
||||
|
||||
path_section = ""
|
||||
if path_lines:
|
||||
path_section = "\n\nAttack path — substep criteria:\n" + "\n".join(path_lines)
|
||||
|
||||
warning = (
|
||||
f"\n\n⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
|
||||
f"controlled MITRE lab environment against a simulated {adversary_display} "
|
||||
f"adversary. They do NOT represent your organisation's actual detection "
|
||||
f"capability. Validate in your own environment before approving."
|
||||
)
|
||||
|
||||
note_section = ""
|
||||
if agg.get("note"):
|
||||
note_section = f"\n\nMITRE note: {agg['note']}"
|
||||
|
||||
return header + ds_section + path_section + warning + note_section
|
||||
|
||||
|
||||
def _build_red_summary(agg: dict, adversary_display: str, eval_round: int) -> str:
|
||||
"""Build the Red Team summary for the Test.red_summary field."""
|
||||
occurrences = agg.get("occurrences", [])
|
||||
|
||||
lines = [
|
||||
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})",
|
||||
f"Vendor: CrowdStrike Falcon",
|
||||
f"Best detection level: {agg['detection_type']}",
|
||||
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})",
|
||||
]
|
||||
|
||||
if occurrences:
|
||||
lines.append("")
|
||||
lines.append("Substeps:")
|
||||
for occ in occurrences:
|
||||
ref = occ.get("substep_ref", "")
|
||||
criteria = occ.get("criteria", "")
|
||||
step_name = occ.get("step_name", "")
|
||||
det = occ.get("detection_type", "")
|
||||
if criteria:
|
||||
tag = f"[{ref}]" if ref else "•"
|
||||
step_tag = f" {step_name}:" if step_name else ""
|
||||
det_tag = f" [{det}]" if det and det.lower() != "none" else ""
|
||||
lines.append(f" {tag}{step_tag}{det_tag} {criteria}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main import function
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -387,34 +559,16 @@ def import_evaluation_round(
|
||||
|
||||
detection_result = _score_to_result(agg["best_score"])
|
||||
|
||||
description = (
|
||||
f"Source: MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display}).\n"
|
||||
f"Vendor: CrowdStrike Falcon.\n"
|
||||
f"Detection type achieved: {agg['detection_type']}.\n\n"
|
||||
f"⚠️ IMPORTANT: These results reflect CrowdStrike Falcon performance in a "
|
||||
f"controlled MITRE lab environment against a simulated {adversary_display} "
|
||||
f"adversary. They do NOT represent your organisation's actual detection "
|
||||
f"capability. Validate in your own environment before approving."
|
||||
)
|
||||
if agg["note"]:
|
||||
description += f"\n\nMITRE note: {agg['note']}"
|
||||
|
||||
red_summary = (
|
||||
f"MITRE ATT&CK Evaluation — Round {eval_round} ({adversary_display})\n"
|
||||
f"Vendor: CrowdStrike Falcon\n"
|
||||
f"Best detection level: {agg['detection_type']}\n"
|
||||
f"Tactic: {agg['tactic_name']} ({agg['tactic_id']})"
|
||||
)
|
||||
description = _build_description(agg, adversary_display, eval_round)
|
||||
red_summary = _build_red_summary(agg, adversary_display, eval_round)
|
||||
procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
|
||||
|
||||
test = Test(
|
||||
technique_id=technique.id,
|
||||
name=f"[EVAL R{eval_round}] {adversary_display} — {technique.name}",
|
||||
description=description,
|
||||
platform=None,
|
||||
procedure_text=(
|
||||
f"MITRE ATT&CK Evaluation simulation using {adversary_display} TTPs. "
|
||||
f"See evaluation report at https://evals.mitre.org for full details."
|
||||
),
|
||||
procedure_text=procedure_text,
|
||||
created_by=current_user.id,
|
||||
state=TestState.in_review,
|
||||
attack_success=True,
|
||||
@@ -517,3 +671,74 @@ def check_for_new_round(db: Session) -> dict[str, Any]:
|
||||
"eval_round": latest["eval_round"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Re-enrich existing tests with richer API data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def re_enrich_evaluation_round(
    db: Session,
    adversary_name: str,
    adversary_display: str,
    eval_round: int,
    current_user: User,
) -> dict[str, Any]:
    """Update procedure_text / description / red_summary on already-imported tests
    for a given round using the enriched API data (attack path, criteria, data sources).

    This is non-destructive — it only updates the three narrative fields and does
    not change detection results, state, or validation status.

    Args:
        db: Session used for the lookups and the final commit.
        adversary_name: Adversary identifier passed to the results fetcher.
        adversary_display: Display name; used in the generated text and to
            locate the tests that the import created for this adversary.
        eval_round: Evaluation round number.
        current_user: Accepted for parity with the import function; not read.

    Returns:
        Summary dict with ``updated``, ``skipped``, ``adversary``,
        ``eval_round`` and a human-readable ``message``.
    """
    # Fetch & aggregate (same flow as import)
    substeps = fetch_results_for_adversary(adversary_name)
    by_technique = _aggregate_by_technique(substeps)

    # The importer names tests "[EVAL R<round>] <display> — <technique>".
    # Matching on the round alone can pick up a test that belongs to a
    # different adversary evaluated in the same round and overwrite its
    # narrative, so the adversary is included in the prefix.
    # NOTE(review): this assumes adversary_display equals the value used at
    # import time — confirm against the caller.
    name_prefix = f"[EVAL R{eval_round}] {adversary_display} — "

    updated = 0
    skipped = 0

    for mitre_id, agg in by_technique.items():
        technique = (
            db.query(Technique)
            .filter(Technique.mitre_id == mitre_id.upper())
            .first()
        )
        if technique is None:
            skipped += 1
            continue

        # Find the existing test for this round + adversary + technique.
        # autoescape keeps any "%" / "_" in the display name literal.
        existing_test = (
            db.query(Test)
            .filter(
                Test.technique_id == technique.id,
                Test.name.startswith(name_prefix, autoescape=True),
            )
            .first()
        )
        if not existing_test:
            skipped += 1
            continue

        existing_test.description = _build_description(agg, adversary_display, eval_round)
        existing_test.red_summary = _build_red_summary(agg, adversary_display, eval_round)
        existing_test.procedure_text = _build_procedure_text(agg, adversary_display, eval_round)
        updated += 1

    try:
        db.commit()
    except Exception:
        # Leave the session usable for the caller after a failed flush/commit.
        db.rollback()
        raise

    logger.info(
        "Re-enrichment complete — round %d (%s): %d tests updated, %d skipped",
        eval_round, adversary_display, updated, skipped,
    )
    return {
        "updated": updated,
        "skipped": skipped,
        "adversary": adversary_display,
        "eval_round": eval_round,
        "message": (
            f"Re-enriched {updated} tests for {adversary_display} (Round {eval_round}) "
            "with attack path, criteria and data sources from MITRE API."
        ),
    }
|
||||
|
||||
@@ -109,6 +109,14 @@ export interface BulkApproveResult {
|
||||
message: string;
|
||||
}
|
||||
|
||||
/** Response body of POST /system/attck-evaluations/re-enrich. */
export interface ReEnrichResult {
  /** Number of existing tests whose narrative fields were rewritten. */
  updated: number;
  /** Number of techniques for which no technique or test was found. */
  skipped: number;
  /** Display name of the adversary that was re-enriched. */
  adversary: string;
  /** Evaluation round number the request targeted. */
  eval_round: number;
  /** Human-readable summary of the operation. */
  message: string;
}
|
||||
|
||||
/** Bulk-approve all in-review evaluation tests (Blue Team side). */
|
||||
export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
|
||||
const { data } = await client.post<BulkApproveResult>("/system/attck-evaluations/bulk-approve");
|
||||
@@ -120,3 +128,13 @@ export async function getEvalPendingCount(): Promise<{ pending: number }> {
|
||||
const { data } = await client.get<{ pending: number }>("/system/attck-evaluations/pending-count");
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
 * Re-enrich an already-imported round with attack path, criteria and data sources.
 *
 * Posts the payload to `/system/attck-evaluations/re-enrich` and resolves with
 * the response body.
 */
export async function reEnrichEvaluationRound(payload: {
  adversary_name: string;
  adversary_display: string;
  eval_round: number;
}): Promise<ReEnrichResult> {
  const response = await client.post<ReEnrichResult>(
    "/system/attck-evaluations/re-enrich",
    payload,
  );
  return response.data;
}
|
||||
|
||||
@@ -39,6 +39,7 @@ import {
|
||||
checkNewEvaluationRound,
|
||||
bulkApproveEvaluationTests,
|
||||
getEvalPendingCount,
|
||||
reEnrichEvaluationRound,
|
||||
type SyncMitreResponse,
|
||||
type IntelScanResponse,
|
||||
type EvaluationRound,
|
||||
@@ -46,6 +47,7 @@ import {
|
||||
type EvaluationImportResult,
|
||||
type NewRoundCheckResult,
|
||||
type BulkApproveResult,
|
||||
type ReEnrichResult,
|
||||
} from "../api/system";
|
||||
import {
|
||||
getTemplateStats,
|
||||
@@ -74,6 +76,8 @@ export default function SystemPage() {
|
||||
const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
|
||||
const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
|
||||
const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
|
||||
const [reEnrichingRound, setReEnrichingRound] = useState<string | null>(null);
|
||||
const [reEnrichResult, setReEnrichResult] = useState<ReEnrichResult | null>(null);
|
||||
|
||||
// ── Existing queries ─────────────────────────────────────────────
|
||||
const {
|
||||
@@ -241,6 +245,18 @@ export default function SystemPage() {
|
||||
},
|
||||
});
|
||||
|
||||
const reEnrichMutation = useMutation({
|
||||
mutationFn: (payload: { adversary_name: string; adversary_display: string; eval_round: number }) =>
|
||||
reEnrichEvaluationRound(payload),
|
||||
onSuccess: (data) => {
|
||||
setReEnrichResult(data);
|
||||
setReEnrichingRound(null);
|
||||
},
|
||||
onError: () => {
|
||||
setReEnrichingRound(null);
|
||||
},
|
||||
});
|
||||
|
||||
const formatNextRun = (dateStr: string | null) => {
|
||||
if (!dateStr) return "Not scheduled";
|
||||
const date = new Date(dateStr);
|
||||
@@ -561,6 +577,33 @@ export default function SystemPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Re-enrich result */}
|
||||
{reEnrichResult && (
|
||||
<div className="mb-4 rounded-lg border border-blue-500/30 bg-blue-900/20 p-4">
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<Sparkles className="h-4 w-4 text-blue-400" />
|
||||
<span className="text-sm font-medium text-blue-400">Re-enrichment complete</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-3 text-center text-sm">
|
||||
<div>
|
||||
<p className="text-xl font-bold text-white">{reEnrichResult.updated}</p>
|
||||
<p className="text-xs text-gray-400">Tests enriched</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm font-medium text-blue-400 truncate">{reEnrichResult.adversary}</p>
|
||||
<p className="text-xs text-gray-400">Round {reEnrichResult.eval_round}</p>
|
||||
</div>
|
||||
</div>
|
||||
<p className="mt-2 text-xs text-gray-400">{reEnrichResult.message}</p>
|
||||
<button
|
||||
onClick={() => setReEnrichResult(null)}
|
||||
className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
|
||||
>
|
||||
Dismiss
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Import result feedback */}
|
||||
{evalImportResult && (
|
||||
<div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
|
||||
@@ -685,7 +728,26 @@ export default function SystemPage() {
|
||||
</td>
|
||||
<td className="py-3 pl-4">
|
||||
{round.imported ? (
|
||||
<span className="text-xs text-gray-600 italic">Already imported</span>
|
||||
<button
|
||||
onClick={() => {
|
||||
setReEnrichingRound(round.name);
|
||||
reEnrichMutation.mutate({
|
||||
adversary_name: round.name,
|
||||
adversary_display: round.display_name,
|
||||
eval_round: round.eval_round,
|
||||
});
|
||||
}}
|
||||
disabled={reEnrichMutation.isPending || importRoundMutation.isPending}
|
||||
className="flex items-center gap-1.5 rounded-lg border border-blue-500/30 bg-blue-900/20 px-3 py-1.5 text-xs font-medium text-blue-400 hover:bg-blue-900/40 disabled:opacity-50 transition-colors"
|
||||
title="Update existing tests with attack path, criteria and data sources from MITRE API"
|
||||
>
|
||||
{reEnrichMutation.isPending && reEnrichingRound === round.name ? (
|
||||
<Loader2 className="h-3.5 w-3.5 animate-spin" />
|
||||
) : (
|
||||
<Sparkles className="h-3.5 w-3.5" />
|
||||
)}
|
||||
Re-enrich
|
||||
</button>
|
||||
) : (
|
||||
<button
|
||||
onClick={() => {
|
||||
|
||||
Reference in New Issue
Block a user