feat(evaluations): bulk approve evaluation tests with 4-step confirmation modal
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled

Backend:
- POST /system/attck-evaluations/bulk-approve: finds all [EVAL R*] tests in
  in_review state, approves blue side, transitions to validated, recalculates
  technique statuses, audit logs each test
- GET /system/attck-evaluations/pending-count: returns count of pending eval tests

Frontend:
- BulkApproveModal: 4 mandatory checkboxes before confirm button enables
  (lab env / not org detection / metrics impact / spot-check recommendation)
- Bulk Approve button in header badge showing pending count
- Green result banner showing approved tests + techniques recalculated
- Invalidates techniques, metrics and review-queue queries on success

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
kitos
2026-06-05 16:53:00 +02:00
parent 93b4a700e6
commit 802e8f862b
3 changed files with 390 additions and 1 deletions

View File

@@ -632,6 +632,121 @@ def check_new_evaluation_round(
return check_for_new_round(db)
@router.post("/attck-evaluations/bulk-approve")
def bulk_approve_evaluation_tests(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Bulk-approve the Blue Team validation for ATT&CK Evaluation imported tests.

    Finds every test in ``in_review`` state whose name starts with ``[EVAL R``
    and approves the Blue Team side. A test is moved to ``validated`` only when
    its Red Team side is already approved (evaluation imports are expected to
    pre-approve it); any other matched test keeps its ``in_review`` state.

    **Important caveats** (enforced by UI warnings before this is called):

    - Results come from a controlled MITRE lab, NOT the organisation's env.
    - Validated tests will immediately affect coverage metrics and dashboards.
    - Blue Leads should still spot-check high-priority techniques individually.

    Returns:
        A dict with:

        - ``approved``: number of tests whose Blue side was approved.
        - ``validated``: number of those tests that transitioned to ``validated``.
        - ``techniques_recalculated``: number of techniques whose status was
          recalculated.
        - ``message``: human-readable summary of the above.
    """
    from datetime import datetime

    from app.models.test import Test
    from app.models.enums import TestState
    from app.models.technique import Technique
    from app.services.status_service import recalculate_technique_status
    from app.services.audit_service import log_action

    # Find all pending evaluation tests
    pending = (
        db.query(Test)
        .filter(
            Test.state == TestState.in_review,
            Test.name.like("[EVAL R%"),
        )
        .all()
    )

    if not pending:
        return {
            "approved": 0,
            "validated": 0,
            "techniques_recalculated": 0,
            "message": "No pending evaluation tests found — nothing to approve.",
        }

    # One timestamp for the whole batch so every test carries the same value.
    now = datetime.utcnow()
    approved_count = len(pending)
    validated_count = 0
    affected_technique_ids: set = set()

    for test in pending:
        # Approve blue side
        test.blue_validation_status = "approved"
        test.blue_validated_by = current_user.id
        test.blue_validated_at = now
        test.blue_validation_notes = (
            "Bulk-approved via ATT&CK Evaluations admin panel. "
            "Source: MITRE lab environment — not organisational detection."
        )

        # Red side was pre-approved during import → move to validated.
        # Otherwise the test stays in_review (shouldn't happen for eval
        # imports, but be safe) and is NOT counted as validated.
        if test.red_validation_status == "approved":
            test.state = TestState.validated
            validated_count += 1

        if test.technique_id:
            affected_technique_ids.add(test.technique_id)

        log_action(
            db,
            user_id=current_user.id,
            action="bulk_eval_approve",
            entity_type="test",
            entity_id=test.id,
            details={"source": "attck_evaluation_bulk_approve"},
        )

    db.flush()

    # Recalculate coverage for every touched technique. Load them with a
    # single IN query instead of one SELECT per technique id.
    recalculated_count = 0
    if affected_technique_ids:
        techniques = (
            db.query(Technique)
            .filter(Technique.id.in_(list(affected_technique_ids)))
            .all()
        )
        for tech in techniques:
            recalculate_technique_status(db, tech)
        # Count only techniques that were actually found and recalculated.
        recalculated_count = len(techniques)

    db.commit()

    logger.info(
        "Bulk eval approval: %d tests approved, %d validated, "
        "%d techniques recalculated (by %s)",
        approved_count, validated_count, recalculated_count, current_user.email,
    )

    if validated_count == approved_count:
        summary = (
            f"{approved_count} evaluation tests approved and moved to Validated. "
        )
    else:
        # Be explicit when some tests could not transition, so the caller is
        # not told that everything was validated.
        summary = (
            f"{approved_count} evaluation tests approved; "
            f"{validated_count} moved to Validated, "
            f"{approved_count - validated_count} remain In Review "
            "(Red Team side not approved). "
        )

    return {
        "approved": approved_count,
        "validated": validated_count,
        "techniques_recalculated": recalculated_count,
        "message": summary + f"{recalculated_count} technique statuses recalculated.",
    }
@router.get("/attck-evaluations/pending-count")
def get_pending_evaluation_count(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
):
    """Count imported evaluation tests that are still waiting for Blue approval.

    A test is counted when it is in the ``in_review`` state and its name
    starts with ``[EVAL R``. The result is returned as ``{"pending": <int>}``.
    """
    from app.models.enums import TestState
    from app.models.test import Test

    # Same selection criteria the bulk-approve endpoint uses for its targets.
    awaiting_blue = db.query(Test).filter(
        Test.state == TestState.in_review,
        Test.name.like("[EVAL R%"),
    )
    return {"pending": awaiting_blue.count()}
@router.post("/email-test")
def send_test_email(
payload: EmailTestRequest,

View File

@@ -102,3 +102,21 @@ export async function checkNewEvaluationRound(): Promise<NewRoundCheckResult> {
const { data } = await client.get<NewRoundCheckResult>("/system/attck-evaluations/check-new");
return data;
}
/** Response payload of the evaluation bulk-approve endpoint. */
export interface BulkApproveResult {
  /** Number of evaluation tests the server reports as approved. */
  approved: number;
  /** Number of technique statuses the server reports as recalculated. */
  techniques_recalculated: number;
  /** Human-readable summary produced by the server. */
  message: string;
}
/**
 * Ask the backend to approve the Blue Team side of every in-review
 * evaluation test in one request.
 *
 * @returns The summary the server sends back for the bulk operation.
 */
export async function bulkApproveEvaluationTests(): Promise<BulkApproveResult> {
  const response = await client.post<BulkApproveResult>(
    "/system/attck-evaluations/bulk-approve",
  );
  return response.data;
}
/**
 * Fetch how many evaluation tests are still awaiting Blue approval.
 *
 * @returns An object whose `pending` field holds the count.
 */
export async function getEvalPendingCount(): Promise<{ pending: number }> {
  const response = await client.get<{ pending: number }>(
    "/system/attck-evaluations/pending-count",
  );
  return response.data;
}

View File

@@ -26,6 +26,7 @@ import {
AlertTriangle,
ExternalLink,
CalendarCheck,
ShieldCheck,
} from "lucide-react";
import client from "../api/client";
import {
@@ -36,12 +37,15 @@ import {
importEvaluationRound,
importLatestEvaluation,
checkNewEvaluationRound,
bulkApproveEvaluationTests,
getEvalPendingCount,
type SyncMitreResponse,
type IntelScanResponse,
type EvaluationRound,
type EvaluationRoundsResponse,
type EvaluationImportResult,
type NewRoundCheckResult,
type BulkApproveResult,
} from "../api/system";
import {
getTemplateStats,
@@ -68,6 +72,8 @@ export default function SystemPage() {
const [evalImportResult, setEvalImportResult] = useState<EvaluationImportResult | null>(null);
const [evalCheckResult, setEvalCheckResult] = useState<NewRoundCheckResult | null>(null);
const [evalImportingRound, setEvalImportingRound] = useState<string | null>(null);
const [showBulkApproveModal, setShowBulkApproveModal] = useState(false);
const [bulkApproveResult, setBulkApproveResult] = useState<BulkApproveResult | null>(null);
// ── Existing queries ─────────────────────────────────────────────
const {
@@ -214,6 +220,27 @@ export default function SystemPage() {
},
});
const {
data: evalPendingData,
refetch: refetchPendingCount,
} = useQuery({
queryKey: ["eval-pending-count"],
queryFn: getEvalPendingCount,
});
const bulkApproveMutation = useMutation({
mutationFn: bulkApproveEvaluationTests,
onSuccess: (data) => {
setBulkApproveResult(data);
setShowBulkApproveModal(false);
refetchPendingCount();
refetchEvalRounds();
queryClient.invalidateQueries({ queryKey: ["techniques"] });
queryClient.invalidateQueries({ queryKey: ["metrics"] });
queryClient.invalidateQueries({ queryKey: ["review-queue"] });
},
});
const formatNextRun = (dateStr: string | null) => {
if (!dateStr) return "Not scheduled";
const date = new Date(dateStr);
@@ -384,7 +411,7 @@ export default function SystemPage() {
</p>
</div>
</div>
<div className="flex items-center gap-2 flex-shrink-0">
<div className="flex items-center gap-2 flex-shrink-0 flex-wrap">
<button
onClick={() => checkNewRoundMutation.mutate()}
disabled={checkNewRoundMutation.isPending || evalRoundsLoading}
@@ -412,6 +439,18 @@ export default function SystemPage() {
)}
Import Latest Round
</button>
{/* Bulk Approve is offered only while tests are pending. The explicit null
    check narrows evalPendingData, so reading .pending below is type-safe
    while the query is still loading (data is undefined). */}
{evalPendingData != null && evalPendingData.pending > 0 && (
  <button
    type="button"
    onClick={() => setShowBulkApproveModal(true)}
    className="flex items-center gap-2 rounded-lg bg-green-700 px-3 py-2 text-sm font-medium text-white hover:bg-green-600 transition-colors"
  >
    <ShieldCheck className="h-4 w-4" />
    Bulk Approve
    <span className="ml-1 rounded-full bg-green-500/30 px-1.5 py-0.5 text-xs font-bold">
      {evalPendingData.pending}
    </span>
  </button>
)}
</div>
</div>
@@ -495,6 +534,33 @@ export default function SystemPage() {
</div>
)}
{/* Bulk approve result */}
{bulkApproveResult && (
<div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
<div className="flex items-center gap-2 mb-2">
<ShieldCheck className="h-4 w-4 text-green-400" />
<span className="text-sm font-medium text-green-400">Bulk approval complete</span>
</div>
<div className="grid grid-cols-2 gap-3 text-center text-sm">
<div>
<p className="text-xl font-bold text-white">{bulkApproveResult.approved}</p>
<p className="text-xs text-gray-400">Tests validated</p>
</div>
<div>
<p className="text-xl font-bold text-white">{bulkApproveResult.techniques_recalculated}</p>
<p className="text-xs text-gray-400">Technique statuses updated</p>
</div>
</div>
<p className="mt-2 text-xs text-gray-400">{bulkApproveResult.message}</p>
<button
onClick={() => setBulkApproveResult(null)}
className="mt-2 text-xs text-gray-500 hover:text-gray-400 underline"
>
Dismiss
</button>
</div>
)}
{/* Import result feedback */}
{evalImportResult && (
<div className="mb-4 rounded-lg border border-green-500/30 bg-green-900/20 p-4">
@@ -661,6 +727,17 @@ export default function SystemPage() {
</div>
</div>
{/* ── Bulk Approve Modal ──────────────────────────────────────── */}
{showBulkApproveModal && (
<BulkApproveModal
pendingCount={evalPendingData?.pending ?? 0}
isPending={bulkApproveMutation.isPending}
error={bulkApproveMutation.isError ? ((bulkApproveMutation.error as Error)?.message ?? "Unknown error") : null}
onConfirm={() => bulkApproveMutation.mutate()}
onClose={() => setShowBulkApproveModal(false)}
/>
)}
{/* ────────────────────────────────────────────────────────────────
TEMPLATE ADMINISTRATION (T-124)
──────────────────────────────────────────────────────────────── */}
@@ -1243,6 +1320,185 @@ function ExportImportSection() {
);
}
/* ── Bulk Approve Evaluation Tests Modal ─────────────────────────── */

/**
 * Build the class list for one confirmation row.
 *
 * `activeClasses` is passed as a complete literal string at each call site
 * rather than assembled from a colour name, so the full class names stay
 * visible in the source text.
 */
function bulkApproveRowClass(checked: boolean, activeClasses: string): string {
  return `flex items-start gap-3 rounded-lg border p-3 cursor-pointer transition-colors ${
    checked ? activeClasses : "border-gray-700 hover:border-gray-600"
  }`;
}

/**
 * Confirmation dialog shown before bulk-approving imported evaluation tests.
 *
 * The confirm button stays disabled until all four acknowledgement
 * checkboxes are ticked, and while a request is in flight.
 *
 * @param pendingCount Number of tests awaiting approval, used in the copy.
 * @param isPending    True while the approval request is running; disables
 *                     every way of closing or confirming the dialog.
 * @param error        Error text to display, or null when there is none.
 * @param onConfirm    Called when the user confirms the bulk approval.
 * @param onClose      Called when the user cancels or closes the dialog.
 */
function BulkApproveModal({
  pendingCount,
  isPending,
  error,
  onConfirm,
  onClose,
}: {
  pendingCount: number;
  isPending: boolean;
  error: string | null;
  onConfirm: () => void;
  onClose: () => void;
}) {
  const [checks, setChecks] = useState({
    labEnv: false,
    notOrg: false,
    metricsImpact: false,
    spotCheck: false,
  });

  const allChecked = Object.values(checks).every(Boolean);

  const toggle = (key: keyof typeof checks) =>
    setChecks((prev) => ({ ...prev, [key]: !prev[key] }));

  // Pluralise once so header, body and button copy always agree
  // (avoids "Approve 1 Tests").
  const isSingular = pendingCount === 1;
  const testNoun = isSingular ? "test" : "tests";
  const testNounTitle = isSingular ? "Test" : "Tests";

  return (
    <div
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/70 backdrop-blur-sm p-4"
      role="dialog"
      aria-modal="true"
      aria-labelledby="bulk-approve-modal-title"
    >
      <div className="w-full max-w-lg rounded-xl border border-orange-500/40 bg-gray-900 shadow-2xl">
        {/* Header */}
        <div className="flex items-center justify-between border-b border-gray-800 px-6 py-4">
          <div className="flex items-center gap-3">
            <div className="rounded-lg bg-orange-500/10 p-2">
              <ShieldCheck className="h-5 w-5 text-orange-400" />
            </div>
            <div>
              <h3 id="bulk-approve-modal-title" className="text-base font-semibold text-white">
                Bulk Approve Evaluation Tests
              </h3>
              <p className="text-xs text-gray-400">
                {pendingCount} {testNoun} awaiting Blue Team approval
              </p>
            </div>
          </div>
          {/* Disabled while the request runs, matching the Cancel button. */}
          <button
            type="button"
            onClick={onClose}
            disabled={isPending}
            aria-label="Close"
            className="text-gray-500 hover:text-white disabled:opacity-50 transition-colors"
          >
            <X className="h-5 w-5" />
          </button>
        </div>

        {/* Body */}
        <div className="px-6 py-5 space-y-4">
          {/* What will happen */}
          <div className="rounded-lg border border-gray-700 bg-gray-800/50 p-3 text-sm text-gray-300">
            <p className="font-medium text-white mb-1">What this action does:</p>
            <ul className="space-y-1 text-xs text-gray-400 list-disc list-inside">
              <li>
                Approves the Blue Team validation for{" "}
                <strong className="text-white">
                  {pendingCount} imported evaluation {testNoun}
                </strong>
              </li>
              <li>
                Moves all of them from <span className="text-yellow-400">In Review</span>
                {" → "}
                <span className="text-green-400">Validated</span>
              </li>
              <li>Recalculates technique coverage across the ATT&amp;CK matrix immediately</li>
              <li>Updates programme score, dashboards, and executive metrics</li>
            </ul>
          </div>

          {/* Warnings — must all be checked */}
          <p className="text-xs font-semibold uppercase tracking-wider text-gray-500">
            Read and confirm each point before proceeding
          </p>

          <label className={bulkApproveRowClass(checks.labEnv, "border-orange-500/50 bg-orange-900/10")}>
            <input
              type="checkbox"
              checked={checks.labEnv}
              onChange={() => toggle("labEnv")}
              className="mt-0.5 h-4 w-4 accent-orange-500 flex-shrink-0"
            />
            <span className="text-sm text-gray-300">
              <strong className="text-orange-400">Lab environment data.</strong>{" "}
              These results come from a controlled MITRE Engenuity evaluation against simulated adversaries
              — not from live attacks on your organisation's infrastructure.
            </span>
          </label>

          <label className={bulkApproveRowClass(checks.notOrg, "border-orange-500/50 bg-orange-900/10")}>
            <input
              type="checkbox"
              checked={checks.notOrg}
              onChange={() => toggle("notOrg")}
              className="mt-0.5 h-4 w-4 accent-orange-500 flex-shrink-0"
            />
            <span className="text-sm text-gray-300">
              <strong className="text-orange-400">Not your organisation's actual detection.</strong>{" "}
              CrowdStrike Falcon's lab performance does <em>not</em> guarantee the same detection
              capability in your specific deployment, configuration, and environment.
            </span>
          </label>

          <label className={bulkApproveRowClass(checks.metricsImpact, "border-yellow-500/50 bg-yellow-900/10")}>
            <input
              type="checkbox"
              checked={checks.metricsImpact}
              onChange={() => toggle("metricsImpact")}
              className="mt-0.5 h-4 w-4 accent-yellow-500 flex-shrink-0"
            />
            <span className="text-sm text-gray-300">
              <strong className="text-yellow-400">Coverage metrics will change.</strong>{" "}
              Approving these tests will immediately raise the ATT&amp;CK coverage percentage
              and programme score. Make sure stakeholders understand these are baseline evaluation
              results before sharing reports.
            </span>
          </label>

          <label className={bulkApproveRowClass(checks.spotCheck, "border-blue-500/50 bg-blue-900/10")}>
            <input
              type="checkbox"
              checked={checks.spotCheck}
              onChange={() => toggle("spotCheck")}
              className="mt-0.5 h-4 w-4 accent-blue-500 flex-shrink-0"
            />
            <span className="text-sm text-gray-300">
              <strong className="text-blue-400">Spot-checking recommended.</strong>{" "}
              I understand that Blue Leads should individually validate high-priority techniques
              (e.g. T1059, T1078, T1003) in the actual environment before trusting bulk approval
              for those techniques.
            </span>
          </label>

          {/* Error */}
          {error && (
            <div
              role="alert"
              className="rounded-lg border border-red-500/30 bg-red-900/20 p-3 text-sm text-red-400"
            >
              {error}
            </div>
          )}
        </div>

        {/* Footer */}
        <div className="flex items-center justify-between border-t border-gray-800 px-6 py-4">
          <button
            type="button"
            onClick={onClose}
            disabled={isPending}
            className="rounded-lg border border-gray-700 bg-gray-800 px-4 py-2 text-sm font-medium text-gray-300 hover:bg-gray-700 disabled:opacity-50 transition-colors"
          >
            Cancel
          </button>
          <button
            type="button"
            onClick={onConfirm}
            disabled={!allChecked || isPending}
            className="flex items-center gap-2 rounded-lg bg-green-700 px-5 py-2 text-sm font-semibold text-white hover:bg-green-600 disabled:opacity-40 disabled:cursor-not-allowed transition-colors"
          >
            {isPending ? (
              <Loader2 className="h-4 w-4 animate-spin" />
            ) : (
              <ShieldCheck className="h-4 w-4" />
            )}
            {isPending
              ? `Approving ${pendingCount} ${testNoun}…`
              : allChecked
                ? `Approve ${pendingCount} ${testNounTitle}`
                : "Confirm all 4 points above"}
          </button>
        </div>
      </div>
    </div>
  );
}
/* ── Create Template Form (inline modal) ──────────────────────────── */
function CreateTemplateForm({