Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled
MTTD: was querying AuditLog for action names that don't match actual logged actions. Now uses red_started_at → blue_started_at directly (both stored on the Test record). Net of red_paused_seconds. MTTR: was searching for remediation_status=completed (no data). Redefined as total pipeline time: red_started_at → blue_validated_at net of all paused time. Only counts fully validated tests. Red avg time: was using red_validated_at - created_at (created_at NULL for many tests). Now uses blue_started_at - red_started_at net paused. Blue avg time: was using blue_validated_at - red_validated_at (wrong phase boundary). Now uses blue_work_started_at (or blue_started_at fallback) → blue_validated_at net of blue_paused_seconds.
466 lines
15 KiB
Python
466 lines
15 KiB
Python
"""Operational metrics service — MTTD, MTTR, Detection Efficacy, and more.

Calculates security operations KPIs from test data and audit logs.
"""
|
|
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional
|
|
|
|
from sqlalchemy import func, case, and_, or_, extract
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.models.test import Test
|
|
from app.models.technique import Technique
|
|
from app.models.test_detection_result import TestDetectionResult
|
|
from app.models.audit import AuditLog
|
|
from app.models.enums import TestState, TestResult
|
|
|
|
|
|
def _safe_stats(values: list[float]) -> dict:
|
|
"""Compute mean, median, min, max from a list of floats."""
|
|
if not values:
|
|
return None
|
|
sorted_vals = sorted(values)
|
|
n = len(sorted_vals)
|
|
return {
|
|
"mean_hours": round(sum(sorted_vals) / n, 1),
|
|
"median_hours": round(sorted_vals[n // 2], 1),
|
|
"min_hours": round(sorted_vals[0], 1),
|
|
"max_hours": round(sorted_vals[-1], 1),
|
|
"sample_size": n,
|
|
}
|
|
|
|
|
|
# ── MTTD (Mean Time to Detect) ───────────────────────────────────────
|
|
|
|
|
|
def calculate_mttd(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Detect from timestamps stored on ``Test``.

    For every test that has both ``red_started_at`` and ``blue_started_at``:

        MTTD = blue_started_at - red_started_at - red_paused_seconds

    A missing ``red_paused_seconds`` counts as zero, and tests whose net
    duration is not strictly positive are left out of the sample.

    Args:
        db: SQLAlchemy session used to load the tests.

    Returns:
        The ``_safe_stats`` summary (in hours) of the collected durations,
        or ``None`` when no test qualifies.
    """
    rows = (
        db.query(Test)
        .filter(
            Test.red_started_at.isnot(None),
            Test.blue_started_at.isnot(None),
        )
        .all()
    )

    # Net seconds between the two timestamps, with paused time removed.
    net_seconds = (
        (row.blue_started_at - row.red_started_at).total_seconds()
        - (row.red_paused_seconds or 0)
        for row in rows
    )

    # Keep only strictly positive durations and convert them to hours.
    return _safe_stats([secs / 3600 for secs in net_seconds if secs > 0])
|
|
|
|
|
|
# ── MTTR (Mean Time to Respond/Remediate) ─────────────────────────────
|
|
|
|
|
|
def calculate_mttr(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Respond as end-to-end pipeline time.

    Considers tests in the ``validated`` state that have both
    ``red_started_at`` and ``blue_validated_at``:

        MTTR = blue_validated_at - red_started_at
               - red_paused_seconds - blue_paused_seconds

    Missing paused-second values count as zero, and tests whose net
    duration is not strictly positive are left out of the sample.

    Args:
        db: SQLAlchemy session used to load the tests.

    Returns:
        The ``_safe_stats`` summary (in hours) of the collected durations,
        or ``None`` when no test qualifies.
    """
    rows = (
        db.query(Test)
        .filter(
            Test.state == TestState.validated,
            Test.red_started_at.isnot(None),
            Test.blue_validated_at.isnot(None),
        )
        .all()
    )

    # Net seconds for the whole cycle, with both teams' paused time removed.
    net_seconds = (
        (row.blue_validated_at - row.red_started_at).total_seconds()
        - ((row.red_paused_seconds or 0) + (row.blue_paused_seconds or 0))
        for row in rows
    )

    # Keep only strictly positive durations and convert them to hours.
    return _safe_stats([secs / 3600 for secs in net_seconds if secs > 0])
|
|
|
|
|
|
# ── Detection Efficacy ───────────────────────────────────────────────
|
|
|
|
|
|
def calculate_detection_efficacy(db: Session) -> dict:
    """Calculate detection efficacy over all validated tests.

    Args:
        db: SQLAlchemy session used to load the tests.

    Returns:
        A dict with the number of validated tests per detection outcome
        (``detected``, ``partially``, ``not_detected``), the ``total``
        number of validated tests, and ``percentage`` = detected / total
        * 100 rounded to one decimal place (``0`` when there are none).
    """
    rows = db.query(Test).filter(Test.state == TestState.validated).all()
    total = len(rows)

    def _tally(outcome) -> int:
        # Number of validated tests whose detection_result equals `outcome`.
        return sum(1 for row in rows if row.detection_result == outcome)

    detected = _tally(TestResult.detected)
    partially = _tally(TestResult.partially_detected)
    not_detected = _tally(TestResult.not_detected)

    # With no validated tests every count is zero and the ratio is reported as 0.
    percentage = round((detected / total) * 100, 1) if total else 0

    return {
        "percentage": percentage,
        "detected": detected,
        "partially": partially,
        "not_detected": not_detected,
        "total": total,
    }
|
|
|
|
|
|
# ── Alert Fidelity ──────────────────────────────────────────────────
|
|
|
|
|
|
def calculate_alert_fidelity(db: Session) -> dict:
    """Calculate alert fidelity: the share of evaluated rules that triggered.

    A detection result counts as evaluated when its ``triggered`` column is
    not NULL.

    Args:
        db: SQLAlchemy session used to run the COUNT queries.

    Returns:
        A dict with ``triggered``, ``not_triggered`` and ``total_evaluated``
        counts plus ``percentage`` = triggered / total_evaluated * 100
        rounded to one decimal place (``0`` when nothing was evaluated).
    """
    # Query.filter() returns a new query, so the base COUNT can be shared.
    result_count = db.query(func.count(TestDetectionResult.id))

    evaluated = (
        result_count.filter(TestDetectionResult.triggered.isnot(None)).scalar() or 0
    )
    fired = (
        result_count.filter(TestDetectionResult.triggered == True).scalar()  # noqa: E712
        or 0
    )

    if evaluated > 0:
        share = round((fired / evaluated) * 100, 1)
    else:
        share = 0

    return {
        "percentage": share,
        "triggered": fired,
        "not_triggered": evaluated - fired,
        "total_evaluated": evaluated,
    }
|
|
|
|
|
|
# ── Coverage Velocity ────────────────────────────────────────────────
|
|
|
|
|
|
def calculate_coverage_velocity(db: Session) -> dict:
    """Calculate how many techniques were reviewed per week.

    Groups ``Technique`` rows by the week of ``last_review_date`` over the
    last 12 weeks. Weeks without any reviewed technique produce no row, so
    the average is taken over weeks that had activity.

    Trend compares two non-overlapping windows of weekly counts: the last
    4 weeks against the 4 before them when at least 8 weeks of data exist,
    otherwise the newer half against the older half. With fewer than two
    weeks of data there is nothing to compare and the trend is "stable".

    Args:
        db: SQLAlchemy session used to run the aggregate query.

    Returns:
        A dict with ``techniques_per_week`` (mean weekly count rounded to
        one decimal place, ``0`` when there is no data) and ``trend``
        ("improving", "declining" or "stable").
    """
    twelve_weeks_ago = datetime.utcnow() - timedelta(weeks=12)

    # NOTE(review): date_trunc is a PostgreSQL function — confirm the target DB.
    week_bucket = func.date_trunc("week", Technique.last_review_date)

    weekly_rows = (
        db.query(
            week_bucket.label("week"),
            func.count(Technique.id).label("technique_count"),
        )
        # The >= comparison already excludes NULL review dates.
        .filter(Technique.last_review_date >= twelve_weeks_ago)
        .group_by(week_bucket)
        .order_by(week_bucket)
        .all()
    )

    # Unpack positionally: on SQLAlchemy 1.4+ a Row is a Sequence, so an
    # attribute named `count` resolves to the Sequence.count method rather
    # than the labelled column.
    counts = [weekly_total for _week, weekly_total in weekly_rows]

    if not counts:
        return {
            "techniques_per_week": 0,
            "trend": "stable",
        }

    avg_per_week = round(sum(counts) / len(counts), 1)

    # Pick two windows that never share a week.
    if len(counts) >= 8:
        earlier, recent = counts[-8:-4], counts[-4:]
    else:
        half = len(counts) // 2
        earlier, recent = counts[:half], counts[half:]

    if not earlier:
        # A single data point has no baseline to compare against.
        trend = "stable"
    else:
        recent_avg = sum(recent) / len(recent)
        earlier_avg = sum(earlier) / len(earlier)
        # 10% tolerance band around the earlier average.
        if recent_avg > earlier_avg * 1.1:
            trend = "improving"
        elif recent_avg < earlier_avg * 0.9:
            trend = "declining"
        else:
            trend = "stable"

    return {
        "techniques_per_week": avg_per_week,
        "trend": trend,
    }
|
|
|
|
|
|
# ── Validation Throughput ────────────────────────────────────────────
|
|
|
|
|
|
def calculate_validation_throughput(db: Session) -> dict:
    """Pipeline conversion rate — based on current test states, not on time.

    Measures which share of the tests currently in the ``validated``,
    ``rejected`` or ``in_review`` state are ``validated``:

        conversion_rate = validated / (validated + rejected + in_review) * 100

    Trend rules, applied in order:
        * no tests in those states        -> "stable"
        * more in_review than validated   -> "declining" (backlog building up)
        * conversion_rate of 80 or more   -> "improving"
        * anything else                   -> "stable"

    Args:
        db: SQLAlchemy session used to run the COUNT queries.

    Returns:
        A dict with ``conversion_rate`` (rounded to one decimal place,
        ``0.0`` when the pipeline is empty), the same value under
        ``tests_per_week`` (key kept for API compatibility), the three
        per-state counts and ``trend``.
    """

    def _tests_in(state) -> int:
        # COUNT of tests currently in `state`; a NULL/None result becomes 0.
        return db.query(func.count(Test.id)).filter(Test.state == state).scalar() or 0

    approved = _tests_in(TestState.validated)
    turned_down = _tests_in(TestState.rejected)
    awaiting = _tests_in(TestState.in_review)

    pipeline_size = approved + turned_down + awaiting

    if pipeline_size > 0:
        rate = round(approved / pipeline_size * 100, 1)
    else:
        rate = 0.0

    # Default to "stable"; only a non-empty pipeline can move the trend.
    trend = "stable"
    if pipeline_size != 0:
        if awaiting > approved:
            trend = "declining"  # backlog building up
        elif rate >= 80:
            trend = "improving"  # most tests making it through

    return {
        "tests_per_week": rate,  # reuse key for API compat
        "conversion_rate": rate,
        "validated": approved,
        "rejected": turned_down,
        "in_review": awaiting,
        "trend": trend,
    }
|
|
|
|
|
|
# ── Rejection Rate ──────────────────────────────────────────────────
|
|
|
|
|
|
def calculate_rejection_rate(db: Session) -> dict:
    """Calculate the rejection rate overall and per validating side.

    * ``percentage``: tests in the ``rejected`` state as a share of tests
      in the ``validated`` or ``rejected`` state.
    * ``by_red_lead``: tests with ``red_validation_status == "rejected"``
      as a share of those whose status is "approved" or "rejected".
    * ``by_blue_lead``: the same ratio using ``blue_validation_status``.

    Args:
        db: SQLAlchemy session used to run the COUNT queries.

    Returns:
        A dict with the three percentages, each rounded to one decimal
        place and reported as ``0`` when its denominator is zero.
    """

    def _count_where(criterion) -> int:
        # COUNT of tests matching `criterion`; a NULL/None result becomes 0.
        return db.query(func.count(Test.id)).filter(criterion).scalar() or 0

    def _share(part: int, whole: int):
        # Percentage of `part` in `whole`, guarded against division by zero.
        return round((part / whole) * 100, 1) if whole > 0 else 0

    # Overall: final test state.
    n_validated = _count_where(Test.state == TestState.validated)
    n_rejected = _count_where(Test.state == TestState.rejected)

    # Red side decisions.
    red_no = _count_where(Test.red_validation_status == "rejected")
    red_decided = _count_where(
        Test.red_validation_status.in_(["approved", "rejected"])
    )

    # Blue side decisions.
    blue_no = _count_where(Test.blue_validation_status == "rejected")
    blue_decided = _count_where(
        Test.blue_validation_status.in_(["approved", "rejected"])
    )

    return {
        "percentage": _share(n_rejected, n_validated + n_rejected),
        "by_red_lead": _share(red_no, red_decided),
        "by_blue_lead": _share(blue_no, blue_decided),
    }
|
|
|
|
|
|
# ── Aggregated Operational Metrics ───────────────────────────────────
|
|
|
|
|
|
def get_all_operational_metrics(db: Session) -> dict:
    """Collect every operational metric into a single response dict.

    Args:
        db: SQLAlchemy session passed to each metric calculator.

    Returns:
        A dict mapping each metric name to the result of its calculator,
        evaluated in the order listed below.
    """
    calculators = (
        ("mttd", calculate_mttd),
        ("mttr", calculate_mttr),
        ("detection_efficacy", calculate_detection_efficacy),
        ("alert_fidelity", calculate_alert_fidelity),
        ("coverage_velocity", calculate_coverage_velocity),
        ("validation_throughput", calculate_validation_throughput),
        ("rejection_rate", calculate_rejection_rate),
    )
    return {metric: compute(db) for metric, compute in calculators}
|
|
|
|
|
|
# ── Trend Data ───────────────────────────────────────────────────────
|
|
|
|
|
|
def get_operational_trend(db: Session, period: str = "90d") -> list:
    """Build weekly, cumulative detection-efficacy data points.

    The look-back window is 30 days for ``"30d"``, 365 days for ``"1y"``
    and 90 days for any other value. The window is walked in 7-day steps
    (the last step is cut off at the current time). Each point counts all
    validated tests whose ``red_validated_at`` is at or before the end of
    that step, so the counts are cumulative.

    Args:
        db: SQLAlchemy session used to load the tests.
        period: Look-back selector: ``"30d"``, ``"90d"`` or ``"1y"``.

    Returns:
        A list of dicts with ``date`` (start of the step, ``YYYY-MM-DD``),
        ``detection_efficacy`` (percentage rounded to one decimal place,
        ``0`` when no tests), ``validated_tests`` and ``detected_tests``.
    """
    now = datetime.utcnow()
    lookback_days = {"30d": 30, "1y": 365}.get(period, 90)

    points = []
    bucket_start = now - timedelta(days=lookback_days)

    while bucket_start < now:
        bucket_end = min(bucket_start + timedelta(days=7), now)

        # NOTE(review): loads full Test rows for every bucket just to count
        # them; worth revisiting if the table grows large.
        validated_so_far = (
            db.query(Test)
            .filter(
                Test.state == TestState.validated,
                Test.red_validated_at <= bucket_end,
            )
            .all()
        )

        n_validated = len(validated_so_far)
        n_detected = sum(
            1
            for row in validated_so_far
            if row.detection_result == TestResult.detected
        )

        if n_validated > 0:
            efficacy = round((n_detected / n_validated) * 100, 1)
        else:
            efficacy = 0

        points.append(
            {
                "date": bucket_start.strftime("%Y-%m-%d"),
                "detection_efficacy": efficacy,
                "validated_tests": n_validated,
                "detected_tests": n_detected,
            }
        )

        bucket_start = bucket_end

    return points
|
|
|
|
|
|
# ── By Team ──────────────────────────────────────────────────────────
|
|
|
|
|
|
def _mean_positive_hours(net_seconds: list[float]) -> Optional[float]:
    """Average the strictly positive durations (seconds) as hours, 1 decimal; None if none."""
    hours = [secs / 3600 for secs in net_seconds if secs > 0]
    return round(sum(hours) / len(hours), 1) if hours else None


def get_metrics_by_team(db: Session) -> dict:
    """Get metrics broken down by Red vs Blue team.

    Red team:
        * ``tests_completed``: tests in the blue_evaluating, in_review,
          validated or rejected state.
        * ``avg_completion_hours``: mean of
          ``blue_started_at - red_started_at - red_paused_seconds``.

    Blue team:
        * ``tests_completed``: tests in the in_review, validated or
          rejected state.
        * ``avg_completion_hours``: mean of
          ``blue_validated_at - start - blue_paused_seconds`` where start is
          ``blue_work_started_at`` when set, otherwise ``blue_started_at``.

    Missing paused-second values count as zero; durations that are not
    strictly positive are ignored; an average is ``None`` when no test
    contributes. Each team's ``rejection_rate`` comes from
    ``calculate_rejection_rate``.

    Args:
        db: SQLAlchemy session used for all queries.

    Returns:
        A dict with ``red_team`` and ``blue_team`` sub-dicts, each holding
        ``tests_completed``, ``avg_completion_hours`` and ``rejection_rate``.
    """
    # ── Red team ──────────────────────────────────────────────────────
    red_tests_completed = (
        db.query(func.count(Test.id))
        .filter(
            Test.state.in_(
                [
                    TestState.blue_evaluating,
                    TestState.in_review,
                    TestState.validated,
                    TestState.rejected,
                ]
            )
        )
        .scalar()
    ) or 0

    # Red execution time: red_started_at → blue_started_at (net of paused).
    tests_with_red = (
        db.query(Test)
        .filter(
            Test.red_started_at.isnot(None),
            Test.blue_started_at.isnot(None),
        )
        .all()
    )
    red_avg_time = _mean_positive_hours(
        [
            (t.blue_started_at - t.red_started_at).total_seconds()
            - (t.red_paused_seconds or 0)
            for t in tests_with_red
        ]
    )

    # ── Blue team ─────────────────────────────────────────────────────
    blue_tests_completed = (
        db.query(func.count(Test.id))
        .filter(
            Test.state.in_(
                [
                    TestState.in_review,
                    TestState.validated,
                    TestState.rejected,
                ]
            )
        )
        .scalar()
    ) or 0

    # Blue evaluation time: prefer blue_work_started_at (actual pick-up),
    # falling back to blue_started_at, → blue_validated_at (net of paused).
    tests_with_blue = (
        db.query(Test)
        .filter(
            Test.blue_started_at.isnot(None),
            Test.blue_validated_at.isnot(None),
        )
        .all()
    )
    blue_avg_time = _mean_positive_hours(
        [
            (
                t.blue_validated_at - (t.blue_work_started_at or t.blue_started_at)
            ).total_seconds()
            - (t.blue_paused_seconds or 0)
            for t in tests_with_blue
        ]
    )

    # Compute rejection rates once; both teams read from the same result.
    rejection = calculate_rejection_rate(db)

    return {
        "red_team": {
            "tests_completed": red_tests_completed,
            "avg_completion_hours": red_avg_time,
            "rejection_rate": rejection["by_red_lead"],
        },
        "blue_team": {
            "tests_completed": blue_tests_completed,
            "avg_completion_hours": blue_avg_time,
            "rejection_rate": rejection["by_blue_lead"],
        },
    }
|