Files
Aegis/backend/app/services/operational_metrics_service.py
kitos f53500bcb5
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled
fix(exec-dashboard): replace time-dependent throughput with Pipeline Conversion %
'Validation Throughput (tests/week)' was time-dependent — director wanted
an activity-based metric instead.

New metric: Pipeline Conversion Rate
  formula: validated / (validated + rejected + in_review) × 100
  unit: %  (no time reference)
  meaning: 'of all tests that have entered validation, X% succeeded'
  trend: declining if in_review backlog > validated count,
         improving if conversion ≥ 80%, stable otherwise

Backend: calculate_validation_throughput() rewritten — same API key
(tests_per_week) kept for compatibility, new conversion_rate field added.
Frontend: label → 'Pipeline Conversion', unit → '%', tooltip updated.
2026-06-03 10:06:30 +02:00

483 lines
16 KiB
Python

"""Operational metrics service — MTTD, MTTR, Detection Efficacy, and more.
Calculates security operations KPIs from test data and audit logs.
"""
from datetime import datetime, timedelta
from typing import Optional
from sqlalchemy import func, case, and_, or_, extract
from sqlalchemy.orm import Session
from app.models.test import Test
from app.models.technique import Technique
from app.models.test_detection_result import TestDetectionResult
from app.models.audit import AuditLog
from app.models.enums import TestState, TestResult
def _safe_stats(values: list[float]) -> Optional[dict]:
    """Summarise a list of durations expressed in hours.

    Args:
        values: Durations in hours. May be empty and need not be sorted.

    Returns:
        ``None`` when ``values`` is empty. Otherwise a dict with the keys
        ``mean_hours``, ``median_hours``, ``min_hours`` and ``max_hours``
        (each rounded to one decimal place) plus ``sample_size``.
    """
    if not values:
        return None
    sorted_vals = sorted(values)
    n = len(sorted_vals)
    mid = n // 2
    if n % 2:
        median = sorted_vals[mid]
    else:
        # Even-sized sample: the median is the mean of the two middle values.
        # Using sorted_vals[mid] alone would systematically bias it upwards.
        median = (sorted_vals[mid - 1] + sorted_vals[mid]) / 2
    return {
        "mean_hours": round(sum(sorted_vals) / n, 1),
        "median_hours": round(median, 1),
        "min_hours": round(sorted_vals[0], 1),
        "max_hours": round(sorted_vals[-1], 1),
        "sample_size": n,
    }
# ── MTTD (Mean Time to Detect) ───────────────────────────────────────
def _first_audit_timestamps(db: Session, actions: list[str]) -> dict:
    """Map each test's entity id (as ``str``) to its earliest audit timestamp for ``actions``."""
    rows = (
        db.query(AuditLog.entity_id, func.min(AuditLog.timestamp))
        .filter(
            AuditLog.entity_type == "test",
            AuditLog.action.in_(actions),
        )
        .group_by(AuditLog.entity_id)
        .all()
    )
    # Keys are normalised with str() because callers look them up by str(test.id).
    return {str(entity_id): first_seen for entity_id, first_seen in rows}


def calculate_mttd(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Detect from audit-log timestamps.

    For every test in the ``validated`` state, the detection time is the gap
    between its earliest "start execution" audit entry and its earliest
    "submit red" audit entry. Tests missing either entry, or whose submit
    entry is not strictly later than the start entry, are ignored.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        The summary produced by ``_safe_stats`` over the detection times in
        hours, or ``None`` when no test contributes a sample.
    """
    validated_ids = [
        str(test_id)
        for (test_id,) in (
            db.query(Test.id)
            .filter(Test.state == TestState.validated)
            .all()
        )
    ]
    if not validated_ids:
        return _safe_stats([])

    # Two grouped queries replace the former pair of audit-log lookups that
    # was issued for every single validated test.
    red_started = _first_audit_timestamps(
        db, ["test_start_execution", "start_execution"]
    )
    blue_started = _first_audit_timestamps(
        db, ["test_submit_red", "submit_red"]
    )

    detection_times = []
    for test_id in validated_ids:
        red_ts = red_started.get(test_id)
        blue_ts = blue_started.get(test_id)
        if red_ts is not None and blue_ts is not None and blue_ts > red_ts:
            detection_times.append((blue_ts - red_ts).total_seconds() / 3600)
    return _safe_stats(detection_times)
# ── MTTR (Mean Time to Respond/Remediate) ─────────────────────────────
def calculate_mttr(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Respond.

    Considers tests whose ``remediation_status`` is ``"completed"`` and that
    have a ``blue_validated_at`` timestamp. The response time of a test is the
    gap between ``blue_validated_at`` and the latest audit entry of that test
    whose action contains "remediation" (case-insensitive). Non-positive gaps
    and tests without such an audit entry are ignored.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        The summary produced by ``_safe_stats`` over the response times in
        hours, or ``None`` when no test contributes a sample.
    """
    completed = (
        db.query(Test.id, Test.blue_validated_at)
        .filter(
            Test.remediation_status == "completed",
            Test.blue_validated_at.isnot(None),
        )
        .all()
    )
    if not completed:
        return _safe_stats([])

    # One grouped query instead of one audit-log lookup per test.
    # NOTE(review): the pattern matches ANY remediation-related action, not
    # only a "completed" transition — confirm this is the intended end marker.
    latest_rows = (
        db.query(AuditLog.entity_id, func.max(AuditLog.timestamp))
        .filter(
            AuditLog.entity_type == "test",
            AuditLog.action.ilike("%remediation%"),
        )
        .group_by(AuditLog.entity_id)
        .all()
    )
    latest_remediation = {str(entity_id): ts for entity_id, ts in latest_rows}

    response_times = []
    for test_id, detected_at in completed:
        remediated_at = latest_remediation.get(str(test_id))
        if remediated_at is None:
            continue
        hours = (remediated_at - detected_at).total_seconds() / 3600
        if hours > 0:
            response_times.append(hours)
    return _safe_stats(response_times)
# ── Detection Efficacy ───────────────────────────────────────────────
def calculate_detection_efficacy(db: Session) -> dict:
    """Calculate detection efficacy: detected / total validated tests.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        A dict with ``percentage`` (share of validated tests whose
        ``detection_result`` is ``detected``, rounded to one decimal; ``0``
        when there are no validated tests), the per-outcome counts
        ``detected``, ``partially`` and ``not_detected``, and ``total`` (all
        validated tests, including those with any other or no result).
    """
    # Let the database count per outcome instead of loading every validated
    # Test entity just to tally it in Python.
    rows = (
        db.query(Test.detection_result, func.count(Test.id))
        .filter(Test.state == TestState.validated)
        .group_by(Test.detection_result)
        .all()
    )

    def _tally(outcome) -> int:
        # Compared with == (rather than a dict lookup) so the match semantics
        # are the same as comparing the attribute on a loaded entity.
        return sum(n for value, n in rows if value == outcome)

    total = sum(n for _value, n in rows)
    detected = _tally(TestResult.detected)
    return {
        "percentage": round((detected / total) * 100, 1) if total else 0,
        "detected": detected,
        "partially": _tally(TestResult.partially_detected),
        "not_detected": _tally(TestResult.not_detected),
        "total": total,
    }
# ── Alert Fidelity ──────────────────────────────────────────────────
def calculate_alert_fidelity(db: Session) -> dict:
    """Report how many evaluated detection results actually triggered.

    A detection result counts as evaluated when its ``triggered`` column is
    not NULL.

    Returns:
        A dict with ``percentage`` (triggered / evaluated, one decimal, ``0``
        when nothing was evaluated), ``triggered``, ``not_triggered`` and
        ``total_evaluated``.
    """

    def _rows_matching(condition) -> int:
        matched = (
            db.query(func.count(TestDetectionResult.id))
            .filter(condition)
            .scalar()
        )
        return matched or 0

    evaluated = _rows_matching(TestDetectionResult.triggered.isnot(None))
    # "== True" builds a SQL expression here; it is not a Python truth test.
    fired = _rows_matching(TestDetectionResult.triggered == True)  # noqa: E712

    if evaluated > 0:
        share = round((fired / evaluated) * 100, 1)
    else:
        share = 0

    return {
        "percentage": share,
        "triggered": fired,
        "not_triggered": evaluated - fired,
        "total_evaluated": evaluated,
    }
# ── Coverage Velocity ────────────────────────────────────────────────
def calculate_coverage_velocity(db: Session) -> dict:
    """Calculate how many techniques were reviewed per week recently.

    Techniques are bucketed by the week of ``last_review_date`` over the last
    twelve weeks. Weeks without any review produce no bucket, so the average
    is taken over weeks that had at least one review.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        A dict with ``techniques_per_week`` (mean bucket size, one decimal,
        ``0`` when there is no data) and ``trend`` — ``"improving"`` /
        ``"declining"`` when the recent window differs from the earlier one
        by more than 10 %, otherwise ``"stable"``.
    """
    twelve_weeks_ago = datetime.utcnow() - timedelta(weeks=12)
    week_bucket = func.date_trunc("week", Technique.last_review_date)
    weekly_rows = (
        db.query(
            week_bucket.label("week"),
            func.count(Technique.id).label("reviewed"),
        )
        # The >= comparison already excludes rows with a NULL review date.
        .filter(Technique.last_review_date >= twelve_weeks_ago)
        .group_by(week_bucket)
        .order_by("week")
        .all()
    )
    # Unpack positionally: an attribute called ``count`` on a result row
    # collides with the tuple/Sequence ``count`` method, so ``row.count``
    # is not a reliable way to read the aggregate.
    counts = [reviewed for _week, reviewed in weekly_rows]

    if not counts:
        return {"techniques_per_week": 0, "trend": "stable"}

    avg_per_week = round(sum(counts) / len(counts), 1)

    # Compare two NON-overlapping windows: the last 4 weeks against the 4
    # before them, or the two halves when fewer than 8 buckets exist.
    if len(counts) >= 8:
        earlier, recent = counts[-8:-4], counts[-4:]
    else:
        mid = len(counts) // 2
        earlier, recent = counts[:mid], counts[mid:]

    if not earlier:
        # A single bucket has no baseline; do not report a spurious trend.
        trend = "stable"
    else:
        recent_avg = sum(recent) / len(recent)
        earlier_avg = sum(earlier) / len(earlier)
        if recent_avg > earlier_avg * 1.1:
            trend = "improving"
        elif recent_avg < earlier_avg * 0.9:
            trend = "declining"
        else:
            trend = "stable"

    return {
        "techniques_per_week": avg_per_week,
        "trend": trend,
    }
# ── Validation Throughput ────────────────────────────────────────────
def calculate_validation_throughput(db: Session) -> dict:
    """Pipeline Conversion Rate — an activity-based metric with no time window.

    Share of tests that reached the validation phase and ended up approved:

        validated / (validated + rejected + in_review) * 100

    100 means every test that reached validation was approved, 0 means
    nothing has been validated yet; lower values point at a backlog or at
    quality issues blocking approvals.

    Returns:
        A dict with ``conversion_rate`` (one decimal, ``0.0`` for an empty
        pipeline), the same value under the legacy ``tests_per_week`` key,
        the raw ``validated`` / ``rejected`` / ``in_review`` counts and a
        ``trend`` label.
    """

    def _tests_in(state) -> int:
        found = (
            db.query(func.count(Test.id))
            .filter(Test.state == state)
            .scalar()
        )
        return found or 0

    approved = _tests_in(TestState.validated)
    declined = _tests_in(TestState.rejected)
    pending = _tests_in(TestState.in_review)
    pipeline_size = approved + declined + pending

    if pipeline_size > 0:
        rate = round(approved / pipeline_size * 100, 1)
    else:
        rate = 0.0

    # Trend heuristic: a review backlog larger than the approved count means
    # work is piling up; otherwise a high conversion rate counts as improving.
    if pipeline_size == 0:
        outlook = "stable"
    elif pending > approved:
        outlook = "declining"
    elif rate >= 80:
        outlook = "improving"
    else:
        outlook = "stable"

    return {
        "tests_per_week": rate,  # legacy key kept for API compatibility
        "conversion_rate": rate,
        "validated": approved,
        "rejected": declined,
        "in_review": pending,
        "trend": outlook,
    }
# ── Rejection Rate ──────────────────────────────────────────────────
def calculate_rejection_rate(db: Session) -> dict:
    """Compute the rejection rate overall and per reviewing lead.

    Returns:
        A dict with ``percentage`` (rejected / (validated + rejected) by test
        state), ``by_red_lead`` and ``by_blue_lead`` (rejected / decided,
        based on ``red_validation_status`` and ``blue_validation_status``).
        Every value is rounded to one decimal and is ``0`` when its
        denominator is zero.
    """

    def _tests_where(condition) -> int:
        found = db.query(func.count(Test.id)).filter(condition).scalar()
        return found or 0

    def _share(part: int, whole: int):
        return round((part / whole) * 100, 1) if whole > 0 else 0

    decided = ["approved", "rejected"]

    # Overall: based on the final test state.
    approved_tests = _tests_where(Test.state == TestState.validated)
    rejected_tests = _tests_where(Test.state == TestState.rejected)

    # Red lead decisions.
    red_rejections = _tests_where(Test.red_validation_status == "rejected")
    red_decisions = _tests_where(Test.red_validation_status.in_(decided))

    # Blue lead decisions.
    blue_rejections = _tests_where(Test.blue_validation_status == "rejected")
    blue_decisions = _tests_where(Test.blue_validation_status.in_(decided))

    return {
        "percentage": _share(rejected_tests, approved_tests + rejected_tests),
        "by_red_lead": _share(red_rejections, red_decisions),
        "by_blue_lead": _share(blue_rejections, blue_decisions),
    }
# ── Aggregated Operational Metrics ───────────────────────────────────
def get_all_operational_metrics(db: Session) -> dict:
    """Collect every operational metric into one response dict.

    Each calculator is invoked once with ``db``, in the order listed, and its
    result is stored under the corresponding key.
    """
    calculators = (
        ("mttd", calculate_mttd),
        ("mttr", calculate_mttr),
        ("detection_efficacy", calculate_detection_efficacy),
        ("alert_fidelity", calculate_alert_fidelity),
        ("coverage_velocity", calculate_coverage_velocity),
        ("validation_throughput", calculate_validation_throughput),
        ("rejection_rate", calculate_rejection_rate),
    )
    return {key: compute(db) for key, compute in calculators}
# ── Trend Data ───────────────────────────────────────────────────────
def get_operational_trend(db: Session, period: str = "90d") -> list:
    """Get weekly trend data for detection efficacy.

    Walks from the start of the period to now in 7-day steps (the last step
    may be shorter). Each data point is cumulative: it covers all tests in
    the ``validated`` state whose ``red_validated_at`` is at or before the end
    of that step.

    Args:
        db: Active SQLAlchemy session.
        period: ``"30d"``, ``"1y"`` or anything else for the 90-day default.

    Returns:
        A list of dicts with ``date`` (step start, ``YYYY-MM-DD``),
        ``detection_efficacy`` (percentage, one decimal, ``0`` without
        tests), ``validated_tests`` and ``detected_tests``.
    """
    now = datetime.utcnow()
    lookback_days = {"30d": 30, "1y": 365}.get(period, 90)
    current = now - timedelta(days=lookback_days)

    data_points = []
    while current < now:
        week_end = min(current + timedelta(days=7), now)

        # Ask the database for per-outcome counts rather than loading every
        # validated Test entity on each iteration just to count it.
        outcome_counts = (
            db.query(Test.detection_result, func.count(Test.id))
            .filter(
                Test.state == TestState.validated,
                Test.red_validated_at <= week_end,
            )
            .group_by(Test.detection_result)
            .all()
        )
        total = sum(n for _outcome, n in outcome_counts)
        detected = sum(
            n for outcome, n in outcome_counts
            if outcome == TestResult.detected
        )
        efficacy = round((detected / total) * 100, 1) if total > 0 else 0

        data_points.append({
            "date": current.strftime("%Y-%m-%d"),
            "detection_efficacy": efficacy,
            "validated_tests": total,
            "detected_tests": detected,
        })
        current = week_end
    return data_points
# ── By Team ──────────────────────────────────────────────────────────
def _mean_positive_hours(intervals) -> Optional[float]:
    """Average the strictly positive ``end - start`` gaps in hours (one decimal), or ``None``."""
    gaps = [(end - start).total_seconds() / 3600 for start, end in intervals]
    positive = [hours for hours in gaps if hours > 0]
    if not positive:
        return None
    return round(sum(positive) / len(positive), 1)


def get_metrics_by_team(db: Session) -> dict:
    """Get metrics broken down by Red vs Blue team.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        A dict with ``red_team`` and ``blue_team`` entries, each holding:

        * ``tests_completed`` — tests whose state is past that team's phase
          (red: ``blue_evaluating``, ``in_review``, ``validated``,
          ``rejected``; blue: the same minus ``blue_evaluating``);
        * ``avg_completion_hours`` — mean positive gap between ``created_at``
          and ``red_validated_at`` (red) or between ``red_validated_at`` and
          ``blue_validated_at`` (blue), ``None`` without samples;
        * ``rejection_rate`` — the matching per-lead value from
          ``calculate_rejection_rate``.
    """
    red_tests_completed = (
        db.query(func.count(Test.id))
        .filter(Test.state.in_([
            TestState.blue_evaluating,
            TestState.in_review,
            TestState.validated,
            TestState.rejected,
        ]))
        .scalar()
    ) or 0
    # Only the two timestamp columns are needed, not whole Test entities.
    red_intervals = (
        db.query(Test.created_at, Test.red_validated_at)
        .filter(Test.red_validated_at.isnot(None), Test.created_at.isnot(None))
        .all()
    )

    blue_tests_completed = (
        db.query(func.count(Test.id))
        .filter(Test.state.in_([
            TestState.in_review,
            TestState.validated,
            TestState.rejected,
        ]))
        .scalar()
    ) or 0
    blue_intervals = (
        db.query(Test.red_validated_at, Test.blue_validated_at)
        .filter(
            Test.blue_validated_at.isnot(None),
            Test.red_validated_at.isnot(None),
        )
        .all()
    )

    # Computed once: each call issues several count queries, and both team
    # entries read from the same result.
    rejection = calculate_rejection_rate(db)

    return {
        "red_team": {
            "tests_completed": red_tests_completed,
            "avg_completion_hours": _mean_positive_hours(red_intervals),
            "rejection_rate": rejection["by_red_lead"],
        },
        "blue_team": {
            "tests_completed": blue_tests_completed,
            "avg_completion_hours": _mean_positive_hours(blue_intervals),
            "rejection_rate": rejection["by_blue_lead"],
        },
    }