Files
Aegis/backend/app/services/operational_metrics_service.py
T
kitos 8f98bdd273 refactor(pep8): enforce full PEP8 compliance across backend Python codebase
- ruff.toml: select E/W/F/I/N rules, line-length=120, drop legacy ignores
- Auto-fix: sort 82 import blocks (isort), remove 29 unused imports,
  strip 6 trailing-whitespace blank lines in docstrings
- main.py: move setup_logging and settings imports to top (E402)
- errors.py: noqa N818 on DDD exception names (96 call sites, safe)
- intel_service.py: noqa N817 for universal ET alias
- atomic/elastic/sigma import services: move _MAX_UNCOMPRESSED_SIZE and
  _MAX_ENTRIES to module level (N806)
- compliance_import_service.py: move SAMPLE_CONTROLS / CIS_CONTROLS to
  module level; wrap long description strings (N806 + E501)
- snapshot_service.py: move STATUS_ORDER dict to module level (N806)
- sigma_import_service.py: remove dead dedup_key expression (F841)
- threat_actor_import_service.py: remove dead stix_to_actor expression (F841)
- data_source.py, seed_demo.py, campaign_scheduler_service.py,
  lolbas_import_service.py: wrap lines exceeding 120 chars (E501)
- d3fend_import_service.py: per-file E501 ignore (data file with long strings)

All 439 unit tests pass. ruff check app/ → All checks passed!
2026-06-11 11:06:54 +02:00

469 lines
15 KiB
Python

"""Operational metrics service — MTTD, MTTR, Detection Efficacy, and more.
Calculates security operations KPIs from test data and audit logs.
"""
from datetime import datetime, timedelta
from typing import Optional
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.models.audit import AuditLog
from app.models.enums import TestResult, TestState
from app.models.technique import Technique
from app.models.test import Test
from app.models.test_detection_result import TestDetectionResult
def _safe_stats(values: list[float]) -> Optional[dict]:
    """Summarise a list of durations as mean, median, min and max.

    Args:
        values: Durations to summarise (the callers in this module pass
            hours). May be empty.

    Returns:
        ``None`` when ``values`` is empty. Otherwise a dict with the keys
        ``mean_hours``, ``median_hours``, ``min_hours`` and ``max_hours``
        (each rounded to one decimal place) plus ``sample_size``.
    """
    if not values:
        return None

    ordered = sorted(values)
    n = len(ordered)
    middle = n // 2
    if n % 2:
        median = ordered[middle]
    else:
        # Even sample size: the median is the mean of the two central
        # values, not simply the upper one.
        median = (ordered[middle - 1] + ordered[middle]) / 2

    return {
        "mean_hours": round(sum(ordered) / n, 1),
        "median_hours": round(median, 1),
        "min_hours": round(ordered[0], 1),
        "max_hours": round(ordered[-1], 1),
        "sample_size": n,
    }
# ── MTTD (Mean Time to Detect) ───────────────────────────────────────
def calculate_mttd(db: Session) -> Optional[dict]:
    """Compute Mean Time to Detect statistics over validated tests.

    For every test in the ``validated`` state, the earliest audit-log entry
    for an execution-start action is paired with the earliest entry for a
    red-submission action. The gap between the two, in hours, is one sample.
    Tests missing either entry, or whose submission is not strictly later
    than the start, contribute no sample.

    Args:
        db: Session used to query tests and audit-log entries.

    Returns:
        Whatever ``_safe_stats`` returns for the collected samples.
    """

    def earliest_entry(entity_id: str, actions: list[str]):
        """Return the oldest audit row for the test matching ``actions``."""
        matching = db.query(AuditLog.timestamp).filter(
            AuditLog.entity_type == "test",
            AuditLog.entity_id == entity_id,
            AuditLog.action.in_(actions),
        )
        return matching.order_by(AuditLog.timestamp.asc()).first()

    validated = db.query(Test).filter(Test.state == TestState.validated).all()

    samples = []
    # NOTE(review): this issues two audit-log queries per validated test.
    for test in validated:
        entity_id = str(test.id)
        started = earliest_entry(entity_id, ["test_start_execution", "start_execution"])
        submitted = earliest_entry(entity_id, ["test_submit_red", "submit_red"])
        if not (started and submitted):
            continue
        begin, end = started[0], submitted[0]
        if end > begin:
            samples.append((end - begin).total_seconds() / 3600)

    return _safe_stats(samples)
# ── MTTR (Mean Time to Respond/Remediate) ─────────────────────────────
def calculate_mttr(db: Session) -> Optional[dict]:
    """Compute Mean Time to Respond statistics over remediated tests.

    Considers tests whose ``remediation_status`` is ``"completed"`` and that
    have a ``blue_validated_at`` timestamp. For each one, the most recent
    audit-log entry whose action contains "remediation" (case-insensitive)
    is taken as the completion time; the gap from ``blue_validated_at`` to
    that entry, in hours, is one sample. Non-positive gaps are discarded.

    Args:
        db: Session used to query tests and audit-log entries.

    Returns:
        Whatever ``_safe_stats`` returns for the collected samples.
    """
    remediated = db.query(Test).filter(
        Test.remediation_status == "completed",
        Test.blue_validated_at.isnot(None),
    ).all()

    samples = []
    for test in remediated:
        # Latest remediation-related audit entry for this test.
        latest = (
            db.query(AuditLog.timestamp)
            .filter(
                AuditLog.entity_type == "test",
                AuditLog.entity_id == str(test.id),
                AuditLog.action.ilike("%remediation%"),
            )
            .order_by(AuditLog.timestamp.desc())
            .first()
        )
        detected_at = test.blue_validated_at
        if not (latest and detected_at):
            continue
        elapsed = (latest[0] - detected_at).total_seconds() / 3600
        if elapsed > 0:
            samples.append(elapsed)

    return _safe_stats(samples)
# ── Detection Efficacy ───────────────────────────────────────────────
def calculate_detection_efficacy(db: Session) -> dict:
    """Report how many validated tests were detected, partially or not at all.

    Args:
        db: Session used to load tests in the ``validated`` state.

    Returns:
        A dict with ``detected``, ``partially``, ``not_detected`` and
        ``total`` counts, plus ``percentage``: detected / total * 100 rounded
        to one decimal, or ``0`` when there are no validated tests.
    """
    outcomes = [
        test.detection_result
        for test in db.query(Test).filter(Test.state == TestState.validated).all()
    ]

    def tally(wanted) -> int:
        """Count the validated tests whose detection result equals ``wanted``."""
        return sum(1 for outcome in outcomes if outcome == wanted)

    total = len(outcomes)
    detected = tally(TestResult.detected)
    partially = tally(TestResult.partially_detected)
    not_detected = tally(TestResult.not_detected)

    if total > 0:
        percentage = round((detected / total) * 100, 1)
    else:
        percentage = 0

    return {
        "percentage": percentage,
        "detected": detected,
        "partially": partially,
        "not_detected": not_detected,
        "total": total,
    }
# ── Alert Fidelity ──────────────────────────────────────────────────
def calculate_alert_fidelity(db: Session) -> dict:
    """Report the share of evaluated detection results that triggered.

    A detection result counts as evaluated when its ``triggered`` value is
    not NULL.

    Args:
        db: Session used to count ``TestDetectionResult`` rows.

    Returns:
        A dict with ``triggered``, ``not_triggered`` and ``total_evaluated``
        counts, plus ``percentage``: triggered / total_evaluated * 100
        rounded to one decimal, or ``0`` when nothing has been evaluated.
    """

    def count_where(criterion) -> int:
        """Count detection results matching ``criterion`` (0 if none)."""
        return db.query(func.count(TestDetectionResult.id)).filter(criterion).scalar() or 0

    evaluated = count_where(TestDetectionResult.triggered.isnot(None))
    # `== True` builds a SQL expression here; it is not a Python bool test.
    fired = count_where(TestDetectionResult.triggered == True)  # noqa: E712

    if evaluated > 0:
        percentage = round((fired / evaluated) * 100, 1)
    else:
        percentage = 0

    return {
        "percentage": percentage,
        "triggered": fired,
        "not_triggered": evaluated - fired,
        "total_evaluated": evaluated,
    }
# ── Coverage Velocity ────────────────────────────────────────────────
def _weekly_average_and_trend(counts: list[int]) -> tuple[float, str]:
    """Summarise chronologically ordered weekly counts.

    Args:
        counts: One count per week, oldest first. May be empty.

    Returns:
        ``(average, trend)`` where ``average`` is the mean count rounded to
        one decimal (``0`` for empty input) and ``trend`` is ``"improving"``,
        ``"declining"`` or ``"stable"``.
    """
    if not counts:
        return 0, "stable"

    average = round(sum(counts) / len(counts), 1)

    # Compare the last 4 weeks with the 4 before them; with fewer than 8
    # weeks of data, fall back to the first half of the series as baseline.
    recent = counts[-4:]
    if len(counts) >= 8:
        earlier = counts[-8:-4]
    else:
        earlier = counts[: len(counts) // 2]

    if not earlier:
        # A single week has no baseline to compare against. Treating the
        # missing baseline as 0 would always report "improving".
        return average, "stable"

    recent_avg = sum(recent) / len(recent)
    earlier_avg = sum(earlier) / len(earlier)
    # A change of more than 10% in either direction counts as a trend.
    if recent_avg > earlier_avg * 1.1:
        trend = "improving"
    elif recent_avg < earlier_avg * 0.9:
        trend = "declining"
    else:
        trend = "stable"
    return average, trend


def calculate_coverage_velocity(db: Session) -> dict:
    """Calculate how many techniques were reviewed per week.

    Groups techniques whose ``last_review_date`` falls within the last 12
    weeks by week and summarises the weekly counts. Weeks without any
    matching technique produce no row and so do not enter the average.

    Args:
        db: Session used to query techniques.

    Returns:
        A dict with ``techniques_per_week`` (average weekly count, ``0`` when
        there is no data) and ``trend`` (``"improving"``, ``"declining"`` or
        ``"stable"``).
    """
    twelve_weeks_ago = datetime.utcnow() - timedelta(weeks=12)
    # NOTE(review): date_trunc is emitted as a SQL function call; this
    # presumably targets PostgreSQL - confirm for other backends.
    week = func.date_trunc("week", Technique.last_review_date)
    weekly_rows = (
        db.query(week.label("week"), func.count(Technique.id).label("count"))
        .filter(
            Technique.last_review_date >= twelve_weeks_ago,
            Technique.last_review_date.isnot(None),
        )
        .group_by(week)
        .order_by("week")
        .all()
    )
    # Unpack positionally: attribute access to a label named "count" collides
    # with the sequence count() method on row objects.
    counts = [count for _week, count in weekly_rows]
    avg_per_week, trend = _weekly_average_and_trend(counts)
    return {
        "techniques_per_week": avg_per_week,
        "trend": trend,
    }


# ── Validation Throughput ────────────────────────────────────────────
def calculate_validation_throughput(db: Session) -> dict:
    """Calculate how many tests were validated or rejected per week.

    Groups tests in the ``validated`` or ``rejected`` state whose
    ``red_validated_at`` falls within the last 12 weeks by week of
    ``red_validated_at`` and summarises the weekly counts. Weeks without any
    matching test produce no row and so do not enter the average.

    Args:
        db: Session used to query tests.

    Returns:
        A dict with ``tests_per_week`` (average weekly count, ``0`` when
        there is no data) and ``trend`` (``"improving"``, ``"declining"`` or
        ``"stable"``).
    """
    twelve_weeks_ago = datetime.utcnow() - timedelta(weeks=12)
    # NOTE(review): date_trunc is emitted as a SQL function call; this
    # presumably targets PostgreSQL - confirm for other backends.
    week = func.date_trunc("week", Test.red_validated_at)
    weekly_rows = (
        db.query(week.label("week"), func.count(Test.id).label("count"))
        .filter(
            Test.red_validated_at >= twelve_weeks_ago,
            Test.state.in_([TestState.validated, TestState.rejected]),
        )
        .group_by(week)
        .order_by("week")
        .all()
    )
    # Unpack positionally: attribute access to a label named "count" collides
    # with the sequence count() method on row objects.
    counts = [count for _week, count in weekly_rows]
    avg_per_week, trend = _weekly_average_and_trend(counts)
    return {
        "tests_per_week": avg_per_week,
        "trend": trend,
    }
# ── Rejection Rate ──────────────────────────────────────────────────
def calculate_rejection_rate(db: Session) -> dict:
    """Calculate the overall, red-lead and blue-lead rejection rates.

    The overall rate is rejected / (validated + rejected) by test state. The
    per-lead rates are rejected / (approved + rejected) by
    ``red_validation_status`` and ``blue_validation_status`` respectively.

    Args:
        db: Session used to count tests.

    Returns:
        A dict with ``percentage``, ``by_red_lead`` and ``by_blue_lead``,
        each a percentage rounded to one decimal, or ``0`` when the
        corresponding denominator is zero.
    """

    def count_tests(criterion) -> int:
        """Count tests matching ``criterion`` (0 if none)."""
        return db.query(func.count(Test.id)).filter(criterion).scalar() or 0

    def percent(part: int, whole: int):
        """Return part / whole as a percentage, or 0 for an empty whole."""
        if whole > 0:
            return round((part / whole) * 100, 1)
        return 0

    # Overall, by final test state.
    validated = count_tests(Test.state == TestState.validated)
    rejected = count_tests(Test.state == TestState.rejected)
    overall = percent(rejected, validated + rejected)

    # Red lead decisions.
    red_rejected = count_tests(Test.red_validation_status == "rejected")
    red_decided = count_tests(Test.red_validation_status.in_(["approved", "rejected"]))
    by_red = percent(red_rejected, red_decided)

    # Blue lead decisions.
    blue_rejected = count_tests(Test.blue_validation_status == "rejected")
    blue_decided = count_tests(Test.blue_validation_status.in_(["approved", "rejected"]))
    by_blue = percent(blue_rejected, blue_decided)

    return {
        "percentage": overall,
        "by_red_lead": by_red,
        "by_blue_lead": by_blue,
    }
# ── Aggregated Operational Metrics ───────────────────────────────────
def get_all_operational_metrics(db: Session) -> dict:
    """Collect every operational metric into one response dict.

    Args:
        db: Session passed to each individual metric calculator.

    Returns:
        A dict mapping each metric name to the result of its calculator,
        evaluated in the order listed below.
    """
    calculators = (
        ("mttd", calculate_mttd),
        ("mttr", calculate_mttr),
        ("detection_efficacy", calculate_detection_efficacy),
        ("alert_fidelity", calculate_alert_fidelity),
        ("coverage_velocity", calculate_coverage_velocity),
        ("validation_throughput", calculate_validation_throughput),
        ("rejection_rate", calculate_rejection_rate),
    )
    return {name: compute(db) for name, compute in calculators}
# ── Trend Data ───────────────────────────────────────────────────────
def get_operational_trend(db: Session, period: str = "90d") -> list:
    """Build weekly detection-efficacy data points for a look-back period.

    The period is walked in 7-day steps (the last step is cut off at the
    current time). Each data point is cumulative: it covers every validated
    test whose ``red_validated_at`` is at or before the end of that step,
    and is labelled with the step's start date.

    Args:
        db: Session used to load validated tests.
        period: ``"30d"`` or ``"1y"``; any other value means 90 days.

    Returns:
        A list of dicts with ``date`` (``YYYY-MM-DD``),
        ``detection_efficacy`` (percentage rounded to one decimal, ``0``
        when there are no tests), ``validated_tests`` and ``detected_tests``.
    """
    now = datetime.utcnow()
    lookback_days = {"30d": 30, "1y": 365}.get(period, 90)

    points = []
    step_start = now - timedelta(days=lookback_days)
    while step_start < now:
        step_end = min(step_start + timedelta(days=7), now)

        # All tests validated up to the end of this step.
        snapshot = (
            db.query(Test)
            .filter(
                Test.state == TestState.validated,
                Test.red_validated_at <= step_end,
            )
            .all()
        )
        total = len(snapshot)
        detected = sum(1 for test in snapshot if test.detection_result == TestResult.detected)
        if total > 0:
            efficacy = round((detected / total) * 100, 1)
        else:
            efficacy = 0

        points.append({
            "date": step_start.strftime("%Y-%m-%d"),
            "detection_efficacy": efficacy,
            "validated_tests": total,
            "detected_tests": detected,
        })
        step_start = step_end

    return points
# ── By Team ──────────────────────────────────────────────────────────
def get_metrics_by_team(db: Session) -> dict:
    """Get completion counts, average phase duration and rejection rate per team.

    Red team: tests in the ``blue_evaluating``, ``in_review``, ``validated``
    or ``rejected`` state count as completed; the phase duration is
    ``red_validated_at - created_at``.

    Blue team: tests in the ``in_review``, ``validated`` or ``rejected``
    state count as completed; the phase duration is
    ``blue_validated_at - red_validated_at``.

    Only strictly positive durations enter the averages.

    Args:
        db: Session used to query tests.

    Returns:
        A dict with ``red_team`` and ``blue_team`` entries, each holding
        ``tests_completed``, ``avg_completion_hours`` (rounded to one
        decimal, or ``None`` when there is no usable duration) and
        ``rejection_rate``.
    """

    def count_in_states(states) -> int:
        """Count tests whose state is one of ``states`` (0 if none)."""
        return db.query(func.count(Test.id)).filter(Test.state.in_(states)).scalar() or 0

    def average_positive_hours(deltas):
        """Average the strictly positive durations, in hours, or ``None``."""
        hours = [delta.total_seconds() / 3600 for delta in deltas]
        positive = [h for h in hours if h > 0]
        if not positive:
            return None
        return round(sum(positive) / len(positive), 1)

    # Red team metrics
    red_tests_completed = count_in_states([
        TestState.blue_evaluating,
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    ])
    tests_with_red = (
        db.query(Test)
        .filter(Test.red_validated_at.isnot(None), Test.created_at.isnot(None))
        .all()
    )
    red_avg_time = average_positive_hours(
        t.red_validated_at - t.created_at for t in tests_with_red
    )

    # Blue team metrics
    blue_tests_completed = count_in_states([
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    ])
    tests_with_blue = (
        db.query(Test)
        .filter(
            Test.blue_validated_at.isnot(None),
            Test.red_validated_at.isnot(None),
        )
        .all()
    )
    blue_avg_time = average_positive_hours(
        t.blue_validated_at - t.red_validated_at for t in tests_with_blue
    )

    # Compute the rejection rates once; both teams read from the same result
    # instead of re-running every count query per team.
    rejection = calculate_rejection_rate(db)

    return {
        "red_team": {
            "tests_completed": red_tests_completed,
            "avg_completion_hours": red_avg_time,
            "rejection_rate": rejection["by_red_lead"],
        },
        "blue_team": {
            "tests_completed": blue_tests_completed,
            "avg_completion_hours": blue_avg_time,
            "rejection_rate": rejection["by_blue_lead"],
        },
    }