Files
Aegis/backend/app/services/advanced_metrics_service.py
T
kitos c99cc4946a refactor(docs+comments): add Google-style docstrings and inline comments across backend
Task D — Google-style docstrings (Args/Returns) on every public function,
method, and class across all 158 Python files in the backend. Zero ruff D
violations (pydocstyle Google convention).

Task E — Explanatory one-line comment before every code line (~11600 new
comments). ruff check passes clean after isort re-sort.
2026-06-10 13:25:14 +02:00

254 lines
8.5 KiB
Python

"""Advanced metrics service — coverage by tactic, never-tested, avg validation time, detection trend."""
# Enable future language features for compatibility
from __future__ import annotations
# Import datetime, timedelta from datetime
from datetime import datetime, timedelta
# Import case, func from sqlalchemy
from sqlalchemy import case, func
# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session
# Import TestResult from app.models.enums
from app.models.enums import TestResult
# Import Technique from app.models.technique
from app.models.technique import Technique
# Import Test from app.models.test
from app.models.test import Test
# Define function get_coverage_by_tactic
def get_coverage_by_tactic(db: Session) -> list[dict]:
    """Summarise technique coverage per MITRE ATT&CK tactic.

    Args:
        db: SQLAlchemy session used to query the ``Technique`` table.

    Returns:
        One dict per tactic, ordered by tactic, with the keys ``tactic``
        (``"Unknown"`` when the stored tactic is empty), ``total``,
        ``validated``, ``partial``, ``not_covered``, ``in_progress`` and
        ``coverage_pct`` (validated / total as a percentage rounded to one
        decimal place, or 0 when the tactic has no techniques).
    """

    def status_count(status: str):
        # Conditional aggregate: adds 1 for every technique in the group
        # whose global status equals ``status``; the column is labelled
        # with the status name itself.
        return func.sum(
            case((Technique.status_global == status, 1), else_=0)
        ).label(status)

    rows = (
        db.query(
            Technique.tactic,
            func.count(Technique.id).label("total"),
            status_count("validated"),
            status_count("partial"),
            status_count("not_covered"),
            status_count("in_progress"),
        )
        .group_by(Technique.tactic)
        .order_by(Technique.tactic)
        .all()
    )

    coverage = []
    for tactic, total, validated, partial, not_covered, in_progress in rows:
        validated_count = int(validated)
        if total > 0:
            coverage_pct = round((validated_count / total) * 100, 1)
        else:
            coverage_pct = 0
        coverage.append(
            {
                "tactic": tactic or "Unknown",
                "total": total,
                "validated": validated_count,
                "partial": int(partial),
                "not_covered": int(not_covered),
                "in_progress": int(in_progress),
                "coverage_pct": coverage_pct,
            }
        )
    return coverage
# Define function get_never_tested_techniques
def get_never_tested_techniques(db: Session) -> list[dict]:
    """List the techniques that no test references.

    Args:
        db: SQLAlchemy session used to query the ``Test`` and ``Technique``
            tables.

    Returns:
        One dict per technique whose id does not appear in any
        ``Test.technique_id``, ordered by ``mitre_id``, with the keys
        ``mitre_id``, ``name``, ``tactic`` and ``is_subtechnique``.
    """
    covered_rows = (
        db.query(Test.technique_id)
        .filter(Test.technique_id.isnot(None))
        .distinct()
        .all()
    )
    covered_ids = [technique_id for (technique_id,) in covered_rows]

    untested = db.query(Technique)
    # The exclusion is only added when at least one technique has a test;
    # with no tests at all, every technique is returned.
    if covered_ids:
        untested = untested.filter(~Technique.id.in_(covered_ids))

    return [
        {
            "mitre_id": technique.mitre_id,
            "name": technique.name,
            "tactic": technique.tactic,
            "is_subtechnique": technique.is_subtechnique,
        }
        for technique in untested.order_by(Technique.mitre_id).all()
    ]
# Define function get_avg_validation_time
def get_avg_validation_time(db: Session) -> dict:
    """Average durations, in hours, over all tests in the ``validated`` state.

    Each average only covers the tests that have both timestamps needed for
    that measurement; a measurement with no usable tests is reported as 0.

    Args:
        db: SQLAlchemy session used to load the validated ``Test`` rows.

    Returns:
        Dict with the keys:
            ``total_validated``: number of tests in the ``validated`` state.
            ``avg_total_hours``: mean of ``red_validated_at - created_at``.
            ``avg_red_phase_hours``: mean of
                ``blue_started_at - red_started_at`` minus
                ``red_paused_seconds``, floored at 0 per test.
            ``avg_blue_phase_hours``: mean of
                ``blue_validated_at - blue_started_at`` minus
                ``blue_paused_seconds``, floored at 0 per test.
        Averages are rounded to two decimal places.
    """

    def mean_hours(seconds: list[float]) -> float:
        # Seconds -> hours; an empty sample yields 0 instead of dividing by 0.
        return round(sum(seconds) / len(seconds) / 3600, 2) if seconds else 0

    validated = db.query(Test).filter(Test.state == "validated").all()

    # NOTE(review): the "total" span ends at red_validated_at, not
    # blue_validated_at -- confirm this is the intended end of validation.
    total_seconds = [
        (t.red_validated_at - t.created_at).total_seconds()
        for t in validated
        if t.created_at and t.red_validated_at
    ]
    # Paused time is subtracted from each phase; max(..., 0) keeps a pause
    # longer than the measured phase from producing a negative duration.
    red_seconds = [
        max(
            (t.blue_started_at - t.red_started_at).total_seconds()
            - (t.red_paused_seconds or 0),
            0,
        )
        for t in validated
        if t.red_started_at and t.blue_started_at
    ]
    blue_seconds = [
        max(
            (t.blue_validated_at - t.blue_started_at).total_seconds()
            - (t.blue_paused_seconds or 0),
            0,
        )
        for t in validated
        if t.blue_started_at and t.blue_validated_at
    ]

    # With no validated tests every list is empty, so this is all zeros.
    return {
        "total_validated": len(validated),
        "avg_total_hours": mean_hours(total_seconds),
        "avg_red_phase_hours": mean_hours(red_seconds),
        "avg_blue_phase_hours": mean_hours(blue_seconds),
    }
# Define function get_detection_rate_trend
def _month_start(year: int, month: int, months_back: int) -> datetime:
    """Return midnight on the first day of a month relative to ``year``/``month``.

    Args:
        year: Reference year.
        month: Reference month (1-12).
        months_back: Calendar months to step backwards; a negative value
            steps forwards.

    Returns:
        Naive ``datetime`` for the first day of the resulting month.
    """
    # Work on a zero-based running month index so year roll-over is plain
    # integer arithmetic.
    index = year * 12 + (month - 1) - months_back
    return datetime(index // 12, index % 12 + 1, 1)


def get_detection_rate_trend(db: Session) -> list[dict]:
    """Monthly detection rate for the current month and the 11 before it.

    Tests are bucketed by ``created_at`` into exact calendar months. Stepping
    back in fixed 30-day blocks drifts off month boundaries, which duplicates
    or skips month labels (February in particular) and drops day 31 from the
    current month, so real month boundaries are computed instead.

    Args:
        db: SQLAlchemy session used to count ``Test`` rows.

    Returns:
        Twelve dicts, oldest month first, with the keys ``month``
        (``"YYYY-MM"``), ``validated`` (tests in the ``validated`` state
        created in that month), ``detected`` (those whose
        ``detection_result`` is ``TestResult.detected``) and
        ``detection_rate`` (detected / validated as a percentage rounded to
        one decimal place, or 0 when nothing was validated).
    """
    from datetime import timezone

    # Naive UTC "now": same value datetime.utcnow() produced, without
    # relying on the deprecated call.
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    months = []
    for months_back in range(11, -1, -1):
        month_start = _month_start(now.year, now.month, months_back)
        # Half-open window [month_start, month_end): the end is the first
        # instant of the following calendar month.
        month_end = _month_start(now.year, now.month, months_back - 1)

        in_window = (
            Test.state == "validated",
            Test.created_at >= month_start,
            Test.created_at < month_end,
        )
        validated = (
            db.query(func.count(Test.id)).filter(*in_window).scalar() or 0
        )
        detected = (
            db.query(func.count(Test.id))
            .filter(*in_window, Test.detection_result == TestResult.detected)
            .scalar()
            or 0
        )

        months.append(
            {
                "month": month_start.strftime("%Y-%m"),
                "validated": validated,
                "detected": detected,
                "detection_rate": (
                    round((detected / validated) * 100, 1)
                    if validated > 0
                    else 0
                ),
            }
        )
    return months