# Aegis/backend/app/services/operational_metrics_service.py

"""Operational metrics service — MTTD, MTTR, Detection Efficacy, and more.

Calculates security operations KPIs from test data and audit logs.
"""

# Import datetime, timedelta from datetime
from datetime import datetime, timedelta

# Import Optional from typing
from typing import Optional

# Import func from sqlalchemy
from sqlalchemy import func

# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session

# NOTE: AuditLog (app.models.audit) is not imported; nothing in this module references it.

# Import TestResult, TestState from app.models.enums
from app.models.enums import TestResult, TestState

# Import Technique from app.models.technique
from app.models.technique import Technique

# Import Test from app.models.test
from app.models.test import Test

# Import TestDetectionResult from app.models.test_detection_result
from app.models.test_detection_result import TestDetectionResult


# Define function _safe_stats
def _safe_stats(values: list[float]) -> dict:
    """Compute mean, median, min, max from a list of floats (in hours).
    For sub-hour averages, mean_hours is stored as minutes to avoid
    rounding to 0.0 which is falsy in JavaScript."""
    if not values:
        # Return None
        return None
    # Assign sorted_vals = sorted(values)
    sorted_vals = sorted(values)
    # Assign n = len(sorted_vals)
    n = len(sorted_vals)
    mean = sum(sorted_vals) / n
    # Use minutes for sub-hour values to avoid JS falsy 0.0
    mean_display = round(mean * 60, 1) if mean < 1 else round(mean, 1)
    return {
        "mean_hours": mean_display,
        "unit": "min" if mean < 1 else "hrs",
        "median_hours": round(sorted_vals[n // 2], 1),
        # Literal argument value
        "min_hours": round(sorted_vals[0], 1),
        # Literal argument value
        "max_hours": round(sorted_vals[-1], 1),
        # Literal argument value
        "sample_size": n,
    }


# ── MTTD (Mean Time to Detect) ───────────────────────────────────────


def calculate_mttd(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Detect.

    For every test that has both ``red_started_at`` and ``blue_started_at``
    set, the duration is::

        blue_started_at - red_started_at - red_paused_seconds

    (``red_paused_seconds`` counts as 0 when unset). Durations that are not
    strictly positive are discarded; the rest are converted to hours.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        Optional[dict]: The summary produced by ``_safe_stats`` for the
            collected durations, or ``None`` when no test qualifies.
    """
    candidates = (
        db.query(Test)
        .filter(
            Test.red_started_at.isnot(None),
            Test.blue_started_at.isnot(None),
        )
        .all()
    )

    # Net seconds per test: wall-clock gap minus the time Red was paused.
    net_seconds = (
        (test.blue_started_at - test.red_started_at).total_seconds()
        - (test.red_paused_seconds or 0)
        for test in candidates
    )

    return _safe_stats([secs / 3600 for secs in net_seconds if secs > 0])


# ── MTTR (Mean Time to Respond/Remediate) ─────────────────────────────


def calculate_mttr(db: Session) -> Optional[dict]:
    """Calculate Mean Time to Respond.

    Only tests in the ``validated`` state with both ``red_started_at`` and
    ``blue_validated_at`` set are considered. For each one the duration is
    ``blue_validated_at - red_started_at`` minus the red and blue paused
    seconds (each counted as 0 when unset). Durations that are not strictly
    positive are discarded; the rest are converted to hours.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        Optional[dict]: The summary produced by ``_safe_stats`` for the
            collected durations, or ``None`` when no test qualifies.
    """
    finished = (
        db.query(Test)
        .filter(
            Test.state == TestState.validated,
            Test.red_started_at.isnot(None),
            Test.blue_validated_at.isnot(None),
        )
        .all()
    )

    hours = []
    for test in finished:
        paused_total = (test.red_paused_seconds or 0) + (test.blue_paused_seconds or 0)
        elapsed = (test.blue_validated_at - test.red_started_at).total_seconds()
        remaining = elapsed - paused_total
        if remaining > 0:
            hours.append(remaining / 3600)

    return _safe_stats(hours)


# ── Detection Efficacy ───────────────────────────────────────────────


def calculate_detection_efficacy(db: Session) -> dict:
    """Calculate detection efficacy over all validated tests.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``percentage`` (share of validated tests whose
            ``detection_result`` is ``detected``, one decimal), the counts
            ``detected``, ``partially`` and ``not_detected``, and ``total``
            (number of validated tests). Every value is 0 when there are no
            validated tests.
    """
    validated = db.query(Test).filter(Test.state == TestState.validated).all()
    if not validated:
        return dict.fromkeys(
            ("percentage", "detected", "partially", "not_detected", "total"), 0
        )

    outcomes = [test.detection_result for test in validated]

    def tally(outcome) -> int:
        """Count validated tests whose detection result equals ``outcome``."""
        return sum(1 for value in outcomes if value == outcome)

    total = len(outcomes)
    detected = tally(TestResult.detected)
    partially = tally(TestResult.partially_detected)
    not_detected = tally(TestResult.not_detected)

    return {
        "percentage": round((detected / total) * 100, 1),
        "detected": detected,
        "partially": partially,
        "not_detected": not_detected,
        "total": total,
    }


# ── Alert Fidelity ──────────────────────────────────────────────────


def calculate_alert_fidelity(db: Session) -> dict:
    """Calculate alert fidelity from ``TestDetectionResult`` rows.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``total_evaluated`` (rows whose ``triggered`` is not NULL),
            ``triggered`` (rows whose ``triggered`` is true),
            ``not_triggered`` (the difference of the two) and ``percentage``
            (triggered / total_evaluated as a percentage with one decimal,
            or 0 when nothing has been evaluated).
    """

    def count_where(criterion) -> int:
        """Count detection-result rows matching ``criterion``."""
        return (
            db.query(func.count(TestDetectionResult.id)).filter(criterion).scalar()
        ) or 0

    evaluated = count_where(TestDetectionResult.triggered.isnot(None))
    # ``== True`` is deliberate: it builds a SQL expression, not a Python bool.
    fired = count_where(TestDetectionResult.triggered == True)  # noqa: E712

    if evaluated > 0:
        share = round((fired / evaluated) * 100, 1)
    else:
        share = 0

    return {
        "percentage": share,
        "triggered": fired,
        "not_triggered": evaluated - fired,
        "total_evaluated": evaluated,
    }


# ── Coverage Velocity ────────────────────────────────────────────────


def calculate_coverage_velocity(db: Session) -> dict:
    """Calculate how many techniques were reviewed per week recently.

    Techniques whose ``last_review_date`` falls within the last 12 weeks are
    grouped by week (``date_trunc('week', last_review_date)``) and counted.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``techniques_per_week`` - mean of the weekly counts rounded to
            one decimal (weeks without any review yield no row and therefore
            do not lower the mean; 0 when there is no data) - and ``trend``
            (``"improving"``, ``"stable"``, or ``"declining"``).
    """
    twelve_weeks_ago = datetime.utcnow() - timedelta(weeks=12)

    weekly_rows = (
        db.query(
            func.date_trunc("week", Technique.last_review_date).label("week"),
            func.count(Technique.id).label("count"),
        )
        .filter(
            Technique.last_review_date >= twelve_weeks_ago,
            Technique.last_review_date.isnot(None),
        )
        .group_by(func.date_trunc("week", Technique.last_review_date))
        .order_by("week")
        .all()
    )

    # Unpack positionally. Result rows are tuple-like, so ``row.count`` can
    # resolve to the sequence ``count()`` method instead of the column that
    # is labelled "count"; positional access is unambiguous.
    counts = [count for _week, count in weekly_rows]
    if not counts:
        return {"techniques_per_week": 0, "trend": "stable"}

    avg_per_week = round(sum(counts) / len(counts), 1)

    # Trend: compare the most recent (up to) four weekly counts with an
    # earlier window - the four before them when at least eight weeks of
    # data exist, otherwise the first half of what is available.
    # NOTE(review): with two to six weeks of data the two windows overlap,
    # and with a single week the earlier window is empty (average 0), so the
    # result is always "improving" - confirm this is intended.
    recent = counts[-4:]
    earlier = counts[-8:-4] if len(counts) >= 8 else counts[: len(counts) // 2]

    recent_avg = sum(recent) / len(recent) if recent else 0
    earlier_avg = sum(earlier) / len(earlier) if earlier else 0

    # A change of more than 10% in either direction counts as a trend.
    if recent_avg > earlier_avg * 1.1:
        trend = "improving"
    elif recent_avg < earlier_avg * 0.9:
        trend = "declining"
    else:
        trend = "stable"

    return {
        "techniques_per_week": avg_per_week,
        "trend": trend,
    }


# ── Validation Throughput ────────────────────────────────────────────


def calculate_validation_throughput(db: Session) -> dict:
    """Compute the pipeline conversion rate from current test states.

    The rate is ``validated / (validated + rejected + in_review) * 100``,
    rounded to one decimal, and ``0.0`` when no test is in any of those
    three states.

    Trend rules, evaluated in order:

    * no tests in the pipeline -> ``"stable"``
    * more tests in review than validated -> ``"declining"``
    * conversion rate of at least 80 -> ``"improving"``
    * anything else -> ``"stable"``

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``conversion_rate``, ``tests_per_week`` (same value as
            ``conversion_rate``), the ``validated`` / ``rejected`` /
            ``in_review`` counts, and ``trend``.
    """

    def count_in(state) -> int:
        """Count tests currently in ``state``."""
        return db.query(func.count(Test.id)).filter(Test.state == state).scalar() or 0

    validated = count_in(TestState.validated)
    rejected = count_in(TestState.rejected)
    in_review = count_in(TestState.in_review)
    pipeline_total = validated + rejected + in_review

    if pipeline_total > 0:
        rate = round(validated / pipeline_total * 100, 1)
    else:
        rate = 0.0

    if pipeline_total == 0:
        trend = "stable"
    elif in_review > validated:
        # Backlog is larger than what has been approved so far.
        trend = "declining"
    elif rate >= 80:
        # Most tests that reached validation were approved.
        trend = "improving"
    else:
        trend = "stable"

    return {
        # Key name retained for API compatibility; it carries the rate.
        "tests_per_week": rate,
        "conversion_rate": rate,
        "validated": validated,
        "rejected": rejected,
        "in_review": in_review,
        "trend": trend,
    }


# ── Rejection Rate ──────────────────────────────────────────────────


def calculate_rejection_rate(db: Session) -> dict:
    """Calculate the overall, red-lead and blue-lead rejection rates.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``percentage`` - rejected tests as a share of tests in the
            ``validated`` or ``rejected`` state; ``by_red_lead`` - tests
            with ``red_validation_status == "rejected"`` as a share of those
            whose status is ``"approved"`` or ``"rejected"``;
            ``by_blue_lead`` - the same for ``blue_validation_status``.
            Each value is a percentage with one decimal, or 0 when its
            denominator is zero.
    """

    def tally(criterion) -> int:
        """Count tests matching ``criterion``."""
        return db.query(func.count(Test.id)).filter(criterion).scalar() or 0

    def share(part, whole):
        """Return ``part`` as a percentage of ``whole`` (0 if ``whole`` is 0)."""
        return round((part / whole) * 100, 1) if whole > 0 else 0

    # Overall: based on the test's final state.
    validated = tally(Test.state == TestState.validated)
    rejected = tally(Test.state == TestState.rejected)

    # Red lead decisions.
    red_rejected = tally(Test.red_validation_status == "rejected")
    red_decided = tally(Test.red_validation_status.in_(["approved", "rejected"]))

    # Blue lead decisions.
    blue_rejected = tally(Test.blue_validation_status == "rejected")
    blue_decided = tally(Test.blue_validation_status.in_(["approved", "rejected"]))

    return {
        "percentage": share(rejected, validated + rejected),
        "by_red_lead": share(red_rejected, red_decided),
        "by_blue_lead": share(blue_rejected, blue_decided),
    }


# ── Aggregated Operational Metrics ───────────────────────────────────


def get_all_operational_metrics(db: Session) -> dict:
    """Run every metric calculator and collect the results in one dict.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: Keys ``mttd``, ``mttr``, ``detection_efficacy``,
            ``alert_fidelity``, ``coverage_velocity``,
            ``validation_throughput`` and ``rejection_rate``, each holding
            the return value of the matching ``calculate_*`` function.
    """
    # Ordered pairs: calculators run, and keys appear, in this order.
    calculators = (
        ("mttd", calculate_mttd),
        ("mttr", calculate_mttr),
        ("detection_efficacy", calculate_detection_efficacy),
        ("alert_fidelity", calculate_alert_fidelity),
        ("coverage_velocity", calculate_coverage_velocity),
        ("validation_throughput", calculate_validation_throughput),
        ("rejection_rate", calculate_rejection_rate),
    )
    return {key: compute(db) for key, compute in calculators}


# ── Trend Data ───────────────────────────────────────────────────────


def get_operational_trend(db: Session, period: str = "90d") -> list:
    """Return trend data points for detection efficacy in 7-day steps.

    The lookback window is walked in 7-day slices (the last slice is cut
    short at the current time). For each slice, all tests in the
    ``validated`` state with ``red_validated_at`` at or before the slice end
    are counted, so the figures are cumulative.

    Args:
        db (Session): Active SQLAlchemy database session.
        period (str): ``"30d"``, ``"90d"`` (default) or ``"1y"``. Any other
            value is treated as ``"90d"``.

    Returns:
        list: One dict per slice with ``date`` (slice start, formatted
            ``YYYY-MM-DD``), ``detection_efficacy`` (percentage, one
            decimal, 0 when there are no tests), ``validated_tests`` and
            ``detected_tests``.
    """
    now = datetime.utcnow()
    lookback_days = {"30d": 30, "1y": 365}.get(period, 90)
    step = timedelta(days=7)

    points = []
    cursor = now - timedelta(days=lookback_days)
    while cursor < now:
        slice_end = min(cursor + step, now)

        validated_so_far = (
            db.query(Test)
            .filter(
                Test.state == TestState.validated,
                Test.red_validated_at <= slice_end,
            )
            .all()
        )

        total = len(validated_so_far)
        detected = sum(
            1
            for test in validated_so_far
            if test.detection_result == TestResult.detected
        )
        efficacy = round((detected / total) * 100, 1) if total > 0 else 0

        points.append(
            {
                "date": cursor.strftime("%Y-%m-%d"),
                "detection_efficacy": efficacy,
                "validated_tests": total,
                "detected_tests": detected,
            }
        )

        cursor = slice_end

    return points


# ── By Team ──────────────────────────────────────────────────────────


def get_metrics_by_team(db: Session) -> dict:
    """Return metrics broken down by Red vs Blue team.

    Args:
        db (Session): Active SQLAlchemy database session.

    Returns:
        dict: ``red_team`` and ``blue_team`` sub-dicts, each with
            ``tests_completed``, ``avg_completion_hours``, ``avg_unit`` and
            ``rejection_rate``. ``avg_completion_hours`` is ``None`` when no
            test qualifies; when the mean is below one hour it is expressed
            in minutes and ``avg_unit`` is ``"min"`` (otherwise ``"hrs"``).
    """

    def summarise(durations_hours):
        """Return ``(display_value, unit)`` for the mean of the durations."""
        if not durations_hours:
            return None, "hrs"
        mean = sum(durations_hours) / len(durations_hours)
        if mean < 1:
            # Show minutes so a small mean does not round down to 0.0.
            return round(mean * 60, 1), "min"
        return round(mean, 1), "hrs"

    # Red team: tests that have moved past the red phase.
    red_tests_completed = (
        db.query(func.count(Test.id))
        .filter(Test.state.in_([
            TestState.blue_evaluating,
            TestState.in_review,
            TestState.validated,
            TestState.rejected,
        ]))
        .scalar()
    ) or 0

    # Red duration: red_started_at -> blue_started_at, net of red pauses.
    tests_with_red = (
        db.query(Test)
        .filter(
            Test.red_started_at.isnot(None),
            Test.blue_started_at.isnot(None),
        )
        .all()
    )
    red_times = []
    for t in tests_with_red:
        gross = (t.blue_started_at - t.red_started_at).total_seconds()
        net = gross - (t.red_paused_seconds or 0)
        if net > 0:
            red_times.append(net / 3600)
    red_avg_time, red_unit = summarise(red_times)

    # Blue team: tests that have moved past the blue evaluation phase.
    blue_tests_completed = (
        db.query(func.count(Test.id))
        .filter(Test.state.in_([
            TestState.in_review,
            TestState.validated,
            TestState.rejected,
        ]))
        .scalar()
    ) or 0

    # Blue duration: blue_work_started_at -> blue_validated_at, falling back
    # to blue_started_at when blue_work_started_at is not set; net of blue
    # pauses.
    tests_with_blue = (
        db.query(Test)
        .filter(
            Test.blue_started_at.isnot(None),
            Test.blue_validated_at.isnot(None),
        )
        .all()
    )
    blue_times = []
    for t in tests_with_blue:
        phase_start = t.blue_work_started_at or t.blue_started_at
        gross = (t.blue_validated_at - phase_start).total_seconds()
        net = gross - (t.blue_paused_seconds or 0)
        if net > 0:
            blue_times.append(net / 3600)
    blue_avg_time, blue_unit = summarise(blue_times)

    # Compute the rejection rates once; both team entries read from the same
    # result instead of re-running every COUNT query per team.
    rejection = calculate_rejection_rate(db)

    return {
        "red_team": {
            "tests_completed": red_tests_completed,
            "avg_completion_hours": red_avg_time,
            "avg_unit": red_unit,
            "rejection_rate": rejection["by_red_lead"],
        },
        "blue_team": {
            "tests_completed": blue_tests_completed,
            "avg_completion_hours": blue_avg_time,
            "avg_unit": blue_unit,
            "rejection_rate": rejection["by_blue_lead"],
        },
    }