# Aegis/backend/app/services/metrics_query_service.py

"""Metrics query service.

Extracts query and aggregation logic from the metrics router so that
the router remains a thin HTTP adapter.  Provides aggregated views
of MITRE ATT&CK technique coverage for dashboards and reporting.

This module is framework-agnostic: no FastAPI imports.
"""

# Enable future language features for compatibility
from __future__ import annotations

# Import defaultdict from collections
from collections import defaultdict

# Import func from sqlalchemy
from sqlalchemy import func

# Import Session, joinedload from sqlalchemy.orm
from sqlalchemy.orm import Session, joinedload

# Import TechniqueStatus, TestState from app.models.enums
from app.models.enums import TechniqueStatus, TestState

# Import Technique from app.models.technique
from app.models.technique import Technique

# Import Test from app.models.test
from app.models.test import Test

# Import the metrics schema classes from app.schemas.metrics
from app.schemas.metrics import (
    CoverageSummary,
    RecentTestItem,
    TacticCoverage,
    TeamActivity,
    TestPipelineCounts,
    ValidationRate,
)


# Define function get_coverage_summary
def get_coverage_summary(db: Session) -> CoverageSummary:
    """Summarise technique coverage across all techniques.

    Runs one grouped count over ``Technique.status_global`` and reports
    how many techniques are in each status.  Statuses that do not appear
    in the query result are reported as zero.

    Args:
        db: SQLAlchemy session used to run the query.

    Returns:
        A ``CoverageSummary`` holding the per-status counts, their total,
        and ``coverage_percentage``: the share of techniques that are
        validated or partial, rounded to two decimals (``0.0`` when there
        are no techniques at all).
    """
    grouped = db.query(
        Technique.status_global,
        func.count(Technique.id).label("cnt"),
    ).group_by(Technique.status_global)

    # Seed every known status with zero so that statuses without rows
    # still show up, then overlay what the database returned.
    per_status: dict[str, int] = dict.fromkeys(
        (member.value for member in TechniqueStatus), 0
    )
    per_status.update(
        {status.value: found for status, found in grouped.all()}
    )

    total = sum(per_status.values())
    validated = per_status["validated"]
    partial = per_status["partial"]

    # Guard the division: an empty technique table means 0% coverage.
    if total > 0:
        coverage_pct = round((validated + partial) / total * 100, 2)
    else:
        coverage_pct = 0.0

    return CoverageSummary(
        total_techniques=total,
        validated=validated,
        partial=partial,
        not_covered=per_status["not_covered"],
        in_progress=per_status["in_progress"],
        not_evaluated=per_status["not_evaluated"],
        coverage_percentage=coverage_pct,
    )


# Define function get_coverage_by_tactic
def get_coverage_by_tactic(db: Session) -> list[TacticCoverage]:
    """Return coverage breakdown grouped by tactic.

    ``Technique.tactic`` is stored as a comma-separated string, so a
    technique listed under several tactics is counted once under each of
    them (e.g. ``"persistence, privilege-escalation"`` contributes to
    both).  A technique with no usable tactic name -- ``None``, an empty
    string, or only separators/whitespace -- is grouped under
    ``"unknown"``.

    Args:
        db: SQLAlchemy session used to run the query.

    Returns:
        One ``TacticCoverage`` per tactic, sorted by tactic name, holding
        the per-status counts and their total.
    """
    rows = db.query(Technique.tactic, Technique.status_global).all()

    # Each new tactic starts with a zero counter for every known status.
    tactic_data: dict[str, dict[str, int]] = defaultdict(
        lambda: {s.value: 0 for s in TechniqueStatus}
    )

    for tactic_str, status in rows:
        # Strip every segment and de-duplicate while keeping order, so a
        # name repeated in the string cannot count the technique twice
        # under the same tactic.
        names = dict.fromkeys(
            part.strip() for part in (tactic_str or "").split(",")
        )
        # Stray commas ("a,", "a,,b") or a blank string leave an empty
        # segment behind; it is not a real tactic name.
        names.pop("", None)

        for tactic in list(names) or ["unknown"]:
            tactic_data[tactic][status.value] += 1

    result = []
    for tactic in sorted(tactic_data):
        counts = tactic_data[tactic]
        result.append(
            TacticCoverage(
                tactic=tactic,
                total=sum(counts.values()),
                validated=counts["validated"],
                partial=counts["partial"],
                not_covered=counts["not_covered"],
                not_evaluated=counts["not_evaluated"],
                in_progress=counts["in_progress"],
            )
        )

    return result


# Define function get_test_pipeline_counts
def get_test_pipeline_counts(db: Session) -> TestPipelineCounts:
    """Count the tests sitting in each pipeline state.

    Runs one grouped count over ``Test.state``.  States that do not
    appear in the query result are reported as zero.

    Args:
        db: SQLAlchemy session used to run the query.

    Returns:
        A ``TestPipelineCounts`` with one count per state plus ``total``,
        the sum over all states.
    """
    grouped = db.query(
        Test.state, func.count(Test.id).label("cnt")
    ).group_by(Test.state)

    # Zero-fill first so states without any test are still present.
    tally: dict[str, int] = dict.fromkeys(
        (member.value for member in TestState), 0
    )
    for state, found in grouped.all():
        tally[state.value] = found

    return TestPipelineCounts(
        draft=tally["draft"],
        red_executing=tally["red_executing"],
        blue_evaluating=tally["blue_evaluating"],
        in_review=tally["in_review"],
        validated=tally["validated"],
        rejected=tally["rejected"],
        total=sum(tally.values()),
    )


# Define function get_team_activity
def get_team_activity(db: Session) -> list[TeamActivity]:
    """Return activity summary for Red and Blue teams.

    A test is *completed* for a team once it has moved past that team's
    stage of the pipeline:

    * Red Team -- pending: ``draft`` and ``red_executing``; completed:
      ``blue_evaluating``, ``in_review``, ``validated``, ``rejected``.
    * Blue Team -- pending: ``blue_evaluating``; completed: ``in_review``,
      ``validated``, ``rejected``.

    All four figures are derived from a single grouped count over
    ``Test.state``, so they come from one database round trip and
    describe the same snapshot of the table.

    Args:
        db: SQLAlchemy session used to run the query.

    Returns:
        Two ``TeamActivity`` items, ``"Red Team"`` first and then
        ``"Blue Team"``.
    """
    rows = (
        db.query(Test.state, func.count(Test.id))
        .group_by(Test.state)
        .all()
    )
    per_state = {state: found for state, found in rows}

    def count_in(*states: TestState) -> int:
        """Sum the counts of the given states; missing states count as 0."""
        return sum(per_state.get(state, 0) for state in states)

    # States a test reaches only after the Blue Team has evaluated it.
    past_blue = (
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    )

    return [
        TeamActivity(
            team="Red Team",
            tests_completed=count_in(TestState.blue_evaluating, *past_blue),
            tests_pending=count_in(TestState.draft, TestState.red_executing),
        ),
        TeamActivity(
            team="Blue Team",
            tests_completed=count_in(*past_blue),
            tests_pending=count_in(TestState.blue_evaluating),
        ),
    ]


# Define function get_validation_rate
def get_validation_rate(db: Session) -> list[ValidationRate]:
    """Report approval and rejection figures for Red Lead and Blue Lead.

    For each lead, counts the tests whose validation status column
    (``Test.red_validation_status`` / ``Test.blue_validation_status``)
    equals ``"approved"`` and ``"rejected"``.  Tests with any other
    status are not part of ``total_reviewed``.

    Args:
        db: SQLAlchemy session used to run the queries.

    Returns:
        Two ``ValidationRate`` items, ``"red_lead"`` first and then
        ``"blue_lead"``.  ``approval_rate`` is the approved share of the
        reviewed tests as a percentage rounded to one decimal, or ``0.0``
        when nothing has been reviewed.
    """

    def verdict_count(column, verdict: str) -> int:
        """Count the tests whose *column* equals *verdict*."""
        found = (
            db.query(func.count(Test.id))
            .filter(column == verdict)
            .scalar()
        )
        return found or 0

    def approval_pct(approved: int, reviewed: int) -> float:
        """Return the approval percentage, guarding against zero reviews."""
        if reviewed > 0:
            return round(approved / reviewed * 100, 1)
        return 0.0

    # Red Lead figures.
    red_ok = verdict_count(Test.red_validation_status, "approved")
    red_ko = verdict_count(Test.red_validation_status, "rejected")
    red_reviewed = red_ok + red_ko
    red_pct = approval_pct(red_ok, red_reviewed)

    # Blue Lead figures.
    blue_ok = verdict_count(Test.blue_validation_status, "approved")
    blue_ko = verdict_count(Test.blue_validation_status, "rejected")
    blue_reviewed = blue_ok + blue_ko
    blue_pct = approval_pct(blue_ok, blue_reviewed)

    red_lead = ValidationRate(
        role="red_lead",
        total_reviewed=red_reviewed,
        approved=red_ok,
        rejected=red_ko,
        approval_rate=red_pct,
    )
    blue_lead = ValidationRate(
        role="blue_lead",
        total_reviewed=blue_reviewed,
        approved=blue_ok,
        rejected=blue_ko,
        approval_rate=blue_pct,
    )
    return [red_lead, blue_lead]


# Define function get_recent_tests
def get_recent_tests(db: Session, *, limit: int = 10) -> list[RecentTestItem]:
    """List the newest tests, most recently created first.

    The related technique is requested with ``joinedload`` so that
    reading ``test.technique`` below does not need a further query per
    test.

    Args:
        db: SQLAlchemy session used to run the query.
        limit: Maximum number of tests to return (keyword-only).

    Returns:
        One ``RecentTestItem`` per test, ordered by ``created_at``
        descending.  The technique fields are ``None`` for a test that
        has no technique.
    """
    newest_first = (
        db.query(Test)
        .options(joinedload(Test.technique))
        .order_by(Test.created_at.desc())
        .limit(limit)
    )

    items: list[RecentTestItem] = []
    for test in newest_first.all():
        # A test may have no technique; expose that as None fields.
        technique = test.technique
        if technique:
            mitre_id = technique.mitre_id
            technique_name = technique.name
        else:
            mitre_id = None
            technique_name = None

        items.append(
            RecentTestItem(
                id=str(test.id),
                name=test.name,
                state=test.state.value,
                technique_mitre_id=mitre_id,
                technique_name=technique_name,
                created_at=test.created_at,
            )
        )

    return items