Files
Aegis/backend/app/services/report_generation_service.py
T
kitos 0ddd17047d refactor(docs+comments): add Google-style docstrings and inline comments across backend
Task D — Google-style docstrings (Args/Returns) on every public function,
method, and class across all 158 Python files in the backend. Zero ruff D
violations (pydocstyle Google convention).

Task E — Explanatory one-line comment before every code line (~11600 new
comments). ruff check passes clean after isort re-sort.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-10 12:37:15 +02:00

583 lines
20 KiB
Python

"""High-level report generation — collects domain data and delegates to ReportEngine."""
# Import logging
import logging
# Import datetime, timedelta from datetime
from datetime import datetime, timedelta
# Import UUID from uuid
from uuid import UUID
# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session
# Import EntityNotFoundError from app.domain.exceptions
from app.domain.exceptions import EntityNotFoundError
# Import Campaign, CampaignTest from app.models.campaign
from app.models.campaign import Campaign, CampaignTest
# Import CoverageSnapshot from app.models.coverage_snapshot
from app.models.coverage_snapshot import CoverageSnapshot
# Import Technique from app.models.technique
from app.models.technique import Technique
# Import Test from app.models.test
from app.models.test import Test
# Import ThreatActor from app.models.threat_actor
from app.models.threat_actor import ThreatActor
# Import report_engine from app.services.report_engine
from app.services.report_engine import report_engine
# Assign logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
# Define function generate_purple_campaign_report
def generate_purple_campaign_report(
    db: Session,
    campaign_id: str,
    output_format: str = "pdf",
) -> str:
    """Generate the full Purple Team campaign report.

    Collects the campaign, its tests (with the MITRE technique each test
    targets), detection statistics for validated tests, and the campaign's
    threat actor, then hands the context to ``_generate``.

    Args:
        db: SQLAlchemy session used for all lookups.
        campaign_id: Campaign identifier, either a ``UUID`` instance or its
            string form.
        output_format: ``"pdf"`` or ``"docx"``; any other value is rendered
            as an HTML file by ``_generate``.

    Returns:
        The value returned by the report engine for the ``purple_campaign``
        template (presumably the generated file's path — confirm against
        ``ReportEngine``).

    Raises:
        EntityNotFoundError: If no campaign matches ``campaign_id``.
        ValueError: If ``campaign_id`` is not a valid UUID string.
    """
    cid = campaign_id if isinstance(campaign_id, UUID) else UUID(str(campaign_id))
    campaign = db.query(Campaign).filter(Campaign.id == cid).first()
    if not campaign:
        # Stringify the id, matching generate_technique_detail_report.
        raise EntityNotFoundError("Campaign", str(campaign_id))

    campaign_tests = (
        db.query(Test)
        .join(CampaignTest, CampaignTest.test_id == Test.id)
        .filter(CampaignTest.campaign_id == cid)
        .all()
    )

    # Resolve every referenced technique with one IN query instead of issuing
    # a separate SELECT per test.
    technique_ids = {
        t.technique_id for t in campaign_tests if t.technique_id is not None
    }
    techniques_by_id = {}
    if technique_ids:
        technique_rows = (
            db.query(Technique)
            .filter(Technique.id.in_(list(technique_ids)))
            .all()
        )
        techniques_by_id = {tech.id: tech for tech in technique_rows}

    tests_data = []
    for test in campaign_tests:
        technique = techniques_by_id.get(test.technique_id)
        tests_data.append(
            {
                "technique_mitre_id": technique.mitre_id if technique else "N/A",
                "name": test.name,
                "tactic": technique.tactic if technique else "N/A",
                "state": test.state.value if test.state else "draft",
                "detection_result": (
                    test.detection_result.value if test.detection_result else "pending"
                ),
            }
        )

    # Detection statistics only consider tests that reached the "validated" state.
    validated = [t for t in campaign_tests if t.state and t.state.value == "validated"]
    detected = [
        t for t in validated
        if t.detection_result and t.detection_result.value == "detected"
    ]
    not_detected = [
        t for t in validated
        if t.detection_result and t.detection_result.value == "not_detected"
    ]

    # Findings are built from every campaign test with a "not_detected"
    # result, regardless of the test's state.
    critical_findings = [
        {
            "technique_id": t["technique_mitre_id"],
            "name": t["name"],
            "severity": "critical",
            "description": "Technique was not detected during campaign execution.",
            "recommendation": "Implement detection rule or review existing SIEM/EDR configuration.",
        }
        for t in tests_data
        if t["detection_result"] == "not_detected"
    ]

    org_score = _safe_org_score(db)

    threat_actors = []
    if campaign.threat_actor_id:
        actor = db.query(ThreatActor).filter(ThreatActor.id == campaign.threat_actor_id).first()
        if actor:
            threat_actors = [{"name": actor.name}]

    context = {
        "campaign": campaign,
        "tests": tests_data,
        "tests_validated": len(validated),
        "tests_detected": len(detected),
        "tests_not_detected": len(not_detected),
        "critical_findings": critical_findings,
        "org_score": org_score.get("overall", 0),
        "tactics": list({t["tactic"] for t in tests_data}),
        "threat_actors": threat_actors,
    }
    return _generate(output_format, "purple_campaign", context)
# Define function generate_coverage_report
def generate_coverage_report(
    db: Session,
    output_format: str = "pdf",
) -> str:
    """Generate an organization-wide MITRE ATT&CK coverage report.

    The context contains the organization score, per-status technique
    counts, per-tactic validated coverage, and up to 50 techniques that
    have no test referencing them.

    Args:
        db: SQLAlchemy session used for all lookups.
        output_format: ``"pdf"`` or ``"docx"``; any other value is rendered
            as an HTML file by ``_generate``.

    Returns:
        The value returned by the report engine for the ``coverage_report``
        template (presumably the generated file's path — confirm against
        ``ReportEngine``).
    """
    from sqlalchemy import case, func

    org_score = _safe_org_score(db)
    all_techniques = db.query(Technique).all()

    # Tally techniques per global status; statuses outside this fixed set
    # are not counted.
    status_counts = dict.fromkeys(
        ("validated", "partial", "not_covered", "in_progress", "not_evaluated"), 0
    )
    for technique in all_techniques:
        status = technique.status_global
        key = status.value if status else "not_evaluated"
        if key in status_counts:
            status_counts[key] += 1

    summary = {"total_techniques": len(all_techniques)}
    summary.update(status_counts)

    # Coverage by tactic: total techniques and how many are validated.
    validated_flag = case((Technique.status_global == "validated", 1), else_=0)
    per_tactic = (
        db.query(
            Technique.tactic,
            func.count(Technique.id).label("total"),
            func.sum(validated_flag).label("validated"),
        )
        .group_by(Technique.tactic)
        .all()
    )
    tactics_coverage = []
    for row in per_tactic:
        total = row[1]
        validated = int(row[2])
        pct = round((validated / total) * 100, 1) if total > 0 else 0
        tactics_coverage.append(
            {
                "tactic": row[0] or "Unknown",
                "total": total,
                "validated": validated,
                "coverage_pct": pct,
            }
        )

    # Techniques that no test references.
    tested_ids = set()
    for row in db.query(Test.technique_id).distinct().all():
        tested_ids.add(row.technique_id)
    never_tested = [
        {"mitre_id": tech.mitre_id, "name": tech.name, "tactic": tech.tactic}
        for tech in all_techniques
        if tech.id not in tested_ids
    ]

    context = {
        "org_score": org_score,
        "summary": summary,
        "tactics_coverage": tactics_coverage,
        # Only the first 50 untested techniques are passed to the template.
        "never_tested": never_tested[:50],
    }
    return _generate(output_format, "coverage_report", context)
# Define function generate_executive_summary
def generate_executive_summary(
    db: Session,
    output_format: str = "pdf",
) -> str:
    """Generate an executive summary report.

    The context contains the organization score, per-status technique
    counts, test/campaign/remediation counters, the detection rate among
    validated tests, and the five tactics with the lowest validated coverage.

    Args:
        db: SQLAlchemy session used for all lookups.
        output_format: ``"pdf"`` or ``"docx"``; any other value is rendered
            as an HTML file by ``_generate``.

    Returns:
        The value returned by the report engine for the ``executive_summary``
        template (presumably the generated file's path — confirm against
        ``ReportEngine``).
    """
    from sqlalchemy import case, func

    org_score = _safe_org_score(db)

    techniques = db.query(Technique).all()
    # Tally techniques per global status; statuses outside this fixed set
    # are not counted.
    status_counts = {"validated": 0, "partial": 0, "not_covered": 0, "in_progress": 0, "not_evaluated": 0}
    for t in techniques:
        s = t.status_global.value if t.status_global else "not_evaluated"
        if s in status_counts:
            status_counts[s] += 1
    summary = {"total_techniques": len(techniques), **status_counts}

    total_tests = db.query(func.count(Test.id)).scalar() or 0
    active_campaigns = (
        db.query(func.count(Campaign.id)).filter(Campaign.status == "active").scalar() or 0
    )

    # "This quarter" is a rolling 90-day window, not a calendar quarter.
    quarter_ago = datetime.utcnow() - timedelta(days=90)
    tests_this_quarter = (
        db.query(func.count(Test.id)).filter(Test.created_at >= quarter_ago).scalar() or 0
    )
    open_remediations = (
        db.query(func.count(Test.id))
        .filter(Test.remediation_status.in_(["pending", "in_progress"]))
        .scalar() or 0
    )

    # Detection rate among validated tests. The denominator must be the
    # number of validated *tests* (not validated techniques) so numerator and
    # denominator count the same thing and the rate stays within 0-100.
    validated_tests = (
        db.query(func.count(Test.id)).filter(Test.state == "validated").scalar() or 0
    )
    detected_count = (
        db.query(func.count(Test.id))
        .filter(Test.state == "validated", Test.detection_result == "detected")
        .scalar() or 0
    )
    detection_rate = (
        round((detected_count / validated_tests) * 100, 1) if validated_tests > 0 else 0
    )

    # Top gaps: the five tactics with the lowest validated coverage.
    tactic_rows = (
        db.query(
            Technique.tactic,
            func.count(Technique.id).label("total"),
            func.sum(case((Technique.status_global == "validated", 1), else_=0)).label("validated"),
        )
        .group_by(Technique.tactic)
        .all()
    )
    tactic_coverage = [
        {
            "tactic": r[0] or "Unknown",
            "coverage_pct": round((int(r[2]) / r[1]) * 100, 1) if r[1] > 0 else 0,
        }
        for r in tactic_rows
    ]
    top_gaps = sorted(tactic_coverage, key=lambda x: x["coverage_pct"])[:5]

    context = {
        "org_score": org_score,
        "summary": summary,
        "total_tests": total_tests,
        "active_campaigns": active_campaigns,
        "tests_this_quarter": tests_this_quarter,
        "open_remediations": open_remediations,
        "detection_rate": detection_rate,
        "top_gaps": top_gaps,
    }
    return _generate(output_format, "executive_summary", context)
# Define function generate_quarterly_summary
def generate_quarterly_summary(
    db: Session,
    output_format: str = "pdf",
) -> str:
    """Generate the quarterly summary: executive metrics plus snapshot trend rows.

    The context contains a quarter label, the organization score, the number
    of tests created in the last 90 days, the detection rate among validated
    tests, coverage-snapshot trend rows from the last 90 days, and the five
    tactics with the lowest validated coverage.

    Args:
        db: SQLAlchemy session used for all lookups.
        output_format: ``"pdf"`` or ``"docx"``; any other value is rendered
            as an HTML file by ``_generate``.

    Returns:
        The value returned by the report engine for the ``quarterly_summary``
        template (presumably the generated file's path — confirm against
        ``ReportEngine``).
    """
    from sqlalchemy import case as sql_case
    from sqlalchemy import func

    org_score = _safe_org_score(db)

    # One timestamp drives both the 90-day window and the quarter label.
    now = datetime.utcnow()
    quarter_ago = now - timedelta(days=90)

    tests_this_quarter = (
        db.query(func.count(Test.id)).filter(Test.created_at >= quarter_ago).scalar() or 0
    )

    # Detection rate among validated tests. The denominator is the number of
    # validated *tests* (not validated techniques) so numerator and
    # denominator count the same thing and the rate stays within 0-100.
    validated_tests = (
        db.query(func.count(Test.id)).filter(Test.state == "validated").scalar() or 0
    )
    detected_count = (
        db.query(func.count(Test.id))
        .filter(Test.state == "validated", Test.detection_result == "detected")
        .scalar() or 0
    )
    detection_rate = (
        round((detected_count / validated_tests) * 100, 1) if validated_tests > 0 else 0
    )

    # Top gaps: the five tactics with the lowest validated coverage.
    tactic_rows = (
        db.query(
            Technique.tactic,
            func.count(Technique.id).label("total"),
            func.sum(sql_case((Technique.status_global == "validated", 1), else_=0)).label(
                "validated",
            ),
        )
        .group_by(Technique.tactic)
        .all()
    )
    top_gaps = sorted(
        [
            {
                "tactic": r[0] or "Unknown",
                "coverage_pct": round((int(r[2]) / r[1]) * 100, 1) if r[1] > 0 else 0,
            }
            for r in tactic_rows
        ],
        key=lambda x: x["coverage_pct"],
    )[:5]

    # Coverage snapshots from the window, oldest first, for the trend table.
    snapshots = (
        db.query(CoverageSnapshot)
        .filter(CoverageSnapshot.created_at >= quarter_ago)
        .order_by(CoverageSnapshot.created_at)
        .all()
    )
    trend_rows = [
        {
            "date": s.created_at.strftime("%Y-%m-%d") if s.created_at else "",
            "validated_count": s.validated_count,
            "total_techniques": s.total_techniques,
            "organization_score": round(s.organization_score, 1),
        }
        for s in snapshots
    ]

    # Calendar quarter of the current UTC date, e.g. "Q2 2026".
    quarter_label = f"Q{((now.month - 1) // 3) + 1} {now.year}"

    context = {
        "quarter_label": quarter_label,
        "org_score": org_score,
        "tests_this_quarter": tests_this_quarter,
        "detection_rate": detection_rate,
        "trend_rows": trend_rows,
        "top_gaps": top_gaps,
    }
    return _generate(output_format, "quarterly_summary", context)
# Define function generate_technique_detail_report
def generate_technique_detail_report(
    db: Session,
    technique_id: str,
    output_format: str = "pdf",
) -> str:
    """Generate a detailed report for a single MITRE technique and its tests.

    Args:
        db: SQLAlchemy session used for all lookups.
        technique_id: Technique identifier, either a ``UUID`` instance or
            its string form.
        output_format: ``"pdf"`` or ``"docx"``; any other value is rendered
            as an HTML file by ``_generate``.

    Returns:
        The value returned by the report engine for the ``technique_detail``
        template (presumably the generated file's path — confirm against
        ``ReportEngine``).

    Raises:
        EntityNotFoundError: If no technique matches ``technique_id``.
        ValueError: If ``technique_id`` is not a valid UUID string.
    """
    if isinstance(technique_id, UUID):
        tid = technique_id
    else:
        tid = UUID(str(technique_id))

    technique = db.query(Technique).filter(Technique.id == tid).first()
    if not technique:
        raise EntityNotFoundError("Technique", str(technique_id))

    # Tests targeting this technique, newest first.
    tests_query = db.query(Test).filter(Test.technique_id == tid)
    newest_first = tests_query.order_by(Test.created_at.desc()).all()

    tests_data = []
    for test in newest_first:
        entry = {
            "name": test.name,
            "state": test.state.value if test.state else "draft",
            "detection_result": (
                test.detection_result.value if test.detection_result else "pending"
            ),
            "created_at": test.created_at.strftime("%Y-%m-%d") if test.created_at else "",
        }
        tests_data.append(entry)

    if technique.status_global:
        technique_status = technique.status_global.value
    else:
        technique_status = "not_evaluated"

    context = {
        "technique": technique,
        "technique_status": technique_status,
        "tests": tests_data,
    }
    return _generate(output_format, "technique_detail", context)
# ── Helpers ──────────────────────────────────────────────────────────
def _safe_org_score(db: Session) -> dict:
    """Return the organization score, or a zero-filled placeholder on failure.

    Any exception raised while importing or calling the scoring service is
    logged as a warning and swallowed so report generation can continue.

    Args:
        db: Session passed through to ``calculate_organization_score``.

    Returns:
        The scoring service's result, or
        ``{"overall": 0, "coverage": 0, "detection_maturity": 0}`` when the
        service could not be imported or raised.
    """
    fallback = {"overall": 0, "coverage": 0, "detection_maturity": 0}
    try:
        # The import sits inside the try so an import failure is handled too.
        from app.services.scoring_service import calculate_organization_score

        score = calculate_organization_score(db)
    except Exception as exc:
        logger.warning("Scoring service unavailable: %s", exc)
        return fallback
    return score
# Define function _generate
def _generate(output_format: str, template_name: str, context: dict) -> str:
    """Render a template through the ReportEngine method for the given format.

    Args:
        output_format: ``"pdf"`` selects ``generate_pdf`` and ``"docx"``
            selects ``generate_docx``; every other value falls back to
            ``generate_html_file``.
        template_name: Name of the report template to render.
        context: Template context passed to the engine unchanged.

    Returns:
        Whatever the selected ``report_engine`` method returns.
    """
    if output_format == "pdf":
        render = report_engine.generate_pdf
    elif output_format == "docx":
        render = report_engine.generate_docx
    else:
        # Unrecognized formats are rendered as HTML rather than rejected.
        render = report_engine.generate_html_file
    return render(template_name, context)