feat(alerts): Phase 13 — Operational Alert Engine
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled

AlertRule + AlertInstance models (b041alerts migration), 8 pre-seeded system
rules (high_risk x2, stale_technique, coverage_regression, low_coverage,
expiry_wave, new_technique, orphan_spike), evaluation engine with per-rule
cooldown, full alert lifecycle (acknowledge/resolve/dismiss), custom rule CRUD,
and summary endpoint. Rules seeded at app startup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
kitos
2026-05-21 15:25:55 +02:00
parent d81fc04b8f
commit d4b147da7c
8 changed files with 1387 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
"""Phase 13: Operational Alerts — alert_rules and alert_instances tables.
Revision ID: b041alerts
Revises: b040ent
Create Date: 2026-05-21
"""
from alembic import op
import sqlalchemy as sa
# Alembic revision identifiers: this migration's id and the revision it
# directly follows (down_revision).
revision = "b041alerts"
down_revision = "b040ent"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the alert_rules and alert_instances tables and their indexes.

    Every statement carries ``IF NOT EXISTS``. The statements run in the
    order listed: ``alert_rules`` comes first because
    ``alert_instances.rule_id`` references it.
    """
    # The SQL text is kept flush-left so the statement strings are unchanged;
    # only the surrounding Python is restructured.
    create_alert_rules = """
CREATE TABLE IF NOT EXISTS alert_rules (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(300) NOT NULL,
description TEXT,
rule_type VARCHAR(50) NOT NULL,
severity VARCHAR(20) NOT NULL DEFAULT 'medium',
is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
is_system BOOLEAN NOT NULL DEFAULT FALSE,
config JSONB NOT NULL DEFAULT '{}',
notify_in_app BOOLEAN NOT NULL DEFAULT TRUE,
notify_webhook BOOLEAN NOT NULL DEFAULT FALSE,
webhook_id UUID REFERENCES webhook_configs(id) ON DELETE SET NULL,
cooldown_hours INTEGER NOT NULL DEFAULT 24,
created_by UUID REFERENCES users(id) ON DELETE SET NULL,
created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT now(),
last_fired_at TIMESTAMP WITHOUT TIME ZONE
)
"""
    create_alert_instances = """
CREATE TABLE IF NOT EXISTS alert_instances (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
rule_id UUID REFERENCES alert_rules(id) ON DELETE SET NULL,
rule_name VARCHAR(300) NOT NULL,
rule_type VARCHAR(50) NOT NULL,
severity VARCHAR(20) NOT NULL,
title VARCHAR(500) NOT NULL,
message TEXT NOT NULL,
details JSONB,
status VARCHAR(20) NOT NULL DEFAULT 'open',
acknowledged_by UUID REFERENCES users(id) ON DELETE SET NULL,
acknowledged_at TIMESTAMP WITHOUT TIME ZONE,
resolved_at TIMESTAMP WITHOUT TIME ZONE,
created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT now()
)
"""
    statements = (
        # ── alert_rules ──
        create_alert_rules,
        "CREATE INDEX IF NOT EXISTS ix_alert_rules_type ON alert_rules (rule_type)",
        "CREATE INDEX IF NOT EXISTS ix_alert_rules_enabled ON alert_rules (is_enabled)",
        # ── alert_instances ──
        create_alert_instances,
        "CREATE INDEX IF NOT EXISTS ix_alert_instances_rule_id ON alert_instances (rule_id)",
        "CREATE INDEX IF NOT EXISTS ix_alert_instances_status ON alert_instances (status)",
        "CREATE INDEX IF NOT EXISTS ix_alert_instances_severity ON alert_instances (severity)",
        "CREATE INDEX IF NOT EXISTS ix_alert_instances_created ON alert_instances (created_at)",
    )
    bind = op.get_bind()
    for ddl in statements:
        bind.execute(sa.text(ddl))
def downgrade() -> None:
    """Drop both alert tables, removing alert_instances before alert_rules."""
    bind = op.get_bind()
    for table_name in ("alert_instances", "alert_rules"):
        bind.execute(sa.text(f"DROP TABLE IF EXISTS {table_name} CASCADE"))

View File

@@ -46,6 +46,7 @@ from app.routers import risk_intelligence as risk_router
from app.routers import executive_dashboard as dashboard_router
from app.routers import api_keys as api_keys_router
from app.routers import sso as sso_router
from app.routers import operational_alerts as alerts_router
from app.domain.errors import DomainError
from app.middleware.error_handler import domain_exception_handler
from app.middleware.request_context import RequestContextMiddleware
@@ -76,6 +77,15 @@ async def lifespan(app: FastAPI):
pass
finally:
db.close()
# Seed operational alert system rules
db2 = SessionLocal()
try:
from app.services.operational_alert_service import seed_system_rules
seed_system_rules(db2)
except Exception:
pass
finally:
db2.close()
yield
# Graceful shutdown of the background scheduler
scheduler.shutdown(wait=False)
@@ -151,6 +161,7 @@ app.include_router(risk_router.router, prefix="/api/v1")
app.include_router(dashboard_router.router, prefix="/api/v1")
app.include_router(api_keys_router.router, prefix="/api/v1")
app.include_router(sso_router.router, prefix="/api/v1")
app.include_router(alerts_router.router, prefix="/api/v1")
@app.get("/health", include_in_schema=False)

View File

@@ -43,6 +43,7 @@ from app.models.risk_intelligence import TechniqueRiskProfile
from app.models.executive_dashboard import PostureSnapshot
from app.models.api_key import ApiKey
from app.models.sso_config import SsoConfig
from app.models.operational_alert import AlertRule, AlertInstance
__all__ = [
"User", "Technique", "Test", "TestTemplate", "Evidence",
@@ -69,4 +70,6 @@ __all__ = [
"PostureSnapshot",
"ApiKey",
"SsoConfig",
"AlertRule",
"AlertInstance",
]

View File

@@ -0,0 +1,144 @@
"""Phase 13: Operational Alerts — AlertRule and AlertInstance models."""
import enum
import uuid
from datetime import datetime
from sqlalchemy import (
Boolean, Column, DateTime, ForeignKey,
Index, Integer, String, Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import relationship
from app.database import Base
# ── Enumerations ──────────────────────────────────────────────────────────────
@enum.unique
class AlertSeverity(str, enum.Enum):
    """Severity levels assignable to alert rules and alert instances.

    Members also subclass ``str``, so each compares equal to its value.
    ``@enum.unique`` turns an accidentally duplicated value into an error at
    class creation instead of a silent alias.
    """

    critical = "critical"
    high = "high"
    medium = "medium"
    low = "low"
    info = "info"
@enum.unique
class AlertStatus(str, enum.Enum):
    """Lifecycle states of an alert instance.

    Members also subclass ``str``, so each compares equal to its value.
    ``@enum.unique`` turns an accidentally duplicated value into an error at
    class creation instead of a silent alias.
    """

    open = "open"
    acknowledged = "acknowledged"
    resolved = "resolved"
    dismissed = "dismissed"
@enum.unique
class AlertRuleType(str, enum.Enum):
    """Kinds of condition an alert rule can describe.

    Members also subclass ``str``, so each compares equal to its value.
    ``@enum.unique`` turns an accidentally duplicated value into an error at
    class creation instead of a silent alias.
    """

    high_risk = "high_risk"  # risk_score >= threshold
    stale_technique = "stale_technique"  # not validated in N days
    coverage_regression = "coverage_regression"  # coverage_pct dropped
    low_coverage = "low_coverage"  # coverage below min
    expiry_wave = "expiry_wave"  # many pending queue items
    new_technique = "new_technique"  # new MITRE techniques added
    orphan_spike = "orphan_spike"  # many unowned techniques
    custom = "custom"  # future extension placeholder
# ── AlertRule ─────────────────────────────────────────────────────────────────
class AlertRule(Base):
    """
    Defines a condition that, when satisfied, fires an AlertInstance.

    System rules (is_system=True) are seeded at startup and cannot be deleted.
    Custom rules (is_system=False) can be created by admins.

    Deleting a rule does not delete the alerts it fired: the ``rule_id``
    foreign key on ``alert_instances`` is ``ON DELETE SET NULL``, and the
    ``instances`` relationship is configured to leave that to the database.
    """

    __tablename__ = "alert_rules"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(300), nullable=False)
    description = Column(Text, nullable=True)
    rule_type = Column(String(50), nullable=False)
    severity = Column(String(20), nullable=False, default=AlertSeverity.medium.value)
    is_enabled = Column(Boolean, nullable=False, default=True)
    is_system = Column(Boolean, nullable=False, default=False)  # seeded, not deletable

    # Rule-specific thresholds/config (varies by rule_type).
    # A callable default gives every new row its own dict; a literal ``{}``
    # would be one object shared by all rows created without a config.
    config = Column(JSONB, nullable=False, default=dict)

    # Delivery
    notify_in_app = Column(Boolean, nullable=False, default=True)
    notify_webhook = Column(Boolean, nullable=False, default=False)
    webhook_id = Column(
        UUID(as_uuid=True),
        ForeignKey("webhook_configs.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Cooldown — don't re-fire within N hours of last firing
    cooldown_hours = Column(Integer, nullable=False, default=24)

    # Meta
    created_by = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at = Column(DateTime, default=datetime.utcnow)
    last_fired_at = Column(DateTime, nullable=True)

    creator = relationship("User", foreign_keys=[created_by])

    # No delete / delete-orphan cascade: fired alerts are history and must
    # outlive their rule. passive_deletes=True lets the database's
    # ON DELETE SET NULL clear rule_id instead of the ORM loading every
    # instance first.
    instances = relationship(
        "AlertInstance",
        back_populates="rule",
        passive_deletes=True,
    )

    __table_args__ = (
        Index("ix_alert_rules_type", "rule_type"),
        Index("ix_alert_rules_enabled", "is_enabled"),
    )
# ── AlertInstance ─────────────────────────────────────────────────────────────
class AlertInstance(Base):
    """
    A single firing of an AlertRule.

    Transitions: open → acknowledged → resolved
                 open → dismissed
    """

    __tablename__ = "alert_instances"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Nullable with ON DELETE SET NULL: the row stays when its rule is removed.
    rule_id = Column(
        UUID(as_uuid=True),
        ForeignKey("alert_rules.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Denormalised fields kept for history even after rule deletion
    rule_name = Column(String(300), nullable=False)
    rule_type = Column(String(50), nullable=False)
    severity = Column(String(20), nullable=False)

    title = Column(String(500), nullable=False)
    message = Column(Text, nullable=False)
    details = Column(JSONB, nullable=True)  # structured context

    # Stored as a plain string; defaults to the AlertStatus.open value.
    status = Column(String(20), nullable=False, default=AlertStatus.open.value)
    acknowledged_by = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    acknowledged_at = Column(DateTime, nullable=True)
    resolved_at = Column(DateTime, nullable=True)
    # Naive UTC timestamp (datetime.utcnow) assigned when the row is created.
    created_at = Column(DateTime, default=datetime.utcnow)

    rule = relationship("AlertRule", back_populates="instances")
    acknowledger = relationship("User", foreign_keys=[acknowledged_by])

    __table_args__ = (
        Index("ix_alert_instances_rule_id", "rule_id"),
        Index("ix_alert_instances_status", "status"),
        Index("ix_alert_instances_severity", "severity"),
        Index("ix_alert_instances_created", "created_at"),
    )

View File

@@ -0,0 +1,191 @@
"""Phase 13: Operational Alerts router."""
from typing import List, Optional
from uuid import UUID
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies.auth import get_current_user, require_any_role
from app.models.user import User
from app.schemas.operational_alert_schema import (
AlertRuleCreate, AlertRuleOut, AlertRuleUpdate,
AlertInstanceOut, EvaluationResult, AlertSummary,
)
import app.services.operational_alert_service as svc
# Every route declared in this module is served under the "/alerts" prefix.
router = APIRouter(prefix="/alerts", tags=["Operational Alerts"])
# ── Evaluation ────────────────────────────────────────────────────────────────
@router.post("/evaluate", response_model=EvaluationResult, status_code=202)
def evaluate_rules(
    db: Session = Depends(get_db),
    user=Depends(require_any_role("admin", "red_lead", "blue_lead")),
):
    """
    Trigger one pass of the alert evaluation engine over all enabled rules.

    Rules whose condition holds and that are outside their cooldown window
    produce AlertInstances, which are returned in the response.
    Restricted to admin / red_lead / blue_lead.
    """
    outcome = svc.evaluate_all_rules(db)
    fired_alerts = list(map(AlertInstanceOut.model_validate, outcome["alerts"]))
    return EvaluationResult(
        rules_evaluated=outcome["rules_evaluated"],
        alerts_fired=outcome["alerts_fired"],
        alerts=fired_alerts,
        duration_seconds=outcome["duration_seconds"],
    )
# ── Alert instances ───────────────────────────────────────────────────────────
@router.get("", response_model=List[AlertInstanceOut])
def list_alerts(
    status: Optional[str] = Query(None),
    severity: Optional[str] = Query(None),
    rule_type: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
    user=Depends(get_current_user),
):
    """Return a page of alert instances, optionally filtered by status, severity or rule type."""
    filters = {"status": status, "severity": severity, "rule_type": rule_type}
    return svc.list_instances(db, limit=limit, offset=offset, **filters)
@router.get("/summary", response_model=AlertSummary)
def alert_summary(
    db: Session = Depends(get_db),
    user=Depends(get_current_user),
):
    """Return alert counts by status, severity and rule type, plus recent alerts."""
    summary = svc.get_summary(db)
    recent = list(map(AlertInstanceOut.model_validate, summary["recent_alerts"]))
    return AlertSummary(
        total_open=summary["total_open"],
        total_acknowledged=summary["total_acknowledged"],
        total_resolved=summary["total_resolved"],
        by_severity=summary["by_severity"],
        by_rule_type=summary["by_rule_type"],
        recent_alerts=recent,
    )
@router.get("/{alert_id}", response_model=AlertInstanceOut)
def get_alert(
    alert_id: UUID,
    db: Session = Depends(get_db),
    user=Depends(get_current_user),
):
    """Fetch one alert instance by its id."""
    alert = svc.get_instance(db, alert_id)
    return alert
@router.post("/{alert_id}/acknowledge", response_model=AlertInstanceOut)
def acknowledge_alert(
    alert_id: UUID,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Acknowledge an open alert on behalf of the calling user."""
    acting_user_id = current_user.id
    return svc.acknowledge(db, alert_id, acting_user_id)
@router.post("/{alert_id}/resolve", response_model=AlertInstanceOut)
def resolve_alert(
    alert_id: UUID,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Move an alert to the resolved state."""
    acting_user_id = current_user.id
    return svc.resolve(db, alert_id, acting_user_id)
@router.post("/{alert_id}/dismiss", response_model=AlertInstanceOut)
def dismiss_alert(
    alert_id: UUID,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Move an alert to the dismissed state."""
    acting_user_id = current_user.id
    return svc.dismiss(db, alert_id, acting_user_id)
# ── Alert rules ───────────────────────────────────────────────────────────────
@router.get("/rules/list", response_model=List[AlertRuleOut])
def list_rules(
    rule_type: Optional[str] = Query(None),
    include_disabled: bool = Query(False),
    db: Session = Depends(get_db),
    user=Depends(get_current_user),
):
    """Return alert rules; readable by any authenticated user."""
    options = {"rule_type": rule_type, "include_disabled": include_disabled}
    return svc.list_rules(db, **options)
@router.post("/rules", response_model=AlertRuleOut, status_code=201)
def create_rule(
    body: AlertRuleCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_any_role("admin", "red_lead", "blue_lead")),
):
    """Create a custom alert rule owned by the calling user."""
    rule_fields = {
        "name": body.name,
        "description": body.description,
        "rule_type": body.rule_type,
        "severity": body.severity,
        "config": body.config,
        "notify_in_app": body.notify_in_app,
        "notify_webhook": body.notify_webhook,
        "webhook_id": body.webhook_id,
        "cooldown_hours": body.cooldown_hours,
    }
    return svc.create_rule(db, created_by=current_user.id, **rule_fields)
@router.get("/rules/{rule_id}", response_model=AlertRuleOut)
def get_rule(
    rule_id: UUID,
    db: Session = Depends(get_db),
    user=Depends(get_current_user),
):
    """Fetch one alert rule by its id."""
    rule = svc.get_rule(db, rule_id)
    return rule
@router.patch("/rules/{rule_id}", response_model=AlertRuleOut)
def update_rule(
    rule_id: UUID,
    body: AlertRuleUpdate,
    db: Session = Depends(get_db),
    user=Depends(require_any_role("admin", "red_lead", "blue_lead")),
):
    """Apply a partial update to an alert rule (enable/disable, thresholds, cooldown)."""
    changes = {
        "name": body.name,
        "description": body.description,
        "severity": body.severity,
        "is_enabled": body.is_enabled,
        "config": body.config,
        "notify_in_app": body.notify_in_app,
        "notify_webhook": body.notify_webhook,
        "webhook_id": body.webhook_id,
        "cooldown_hours": body.cooldown_hours,
    }
    return svc.update_rule(db, rule_id, **changes)
@router.delete("/rules/{rule_id}", status_code=204)
def delete_rule(
    rule_id: UUID,
    db: Session = Depends(get_db),
    user=Depends(require_any_role("admin")),
):
    """Remove a custom alert rule; admin only. System rules are rejected by the service."""
    svc.delete_rule(db, rule_id=rule_id)

View File

@@ -0,0 +1,124 @@
"""Phase 13: Operational Alerts — Pydantic schemas."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field, field_validator

from app.models.operational_alert import AlertRuleType, AlertSeverity, AlertStatus
# Allowed string values for validation, derived from the model enums.
VALID_SEVERITIES = {level.value for level in AlertSeverity}
VALID_STATUSES = {state.value for state in AlertStatus}
VALID_RULE_TYPES = {kind.value for kind in AlertRuleType}
# ── AlertRule schemas ─────────────────────────────────────────────────────────
class AlertRuleCreate(BaseModel):
    """Request body for creating an alert rule.

    ``rule_type`` and ``severity`` must be one of the values in
    ``VALID_RULE_TYPES`` / ``VALID_SEVERITIES``.
    """

    name: str = Field(..., min_length=1, max_length=300)
    description: Optional[str] = None
    rule_type: str
    severity: str = "medium"
    config: Dict[str, Any] = Field(default_factory=dict)
    notify_in_app: bool = True
    notify_webhook: bool = False
    webhook_id: Optional[UUID] = None
    cooldown_hours: int = Field(24, ge=0, le=8760)

    @field_validator("rule_type")
    @classmethod
    def validate_rule_type(cls, value: str) -> str:
        """Accept only known rule types."""
        if value in VALID_RULE_TYPES:
            return value
        raise ValueError(f"Invalid rule_type. Valid: {VALID_RULE_TYPES}")

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, value: str) -> str:
        """Accept only known severities."""
        if value in VALID_SEVERITIES:
            return value
        raise ValueError(f"Invalid severity. Valid: {VALID_SEVERITIES}")
class AlertRuleUpdate(BaseModel):
    """Request body for a partial rule update; every field is optional."""

    name: Optional[str] = Field(None, min_length=1, max_length=300)
    description: Optional[str] = None
    severity: Optional[str] = None
    is_enabled: Optional[bool] = None
    config: Optional[Dict[str, Any]] = None
    notify_in_app: Optional[bool] = None
    notify_webhook: Optional[bool] = None
    webhook_id: Optional[UUID] = None
    cooldown_hours: Optional[int] = Field(None, ge=0, le=8760)

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, value: Optional[str]) -> Optional[str]:
        """Let ``None`` through; otherwise accept only known severities."""
        if value is None or value in VALID_SEVERITIES:
            return value
        raise ValueError(f"Invalid severity. Valid: {VALID_SEVERITIES}")
class AlertRuleOut(BaseModel):
    """Response representation of an alert rule.

    ``from_attributes`` allows the model to be populated from attribute
    access on an object (such as an ORM row) instead of from a dict.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    name: str
    description: Optional[str] = None
    rule_type: str
    severity: str
    is_enabled: bool
    is_system: bool
    config: Dict[str, Any]
    notify_in_app: bool
    notify_webhook: bool
    webhook_id: Optional[UUID] = None
    cooldown_hours: int
    created_by: Optional[UUID] = None
    created_at: Optional[datetime] = None
    last_fired_at: Optional[datetime] = None
# ── AlertInstance schemas ─────────────────────────────────────────────────────
class AlertInstanceOut(BaseModel):
    """Response representation of a fired alert.

    ``from_attributes`` allows the model to be populated from attribute
    access on an object (such as an ORM row) instead of from a dict.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    rule_id: Optional[UUID] = None
    rule_name: str
    rule_type: str
    severity: str
    title: str
    message: str
    details: Optional[Dict[str, Any]] = None
    status: str
    acknowledged_by: Optional[UUID] = None
    acknowledged_at: Optional[datetime] = None
    resolved_at: Optional[datetime] = None
    created_at: Optional[datetime] = None
# ── Evaluation result ─────────────────────────────────────────────────────────
class EvaluationResult(BaseModel):
    """Outcome of one evaluation run, as returned by the evaluate endpoint."""

    rules_evaluated: int
    alerts_fired: int
    # The alert instances created during this run.
    alerts: List[AlertInstanceOut] = Field(default_factory=list)
    duration_seconds: float
# ── Summary ───────────────────────────────────────────────────────────────────
class AlertSummary(BaseModel):
    """Aggregate alert counts plus a short list of recent alerts."""

    total_open: int
    total_acknowledged: int
    total_resolved: int
    # Count of alerts per severity value and per rule type value.
    by_severity: Dict[str, int]
    by_rule_type: Dict[str, int]
    recent_alerts: List[AlertInstanceOut] = Field(default_factory=list)

View File

@@ -0,0 +1,530 @@
"""Phase 13: Operational Alert service — rule evaluation engine + CRUD."""
from __future__ import annotations
import logging
import time
from datetime import datetime, timedelta
from typing import List, Optional
from uuid import UUID
from sqlalchemy.orm import Session
from app.domain.errors import EntityNotFoundError, BusinessRuleViolation
from app.models.operational_alert import (
AlertInstance, AlertRule, AlertRuleType, AlertSeverity, AlertStatus,
)
from app.models.technique import Technique
from app.models.risk_intelligence import TechniqueRiskProfile
from app.models.ownership_queue import RevalidationQueueItem, QueueStatus
from app.models.ownership_queue import TechniqueOwnership
from app.models.executive_dashboard import PostureSnapshot
from app.models.enums import TechniqueStatus
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# ── Pre-configured system rules (seeded at startup) ───────────────────────────
# Each entry is passed as keyword arguments to AlertRule(...) by
# seed_system_rules(), which matches existing rows by "name".
# The "config" keys are the ones read by the evaluator for that rule_type.
SYSTEM_RULES = [
    {
        "name": "Critical Risk Techniques",
        "description": "Fires when 3 or more techniques reach critical risk level (score ≥ 75).",
        "rule_type": AlertRuleType.high_risk.value,
        "severity": AlertSeverity.critical.value,
        "is_system": True,
        "config": {"min_risk_score": 75.0, "min_count": 3},
        "cooldown_hours": 24,
    },
    {
        "name": "High-Risk Technique Spike",
        "description": "Fires when 10 or more techniques reach high risk (score ≥ 50).",
        "rule_type": AlertRuleType.high_risk.value,
        "severity": AlertSeverity.high.value,
        "is_system": True,
        "config": {"min_risk_score": 50.0, "min_count": 10},
        "cooldown_hours": 24,
    },
    {
        "name": "Stale Technique Detection",
        "description": "Fires when 5+ validated techniques have not been reviewed in 30+ days.",
        "rule_type": AlertRuleType.stale_technique.value,
        "severity": AlertSeverity.medium.value,
        "is_system": True,
        "config": {"days_stale": 30, "min_count": 5},
        "cooldown_hours": 48,
    },
    {
        "name": "Coverage Regression",
        "description": "Fires when coverage drops by 5 or more percentage points between daily snapshots.",
        "rule_type": AlertRuleType.coverage_regression.value,
        "severity": AlertSeverity.high.value,
        "is_system": True,
        "config": {"min_drop_pct": 5.0},
        "cooldown_hours": 12,
    },
    {
        "name": "Low Coverage Warning",
        "description": "Fires when overall coverage falls below 30%.",
        "rule_type": AlertRuleType.low_coverage.value,
        "severity": AlertSeverity.medium.value,
        "is_system": True,
        "config": {"max_coverage_pct": 30.0},
        "cooldown_hours": 72,
    },
    {
        "name": "Revalidation Queue Backlog",
        "description": "Fires when 15+ techniques are waiting in the revalidation queue.",
        "rule_type": AlertRuleType.expiry_wave.value,
        "severity": AlertSeverity.medium.value,
        "is_system": True,
        "config": {"min_pending_count": 15},
        "cooldown_hours": 24,
    },
    {
        "name": "New MITRE Techniques Detected",
        "description": "Fires when new ATT&CK techniques are added in the last 7 days.",
        "rule_type": AlertRuleType.new_technique.value,
        "severity": AlertSeverity.info.value,
        "is_system": True,
        "config": {"lookback_days": 7, "min_count": 1},
        "cooldown_hours": 168,  # once a week
    },
    {
        "name": "Orphan Technique Spike",
        "description": "Fires when 20+ techniques have no assigned owner.",
        "rule_type": AlertRuleType.orphan_spike.value,
        "severity": AlertSeverity.low.value,
        "is_system": True,
        "config": {"min_orphan_count": 20},
        "cooldown_hours": 48,
    },
]
def seed_system_rules(db: Session) -> int:
    """Ensure every entry of SYSTEM_RULES exists as a system rule (idempotent).

    Existing system rules are matched by name, using a single query instead
    of one lookup per definition.

    Args:
        db: Open SQLAlchemy session; committed only when rules were added.

    Returns:
        Number of rules created by this call.
    """
    # NOTE(review): matching is by name only, so a system rule that has been
    # renamed would be seeded again under its original name.
    existing_names = {
        name
        for (name,) in db.query(AlertRule.name)
        .filter(AlertRule.is_system.is_(True))
        .all()
    }
    created = 0
    for rule_def in SYSTEM_RULES:
        if rule_def["name"] in existing_names:
            continue
        # Copy the config so the stored rule does not alias the dict held by
        # the module-level SYSTEM_RULES constant.
        db.add(AlertRule(**{**rule_def, "config": dict(rule_def["config"])}))
        existing_names.add(rule_def["name"])
        created += 1
    if created:
        db.commit()
    return created
# ── Rule evaluators (one per AlertRuleType) ───────────────────────────────────
def _eval_high_risk(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when enough techniques have a risk score at or above a threshold.

    Config keys read from ``rule.config``:
        min_risk_score: inclusive score threshold (default 75.0).
        min_count: minimum number of matching profiles (default 1).

    Counting and the top-5 selection are done by the database, so only five
    profile rows are loaded regardless of how many match.

    Returns:
        ``None`` when fewer than ``min_count`` profiles match, otherwise a
        dict with ``title``, ``message`` and ``details``.
    """
    min_score = float(rule.config.get("min_risk_score", 75.0))
    min_count = int(rule.config.get("min_count", 1))
    matching = db.query(TechniqueRiskProfile).filter(
        TechniqueRiskProfile.risk_score >= min_score,
    )
    count = matching.count()
    if count < min_count:
        return None
    top = (
        matching.order_by(TechniqueRiskProfile.risk_score.desc())
        .limit(5)
        .all()
    )
    # The message shows the first 8 characters of the top three technique ids.
    top_labels = ", ".join(str(p.technique_id)[:8] for p in top[:3])
    return {
        "title": f"{count} technique(s) with risk score ≥ {min_score:.0f}",
        "message": (
            f"{count} technique(s) have reached risk score ≥ {min_score:.0f}. "
            f"Top: {top_labels}."
        ),
        "details": {
            "count": count,
            "threshold": min_score,
            "top_ids": [str(p.technique_id) for p in top],
            "top_scores": [p.risk_score for p in top],
        },
    }
def _eval_stale_technique(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when enough validated techniques have an old last_review_date.

    Reads ``days_stale`` (default 30) and ``min_count`` (default 1) from
    ``rule.config``. Returns ``None`` when fewer than ``min_count`` techniques
    match, otherwise the alert payload dict.
    """
    settings = rule.config
    days_stale = int(settings.get("days_stale", 30))
    min_count = int(settings.get("min_count", 1))
    cutoff = datetime.utcnow() - timedelta(days=days_stale)

    stale_query = db.query(Technique).filter(
        Technique.status_global == TechniqueStatus.validated,
        Technique.last_review_date < cutoff,
    )
    stale = stale_query.all()
    count = len(stale)
    if count < min_count:
        return None

    title = f"{count} validated technique(s) stale for {days_stale}+ days"
    message = (
        f"{count} technique(s) have been validated but not reviewed in over "
        f"{days_stale} days. Re-validate to maintain confidence."
    )
    details = {
        "count": count,
        "days_stale": days_stale,
        # Only the first ten ids are included as examples.
        "example_ids": [str(t.id) for t in stale[:10]],
    }
    return {"title": title, "message": message, "details": details}
def _eval_coverage_regression(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when coverage fell between the two most recent posture snapshots.

    Reads ``min_drop_pct`` (default 5.0) from ``rule.config``. Returns
    ``None`` when fewer than two snapshots exist or the drop is below the
    threshold, otherwise the alert payload dict.
    """
    min_drop = float(rule.config.get("min_drop_pct", 5.0))
    newest_first = db.query(PostureSnapshot).order_by(
        PostureSnapshot.snapshot_date.desc()
    )
    snaps = newest_first.limit(2).all()
    if len(snaps) < 2:
        return None

    latest, previous = snaps
    drop = previous.coverage_pct - latest.coverage_pct
    if drop < min_drop:
        return None

    title = (
        f"Coverage dropped {drop:.1f}% "
        f"({previous.coverage_pct:.1f}% → {latest.coverage_pct:.1f}%)"
    )
    message = (
        f"Overall coverage fell by {drop:.1f} percentage points "
        f"between {previous.snapshot_date} and {latest.snapshot_date}. "
        f"Investigate recent technique status changes."
    )
    details = {
        "previous_pct": previous.coverage_pct,
        "current_pct": latest.coverage_pct,
        "drop_pct": round(drop, 2),
        "previous_date": str(previous.snapshot_date),
        "current_date": str(latest.snapshot_date),
    }
    return {"title": title, "message": message, "details": details}
def _eval_low_coverage(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when overall coverage is at or below a configured ceiling.

    Coverage is ``(validated + 0.5 * partial) / total * 100``. The three
    counts come from COUNT queries, so no technique rows are loaded.

    Config keys read from ``rule.config``:
        max_coverage_pct: the alert fires when coverage <= this value
            (default 30.0).

    Returns:
        ``None`` when there are no techniques or coverage is above the
        ceiling, otherwise a dict with ``title``, ``message`` and ``details``.
    """
    max_pct = float(rule.config.get("max_coverage_pct", 30.0))
    total = db.query(Technique).count()
    if total == 0:
        return None
    validated = (
        db.query(Technique)
        .filter(Technique.status_global == TechniqueStatus.validated)
        .count()
    )
    partial = (
        db.query(Technique)
        .filter(Technique.status_global == TechniqueStatus.partial)
        .count()
    )
    coverage = (validated + partial * 0.5) / total * 100.0
    if coverage > max_pct:
        return None
    return {
        "title": f"Coverage is critically low: {coverage:.1f}%",
        "message": (
            f"Current detection coverage is {coverage:.1f}%, below the minimum "
            f"threshold of {max_pct:.0f}%. Prioritise coverage improvements."
        ),
        "details": {
            "coverage_pct": round(coverage, 2),
            "threshold": max_pct,
            "validated": validated,
            "partial": partial,
            "total": total,
        },
    }
def _eval_expiry_wave(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when the revalidation queue holds too many unfinished items.

    Counts queue items whose status is pending or in_progress and compares
    the count with ``min_pending_count`` (default 15) from ``rule.config``.
    Returns ``None`` below the threshold, otherwise the alert payload dict.
    """
    min_pending = int(rule.config.get("min_pending_count", 15))
    open_statuses = [QueueStatus.pending, QueueStatus.in_progress]
    backlog = db.query(RevalidationQueueItem).filter(
        RevalidationQueueItem.status.in_(open_statuses),
    )
    pending_count = backlog.count()
    if pending_count < min_pending:
        return None

    title = f"Revalidation queue backlog: {pending_count} items pending"
    message = (
        f"{pending_count} technique(s) are waiting in the revalidation queue "
        f"(threshold: {min_pending}). Assign analysts to clear the backlog."
    )
    details = {"pending_count": pending_count, "threshold": min_pending}
    return {"title": title, "message": message, "details": details}
def _eval_new_technique(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when techniques were modified upstream within a lookback window.

    Selects techniques whose ``mitre_last_modified`` is on or after
    ``now - lookback_days`` (default 7) and requires at least ``min_count``
    (default 1) of them. Returns ``None`` otherwise.
    """
    settings = rule.config
    lookback_days = int(settings.get("lookback_days", 7))
    min_count = int(settings.get("min_count", 1))
    cutoff = datetime.utcnow() - timedelta(days=lookback_days)

    recent_query = db.query(Technique).filter(
        Technique.mitre_last_modified >= cutoff,
    )
    new_techs = recent_query.all()
    count = len(new_techs)
    if count < min_count:
        return None

    # At most twenty techniques are listed in the details payload.
    listed = new_techs[:20]
    title = f"{count} new/updated MITRE technique(s) in last {lookback_days} days"
    message = (
        f"{count} ATT&CK technique(s) have been added or updated in the last "
        f"{lookback_days} days. Review and assign coverage."
    )
    details = {
        "count": count,
        "lookback_days": lookback_days,
        "technique_ids": [str(t.id) for t in listed],
        "mitre_ids": [t.mitre_id for t in listed],
    }
    return {"title": title, "message": message, "details": details}
def _eval_orphan_spike(db: Session, rule: AlertRule) -> Optional[dict]:
    """Fire when too many techniques have no owner.

    The orphan count is the number of techniques minus the number of
    ownership rows with a non-null owner, floored at zero. Compared with
    ``min_orphan_count`` (default 20) from ``rule.config``.
    """
    # NOTE(review): assumes at most one ownership row per technique; with
    # several rows per technique the orphan count is underestimated — confirm.
    min_orphans = int(rule.config.get("min_orphan_count", 20))
    total = db.query(Technique).count()
    owned_query = db.query(TechniqueOwnership).filter(
        TechniqueOwnership.owner_id.isnot(None),
    )
    owned = owned_query.count()
    orphans = max(total - owned, 0)
    if orphans < min_orphans:
        return None

    title = f"{orphans} unowned techniques detected"
    message = (
        f"{orphans} out of {total} technique(s) have no assigned owner. "
        f"Assign ownership to ensure accountability."
    )
    details = {
        "orphan_count": orphans,
        "total": total,
        "threshold": min_orphans,
    }
    return {"title": title, "message": message, "details": details}
# Dispatch table: rule_type string -> evaluator function.
# AlertRuleType.custom has no entry here, so evaluate_all_rules() skips
# rules of that type.
_EVALUATORS = {
    AlertRuleType.high_risk.value: _eval_high_risk,
    AlertRuleType.stale_technique.value: _eval_stale_technique,
    AlertRuleType.coverage_regression.value: _eval_coverage_regression,
    AlertRuleType.low_coverage.value: _eval_low_coverage,
    AlertRuleType.expiry_wave.value: _eval_expiry_wave,
    AlertRuleType.new_technique.value: _eval_new_technique,
    AlertRuleType.orphan_spike.value: _eval_orphan_spike,
}
# ── Core evaluation engine ────────────────────────────────────────────────────
def _in_cooldown(rule: AlertRule) -> bool:
if rule.last_fired_at is None:
return False
if rule.cooldown_hours <= 0:
return False
return datetime.utcnow() < rule.last_fired_at + timedelta(hours=rule.cooldown_hours)
def evaluate_all_rules(db: Session) -> dict:
    """Evaluate every enabled rule; create AlertInstances for those that fire.

    Rules in cooldown, and rules whose type has no registered evaluator, are
    skipped. Each evaluator runs inside its own SAVEPOINT: if it raises
    (including on a database error), only that savepoint is rolled back, so
    the surrounding transaction stays usable for the remaining rules and
    alerts already created in this run are still committed.

    Args:
        db: Open SQLAlchemy session; committed once at the end.

    Returns:
        Dict with ``rules_evaluated`` (number of enabled rules, including
        skipped ones), ``alerts_fired``, ``alerts`` (the new AlertInstance
        objects) and ``duration_seconds``.
    """
    t0 = time.monotonic()
    rules = db.query(AlertRule).filter(AlertRule.is_enabled == True).all()  # noqa: E712
    fired: List[AlertInstance] = []

    for rule in rules:
        if _in_cooldown(rule):
            continue
        evaluator = _EVALUATORS.get(rule.rule_type)
        if evaluator is None:
            continue

        # Read the identifiers up front so the error path does not need to
        # touch the ORM object after a failed statement.
        rule_id, rule_name = rule.id, rule.name
        try:
            with db.begin_nested():
                result = evaluator(db, rule)
        except Exception:
            # Deliberate best-effort: one broken rule must not stop the rest.
            log.exception("Error evaluating rule %s (%s)", rule_id, rule_name)
            continue
        if result is None:
            continue  # condition not met

        instance = AlertInstance(
            rule_id=rule.id,
            rule_name=rule.name,
            rule_type=rule.rule_type,
            severity=rule.severity,
            title=result["title"],
            message=result["message"],
            details=result.get("details"),
            status=AlertStatus.open.value,
        )
        db.add(instance)
        rule.last_fired_at = datetime.utcnow()
        fired.append(instance)

    db.commit()
    # Reload the new rows so their column values are populated after commit.
    for inst in fired:
        db.refresh(inst)

    return {
        "rules_evaluated": len(rules),
        "alerts_fired": len(fired),
        "alerts": fired,
        "duration_seconds": round(time.monotonic() - t0, 3),
    }
# ── AlertRule CRUD ────────────────────────────────────────────────────────────
def list_rules(
    db: Session,
    rule_type: Optional[str] = None,
    include_disabled: bool = False,
) -> List[AlertRule]:
    """Return rules ordered by creation time, oldest first.

    Optionally restricted to one ``rule_type``; disabled rules are left out
    unless ``include_disabled`` is true.
    """
    conditions = []
    if rule_type:
        conditions.append(AlertRule.rule_type == rule_type)
    if not include_disabled:
        conditions.append(AlertRule.is_enabled == True)  # noqa: E712
    query = db.query(AlertRule).filter(*conditions)
    return query.order_by(AlertRule.created_at.asc()).all()
def get_rule(db: Session, rule_id: UUID) -> AlertRule:
    """Return the rule with the given id or raise EntityNotFoundError."""
    found = db.query(AlertRule).filter(AlertRule.id == rule_id).first()
    if found:
        return found
    raise EntityNotFoundError("AlertRule", str(rule_id))
def create_rule(db: Session, created_by: UUID, **kwargs) -> AlertRule:
    """Persist a new non-system rule attributed to ``created_by``.

    Any ``is_system`` / ``created_by`` value passed in ``kwargs`` is
    overridden.
    """
    attributes = dict(kwargs, is_system=False, created_by=created_by)
    new_rule = AlertRule(**attributes)
    db.add(new_rule)
    db.commit()
    db.refresh(new_rule)
    return new_rule
def update_rule(db: Session, rule_id: UUID, **kwargs) -> AlertRule:
    """Set the given attributes on a rule and commit.

    Keyword arguments whose value is ``None`` are ignored, so a field cannot
    be reset to ``None`` through this function.
    Raises EntityNotFoundError (via get_rule) when the rule does not exist.
    """
    target = get_rule(db, rule_id)
    changes = {field: value for field, value in kwargs.items() if value is not None}
    for field, value in changes.items():
        setattr(target, field, value)
    db.commit()
    db.refresh(target)
    return target
def delete_rule(db: Session, rule_id: UUID) -> None:
    """Delete a custom rule.

    Raises EntityNotFoundError (via get_rule) when the rule does not exist
    and BusinessRuleViolation when it is a system rule.
    """
    target = get_rule(db, rule_id)
    if target.is_system:
        raise BusinessRuleViolation("System rules cannot be deleted. Disable them instead.")
    db.delete(target)
    db.commit()
# ── AlertInstance CRUD ────────────────────────────────────────────────────────
def list_instances(
    db: Session,
    status: Optional[str] = None,
    severity: Optional[str] = None,
    rule_type: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
) -> List[AlertInstance]:
    """Return one page of alert instances, newest first.

    Each of ``status``, ``severity`` and ``rule_type`` narrows the result
    only when it is given a non-empty value.
    """
    requested = (
        (AlertInstance.status, status),
        (AlertInstance.severity, severity),
        (AlertInstance.rule_type, rule_type),
    )
    conditions = [column == wanted for column, wanted in requested if wanted]
    newest_first = (
        db.query(AlertInstance)
        .filter(*conditions)
        .order_by(AlertInstance.created_at.desc())
    )
    return newest_first.offset(offset).limit(limit).all()
def get_instance(db: Session, instance_id: UUID) -> AlertInstance:
    """Return the alert instance with the given id or raise EntityNotFoundError."""
    found = db.query(AlertInstance).filter(AlertInstance.id == instance_id).first()
    if found:
        return found
    raise EntityNotFoundError("AlertInstance", str(instance_id))
def _transition(
    db: Session,
    instance_id: UUID,
    new_status: str,
    user_id: Optional[UUID] = None,
) -> AlertInstance:
    """Set an alert's status, stamp the matching audit fields, and commit.

    Acknowledging records ``acknowledged_by`` / ``acknowledged_at``;
    resolving records ``resolved_at``. Other statuses only change ``status``.
    No validation of the current status happens here.
    """
    alert = get_instance(db, instance_id)
    now = datetime.utcnow()
    alert.status = new_status
    if new_status == AlertStatus.acknowledged.value:
        alert.acknowledged_by = user_id
        alert.acknowledged_at = now
    if new_status == AlertStatus.resolved.value:
        alert.resolved_at = now
    db.commit()
    db.refresh(alert)
    return alert
def acknowledge(db: Session, instance_id: UUID, user_id: UUID) -> AlertInstance:
    """Acknowledge an alert; only alerts in the open state are accepted."""
    current_status = get_instance(db, instance_id).status
    if current_status == AlertStatus.open.value:
        return _transition(db, instance_id, AlertStatus.acknowledged.value, user_id)
    raise BusinessRuleViolation(f"Cannot acknowledge alert in status '{current_status}'.")
def resolve(db: Session, instance_id: UUID, user_id: UUID) -> AlertInstance:
    """Mark an alert as resolved.

    Open and acknowledged alerts can be resolved. Resolved and dismissed
    alerts are both terminal, matching the states ``dismiss()`` refuses.

    Raises:
        EntityNotFoundError: no alert with this id exists.
        BusinessRuleViolation: the alert is already resolved or was dismissed.
    """
    inst = get_instance(db, instance_id)
    if inst.status == AlertStatus.resolved.value:
        raise BusinessRuleViolation("Alert is already resolved.")
    if inst.status == AlertStatus.dismissed.value:
        # A dismissed alert is closed; resolving it would reopen its lifecycle.
        raise BusinessRuleViolation(f"Cannot resolve alert in status '{inst.status}'.")
    return _transition(db, instance_id, AlertStatus.resolved.value, user_id)
def dismiss(db: Session, instance_id: UUID, user_id: UUID) -> AlertInstance:
    """Dismiss an alert unless it is already resolved or dismissed."""
    closed_states = {AlertStatus.resolved.value, AlertStatus.dismissed.value}
    current_status = get_instance(db, instance_id).status
    if current_status in closed_states:
        raise BusinessRuleViolation(f"Cannot dismiss alert in status '{current_status}'.")
    return _transition(db, instance_id, AlertStatus.dismissed.value, user_id)
def get_summary(db: Session) -> dict:
    """Return aggregate alert counts and the five newest open alerts.

    Counts are computed by the database with GROUP BY, so alert rows are not
    loaded just to be counted.

    Returns:
        Dict with ``total_open``, ``total_acknowledged``, ``total_resolved``,
        ``by_severity`` (every AlertSeverity value present, zero-filled),
        ``by_rule_type`` (only rule types that have alerts) and
        ``recent_alerts`` (up to five open AlertInstance objects, newest first).
    """
    # Local import: ``func`` is not among this module's top-level imports.
    from sqlalchemy import func

    def _grouped_counts(column) -> dict:
        """Map each distinct value of ``column`` to its number of alerts."""
        rows = (
            db.query(column, func.count(AlertInstance.id))
            .group_by(column)
            .all()
        )
        return {value: total for value, total in rows}

    # Zero-fill the known values so absent statuses/severities report 0.
    by_status = {s.value: 0 for s in AlertStatus}
    by_status.update(_grouped_counts(AlertInstance.status))
    by_severity = {s.value: 0 for s in AlertSeverity}
    by_severity.update(_grouped_counts(AlertInstance.severity))
    by_type = _grouped_counts(AlertInstance.rule_type)

    recent = (
        db.query(AlertInstance)
        .filter(AlertInstance.status == AlertStatus.open.value)
        .order_by(AlertInstance.created_at.desc())
        .limit(5)
        .all()
    )
    return {
        "total_open": by_status.get(AlertStatus.open.value, 0),
        "total_acknowledged": by_status.get(AlertStatus.acknowledged.value, 0),
        "total_resolved": by_status.get(AlertStatus.resolved.value, 0),
        "by_severity": by_severity,
        "by_rule_type": by_type,
        "recent_alerts": recent,
    }