feat(scoring): composite recency decay and severity weights persisted in DB [FASE-5.1]
This commit is contained in:
@@ -9,7 +9,7 @@ fixed number of aggregated queries so that organisation-wide calculations
|
||||
never produce N+1 traffic.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import case, func
|
||||
@@ -25,6 +25,61 @@ from app.models.threat_actor import ThreatActor, ThreatActorTechnique
|
||||
from app.models.enums import TestState, TestResult
|
||||
from app.services.scoring_config_service import get_scoring_weights
|
||||
|
||||
# Multiplier applied to the severity weight for each template severity label.
# Keys are lower-case; lookups are expected to lower-case the label first.
# Labels that are missing from this table are handled by the caller's default.
_SEVERITY_FACTORS: dict[str, float] = {
    "critical": 1.0,  # full severity weight
    "high": 0.85,
    "medium": 0.65,
    "low": 0.5,  # half of the severity weight
}
|
||||
|
||||
|
||||
def _recency_factor(last_tested: datetime | None) -> float:
|
||||
"""Decay factor: 1.0 when recent, decreasing over time."""
|
||||
if not last_tested:
|
||||
return 0.0
|
||||
now = datetime.now(timezone.utc)
|
||||
tested = last_tested
|
||||
if tested.tzinfo is None:
|
||||
tested = tested.replace(tzinfo=timezone.utc)
|
||||
days_ago = (now - tested).days
|
||||
if days_ago <= 90:
|
||||
return 1.0
|
||||
if days_ago <= 180:
|
||||
return 0.8
|
||||
if days_ago <= 365:
|
||||
return 0.5
|
||||
return 0.2
|
||||
|
||||
|
||||
def _severity_factor(severity_label: str | None) -> float:
|
||||
"""Map template severity to a 0–1 multiplier."""
|
||||
if not severity_label:
|
||||
return 0.7
|
||||
return _SEVERITY_FACTORS.get(severity_label.lower(), 0.7)
|
||||
|
||||
|
||||
def _max_severity_by_mitre(db: Session) -> dict[str, str]:
    """Collect the highest template severity recorded for each MITRE id.

    Runs a single query over ``TestTemplate`` rows that are active and have a
    non-null severity, then reduces them in Python.

    Args:
        db: Session used to run the query.

    Returns:
        Mapping of ``mitre_technique_id`` to the severity label exactly as
        stored on the winning template (original casing preserved). Labels are
        ranked critical > high > medium > low; any other label ranks lowest.
        On equal rank the label seen first is kept.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from app.models.test_template import TestTemplate

    rank = {"critical": 4, "high": 3, "medium": 2, "low": 1}

    template_rows = db.query(
        TestTemplate.mitre_technique_id, TestTemplate.severity
    ).filter(
        TestTemplate.is_active == True,  # noqa: E712
        TestTemplate.severity.isnot(None),
    ).all()

    highest: dict[str, str] = {}
    for technique_id, label in template_rows:
        # Skip rows with an empty id or an empty severity string.
        if not (technique_id and label):
            continue
        incumbent = highest.get(technique_id)
        # Keep the incumbent unless the new label ranks strictly higher.
        if incumbent is not None and (
            rank.get(label.lower(), 0) <= rank.get(incumbent.lower(), 0)
        ):
            continue
        highest[technique_id] = label
    return highest
|
||||
|
||||
|
||||
# ── Bulk scoring helpers (5 queries for ALL techniques) ───────────────
|
||||
|
||||
@@ -45,8 +100,15 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
w_tests = w.tests
|
||||
w_detection = w.detection_rules
|
||||
w_d3fend = w.d3fend
|
||||
w_freshness = w.freshness
|
||||
w_diversity = w.platform_diversity
|
||||
w_recency = w.recency
|
||||
w_severity = w.severity
|
||||
severity_by_mitre = _max_severity_by_mitre(db)
|
||||
|
||||
last_validated = func.coalesce(
|
||||
Test.blue_validated_at,
|
||||
Test.red_validated_at,
|
||||
Test.created_at,
|
||||
)
|
||||
|
||||
# Q1: test stats grouped by technique_id
|
||||
test_rows = (
|
||||
@@ -56,8 +118,7 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
func.count(
|
||||
case((Test.detection_result == TestResult.detected, Test.id))
|
||||
).label("detected_count"),
|
||||
func.max(Test.red_validated_at).label("latest_validated_at"),
|
||||
func.count(func.distinct(Test.platform)).label("platform_count"),
|
||||
func.max(last_validated).label("latest_validated_at"),
|
||||
)
|
||||
.filter(Test.state == TestState.validated)
|
||||
.group_by(Test.technique_id)
|
||||
@@ -70,7 +131,6 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
"validated": row.validated_count,
|
||||
"detected": row.detected_count,
|
||||
"latest_validated_at": row.latest_validated_at,
|
||||
"platform_count": row.platform_count,
|
||||
}
|
||||
|
||||
# Q2: active detection rules per mitre_id
|
||||
@@ -114,7 +174,6 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
# Q5: all techniques
|
||||
techniques = db.query(Technique).all()
|
||||
|
||||
now = datetime.utcnow()
|
||||
results: dict = {}
|
||||
|
||||
for tech in techniques:
|
||||
@@ -122,7 +181,6 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
validated = ts.get("validated", 0)
|
||||
detected = ts.get("detected", 0)
|
||||
latest_at = ts.get("latest_validated_at")
|
||||
plat_count = ts.get("platform_count", 0)
|
||||
|
||||
breakdown = {}
|
||||
|
||||
@@ -177,47 +235,41 @@ def bulk_technique_scores(db: Session) -> dict:
|
||||
),
|
||||
}
|
||||
|
||||
# 4. Freshness
|
||||
# 4. Recency decay
|
||||
recency_mult = _recency_factor(latest_at)
|
||||
recency_score = round(recency_mult * w_recency, 1)
|
||||
if latest_at:
|
||||
days_ago = (now - latest_at).days
|
||||
if days_ago < 90:
|
||||
freshness_pct = 1.0
|
||||
elif days_ago < 180:
|
||||
freshness_pct = 0.5
|
||||
tested = latest_at
|
||||
if tested.tzinfo is None:
|
||||
days_ago = (datetime.utcnow() - tested).days
|
||||
else:
|
||||
freshness_pct = 0.0
|
||||
freshness_score = round(freshness_pct * w_freshness, 1)
|
||||
freshness_detail = f"Last test {days_ago} days ago"
|
||||
days_ago = (datetime.now(timezone.utc) - tested.astimezone(timezone.utc)).days
|
||||
recency_detail = f"Last validated {days_ago} days ago (factor {recency_mult})"
|
||||
else:
|
||||
freshness_score = 0
|
||||
freshness_detail = "No validated tests"
|
||||
breakdown["freshness"] = {
|
||||
"score": freshness_score,
|
||||
"max": w_freshness,
|
||||
"detail": freshness_detail,
|
||||
recency_detail = "No validated tests"
|
||||
breakdown["recency"] = {
|
||||
"score": recency_score,
|
||||
"max": w_recency,
|
||||
"detail": recency_detail,
|
||||
}
|
||||
|
||||
# 5. Platform diversity
|
||||
available = tech.platforms or []
|
||||
total_platforms = len(available) if available else 3
|
||||
if total_platforms > 0 and plat_count > 0:
|
||||
diversity_score = round(
|
||||
min(plat_count / total_platforms, 1.0) * w_diversity, 1,
|
||||
)
|
||||
else:
|
||||
diversity_score = 0
|
||||
breakdown["platform_diversity"] = {
|
||||
"score": diversity_score,
|
||||
"max": w_diversity,
|
||||
# 5. Severity / criticality (template-driven)
|
||||
sev_label = severity_by_mitre.get(tech.mitre_id)
|
||||
sev_mult = _severity_factor(sev_label)
|
||||
severity_score = round(sev_mult * w_severity, 1)
|
||||
breakdown["severity"] = {
|
||||
"score": severity_score,
|
||||
"max": w_severity,
|
||||
"detail": (
|
||||
f"{plat_count}/{total_platforms} platforms covered"
|
||||
if plat_count > 0 else "No platforms tested"
|
||||
f"Template severity: {sev_label} (factor {sev_mult})"
|
||||
if sev_label
|
||||
else "No severity template (default factor)"
|
||||
),
|
||||
}
|
||||
|
||||
total = min(
|
||||
test_score + detection_score + d3fend_score
|
||||
+ freshness_score + diversity_score,
|
||||
+ recency_score + severity_score,
|
||||
100,
|
||||
)
|
||||
results[tech.id] = {
|
||||
@@ -265,8 +317,9 @@ def calculate_technique_score(technique: Technique, db: Session) -> dict:
|
||||
w_tests = w.tests
|
||||
w_detection = w.detection_rules
|
||||
w_d3fend = w.d3fend
|
||||
w_freshness = w.freshness
|
||||
w_diversity = w.platform_diversity
|
||||
w_recency = w.recency
|
||||
w_severity = w.severity
|
||||
severity_by_mitre = _max_severity_by_mitre(db)
|
||||
|
||||
breakdown = {}
|
||||
|
||||
@@ -360,65 +413,50 @@ def calculate_technique_score(technique: Technique, db: Session) -> dict:
|
||||
else "No D3FEND mappings",
|
||||
}
|
||||
|
||||
# ── 4. Freshness ──────────────────────────────────────────────
|
||||
most_recent_test = (
|
||||
db.query(func.max(Test.red_validated_at))
|
||||
.filter(
|
||||
Test.technique_id == technique.id,
|
||||
Test.state == TestState.validated,
|
||||
)
|
||||
.scalar()
|
||||
)
|
||||
# ── 4. Recency ────────────────────────────────────────────────
|
||||
most_recent_test = None
|
||||
for t in validated_tests:
|
||||
candidate = t.blue_validated_at or t.red_validated_at or t.created_at
|
||||
if candidate and (most_recent_test is None or candidate > most_recent_test):
|
||||
most_recent_test = candidate
|
||||
|
||||
now = datetime.utcnow()
|
||||
recency_mult = _recency_factor(most_recent_test)
|
||||
recency_score = round(recency_mult * w_recency, 1)
|
||||
if most_recent_test:
|
||||
days_ago = (now - most_recent_test).days
|
||||
if days_ago < 90:
|
||||
freshness_pct = 1.0
|
||||
elif days_ago < 180:
|
||||
freshness_pct = 0.5
|
||||
else:
|
||||
freshness_pct = 0.0
|
||||
freshness_score = round(freshness_pct * w_freshness, 1)
|
||||
freshness_detail = f"Last test {days_ago} days ago"
|
||||
days_ago = (
|
||||
datetime.now(timezone.utc) - (
|
||||
most_recent_test.replace(tzinfo=timezone.utc)
|
||||
if most_recent_test.tzinfo is None
|
||||
else most_recent_test.astimezone(timezone.utc)
|
||||
)
|
||||
).days
|
||||
recency_detail = f"Last validated {days_ago} days ago (factor {recency_mult})"
|
||||
else:
|
||||
freshness_pct = 0
|
||||
freshness_score = 0
|
||||
freshness_detail = "No validated tests"
|
||||
recency_detail = "No validated tests"
|
||||
|
||||
breakdown["freshness"] = {
|
||||
"score": freshness_score,
|
||||
"max": w_freshness,
|
||||
"detail": freshness_detail,
|
||||
breakdown["recency"] = {
|
||||
"score": recency_score,
|
||||
"max": w_recency,
|
||||
"detail": recency_detail,
|
||||
}
|
||||
|
||||
# ── 5. Platform diversity ─────────────────────────────────────
|
||||
available_platforms = technique.platforms or []
|
||||
total_platforms = len(available_platforms) if available_platforms else 3
|
||||
|
||||
tested_platforms = set()
|
||||
for t in validated_tests:
|
||||
if t.platform:
|
||||
tested_platforms.add(t.platform.lower())
|
||||
|
||||
if total_platforms > 0 and tested_platforms:
|
||||
diversity_ratio = min(len(tested_platforms) / total_platforms, 1.0)
|
||||
diversity_score = round(diversity_ratio * w_diversity, 1)
|
||||
else:
|
||||
diversity_ratio = 0
|
||||
diversity_score = 0
|
||||
|
||||
breakdown["platform_diversity"] = {
|
||||
"score": diversity_score,
|
||||
"max": w_diversity,
|
||||
"detail": f"{len(tested_platforms)}/{total_platforms} platforms covered"
|
||||
if tested_platforms
|
||||
else "No platforms tested",
|
||||
# ── 5. Severity ───────────────────────────────────────────────
|
||||
sev_label = severity_by_mitre.get(technique.mitre_id)
|
||||
sev_mult = _severity_factor(sev_label)
|
||||
severity_score = round(sev_mult * w_severity, 1)
|
||||
breakdown["severity"] = {
|
||||
"score": severity_score,
|
||||
"max": w_severity,
|
||||
"detail": (
|
||||
f"Template severity: {sev_label} (factor {sev_mult})"
|
||||
if sev_label
|
||||
else "No severity template (default factor)"
|
||||
),
|
||||
}
|
||||
|
||||
# ── Total ─────────────────────────────────────────────────────
|
||||
total = min(
|
||||
test_score + detection_score + d3fend_score + freshness_score + diversity_score,
|
||||
test_score + detection_score + d3fend_score + recency_score + severity_score,
|
||||
100,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user