feat(compliance): data classification fields and retention policies job [FASE-3.5]
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled

This commit is contained in:
2026-05-18 14:17:29 +02:00
parent 3e854b7b79
commit bdeeed54e1
10 changed files with 213 additions and 1 deletions

View File

@@ -0,0 +1,58 @@
"""Phase 3: audit trail columns and data classification fields.
Revision ID: b029phase3
Revises: b028phase0
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers for this migration; they mirror the "Revision ID" and
# "Revises" entries in the module docstring.
revision: str = "b029phase3"
down_revision: Union[str, None] = "b028phase0"
# No branch label or cross-branch dependency is declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def _column_names(table: str) -> set[str]:
    """Return the names of the columns currently present on ``table``.

    The lookup runs against the connection bound to the running migration,
    so callers can decide whether a column still has to be added or dropped.
    """
    inspector = sa.inspect(op.get_bind())
    names: set[str] = set()
    for column in inspector.get_columns(table):
        names.add(column["name"])
    return names
def upgrade() -> None:
    """Add the audit-trail and data-classification columns.

    ``audit_logs`` gains ``ip_address``, ``user_agent``, ``integrity_hash``
    and ``session_id`` (all nullable strings). ``tests``, ``evidences`` and
    ``campaigns`` gain a non-null ``data_classification`` column whose server
    default is ``"internal"``. A column is only added when the table does not
    already have it.
    """
    # (column name, string length), listed in the order they are added.
    audit_trail_columns = (
        ("ip_address", 45),
        ("user_agent", 500),
        ("integrity_hash", 64),
        ("session_id", 100),
    )
    # Snapshot taken once, before any audit column is added.
    existing_audit = _column_names("audit_logs")
    for name, length in audit_trail_columns:
        if name in existing_audit:
            continue
        op.add_column("audit_logs", sa.Column(name, sa.String(length), nullable=True))

    for table_name in ("tests", "evidences", "campaigns"):
        if "data_classification" in _column_names(table_name):
            continue
        classification = sa.Column(
            "data_classification",
            sa.String(20),
            nullable=False,
            server_default="internal",
        )
        op.add_column(table_name, classification)
def downgrade() -> None:
    """Drop the columns this revision adds, skipping any that are absent.

    ``data_classification`` is removed from ``campaigns``, ``evidences`` and
    ``tests`` first; the four audit-trail columns are then removed from
    ``audit_logs``.
    """
    for table_name in ("campaigns", "evidences", "tests"):
        if "data_classification" not in _column_names(table_name):
            continue
        op.drop_column(table_name, "data_classification")

    # Snapshot taken once, before any audit column is dropped.
    existing_audit = _column_names("audit_logs")
    droppable = [
        name
        for name in ("session_id", "integrity_hash", "user_agent", "ip_address")
        if name in existing_audit
    ]
    for column_name in droppable:
        op.drop_column("audit_logs", column_name)

View File

@@ -35,3 +35,10 @@ class TestResult(str, enum.Enum):
detected = "detected" detected = "detected"
not_detected = "not_detected" not_detected = "not_detected"
partially_detected = "partially_detected" partially_detected = "partially_detected"
class DataClassification(str, enum.Enum):
    """Classification labels that can be attached to a record.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    public = "public"
    internal = "internal"
    sensitive = "sensitive"
    restricted = "restricted"

View File

@@ -23,6 +23,7 @@ from app.services.campaign_scheduler_service import check_and_run_recurring_camp
from app.jobs.jira_sync_job import sync_all_jira_links from app.jobs.jira_sync_job import sync_all_jira_links
from app.services.osint_enrichment_service import enrich_all_techniques from app.services.osint_enrichment_service import enrich_all_techniques
from app.services.stale_detection_service import detect_stale_coverage from app.services.stale_detection_service import detect_stale_coverage
from app.jobs.retention_job import run_retention_job
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -217,10 +218,19 @@ def start_scheduler() -> None:
name="Stale coverage detection (daily)", name="Stale coverage detection (daily)",
replace_existing=True, replace_existing=True,
) )
scheduler.add_job(
run_retention_job,
trigger="interval",
hours=24,
id="retention_policies",
name="Data retention policies (daily)",
replace_existing=True,
)
scheduler.start() scheduler.start()
logger.info( logger.info(
"Background scheduler started — mitre_sync (24h), intel_scan (7d), " "Background scheduler started — mitre_sync (24h), intel_scan (7d), "
"notification_cleanup (24h), weekly_snapshot (Sundays 00:00), " "notification_cleanup (24h), weekly_snapshot (Sundays 00:00), "
"recurring_campaigns (daily), jira_sync (1h), " "recurring_campaigns (daily), jira_sync (1h), "
"osint_enrichment (weekly), stale_detection (daily)" "osint_enrichment (weekly), stale_detection (daily), "
"retention_policies (daily)"
) )

View File

@@ -0,0 +1,53 @@
"""Data retention policies — scheduled cleanup of aged records."""
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
from sqlalchemy.orm import Session
from app.database import SessionLocal
from app.models.audit import AuditLog
from app.services.notification_service import cleanup_old_notifications
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Audit logs whose timestamp is older than this many days are deleted by
# apply_retention_policies.
AUDIT_LOG_RETENTION_DAYS = 730
def apply_retention_policies(db: Session) -> dict[str, int]:
    """Delete records that have outlived their retention window, then commit.

    Audit logs with a timestamp older than ``AUDIT_LOG_RETENTION_DAYS`` days
    are bulk-deleted, and ``cleanup_old_notifications`` is invoked with a
    90-day window. ``db`` is committed before the summary is returned.

    Returns:
        A dict with ``audit_logs_deleted`` (rows removed by the bulk delete)
        and ``notifications_deleted`` (the value returned by
        ``cleanup_old_notifications``).
    """
    retention_window = timedelta(days=AUDIT_LOG_RETENTION_DAYS)
    oldest_kept = datetime.now(timezone.utc) - retention_window

    expired_audit_logs = db.query(AuditLog).filter(AuditLog.timestamp < oldest_kept)
    audit_count = expired_audit_logs.delete(synchronize_session=False)
    # Only log when something was actually removed.
    if audit_count:
        logger.info(
            "Retention: deleted %d audit logs older than %d days",
            audit_count,
            AUDIT_LOG_RETENTION_DAYS,
        )

    notification_count = cleanup_old_notifications(db, days=90)
    db.commit()

    summary = {
        "audit_logs_deleted": audit_count,
        "notifications_deleted": notification_count,
    }
    return summary
def run_retention_job() -> None:
    """Run the retention policies in a fresh session (scheduler entry point).

    Any exception is logged and the session rolled back instead of being
    propagated; the session is closed in every case.
    """
    logger.info("Scheduled retention job starting...")
    session = SessionLocal()
    try:
        logger.info("Retention job finished — %s", apply_retention_policies(session))
    except Exception:
        logger.exception("Retention job failed")
        session.rollback()
    finally:
        session.close()

View File

@@ -53,6 +53,7 @@ class Campaign(Base):
target_platform = Column(String, nullable=True) target_platform = Column(String, nullable=True)
tags = Column(JSONB, nullable=True, default=[]) tags = Column(JSONB, nullable=True, default=[])
created_at = Column(DateTime(timezone=True), server_default=func.now()) created_at = Column(DateTime(timezone=True), server_default=func.now())
data_classification = Column(String(20), nullable=False, server_default="internal")
# Recurring scheduling fields # Recurring scheduling fields
is_recurring = Column(Boolean, default=False) is_recurring = Column(Boolean, default=False)

View File

@@ -6,6 +6,7 @@ working with ``from app.models.enums import ...``.
""" """
from app.domain.enums import ( # noqa: F401 from app.domain.enums import ( # noqa: F401
DataClassification,
TeamSide, TeamSide,
TechniqueStatus, TechniqueStatus,
TestResult, TestResult,

View File

@@ -28,6 +28,7 @@ class Evidence(Base):
uploaded_at = Column(DateTime(timezone=True), server_default=func.now()) uploaded_at = Column(DateTime(timezone=True), server_default=func.now())
team = Column(Enum(TeamSide, name="teamside"), nullable=False, default=TeamSide.red) team = Column(Enum(TeamSide, name="teamside"), nullable=False, default=TeamSide.red)
notes = Column(Text, nullable=True) notes = Column(Text, nullable=True)
data_classification = Column(String(20), nullable=False, server_default="internal")
# Relationships # Relationships
test = relationship("Test", back_populates="evidences") test = relationship("Test", back_populates="evidences")

View File

@@ -62,6 +62,7 @@ class Test(Base):
# ── Re-test fields ──────────────────────────────────────────── # ── Re-test fields ────────────────────────────────────────────
retest_of = Column(UUID(as_uuid=True), ForeignKey("tests.id"), nullable=True) retest_of = Column(UUID(as_uuid=True), ForeignKey("tests.id"), nullable=True)
retest_count = Column(Integer, default=0) retest_count = Column(Integer, default=0)
data_classification = Column(String(20), nullable=False, server_default="internal")
# ── Relationships ─────────────────────────────────────────────── # ── Relationships ───────────────────────────────────────────────
technique = relationship("Technique", back_populates="tests") technique = relationship("Technique", back_populates="tests")

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
from app.domain.enums import DataClassification
from app.models.enums import TestResult, TestState from app.models.enums import TestResult, TestState
@@ -25,6 +26,12 @@ class TestCreate(BaseModel):
# ── Update (general) ─────────────────────────────────────────────── # ── Update (general) ───────────────────────────────────────────────
class TestClassificationUpdate(BaseModel):
    """Admin-only payload for changing data classification."""
    # Required field; typed as the DataClassification enum, so the accepted
    # values are that enum's members.
    data_classification: DataClassification
class TestUpdate(BaseModel): class TestUpdate(BaseModel):
"""Payload for partially updating an existing test. """Payload for partially updating an existing test.
Every field is optional so callers send only what changed.""" Every field is optional so callers send only what changed."""
@@ -152,6 +159,7 @@ class TestOut(BaseModel):
# Re-test fields # Re-test fields
retest_of: uuid.UUID | None = None retest_of: uuid.UUID | None = None
retest_count: int = 0 retest_count: int = 0
data_classification: str = "internal"
# Technique info (populated when joined) # Technique info (populated when joined)
technique_mitre_id: str | None = None technique_mitre_id: str | None = None

View File

@@ -0,0 +1,72 @@
"""Tests for data classification fields and admin updates."""
from app.models.enums import TestState
from app.models.test import Test
from app.models.technique import Technique
def _seed_technique(db) -> Technique:
    """Persist a placeholder technique and return the refreshed instance."""
    fields = {
        "mitre_id": "T9999",
        "name": "Test Technique",
        "tactic": "test",
        "platforms": ["linux"],
    }
    seeded = Technique(**fields)
    db.add(seeded)
    db.commit()
    db.refresh(seeded)
    return seeded
def test_new_test_defaults_to_internal(db, red_lead_user):
    """A test saved without an explicit classification reads back as ``internal``."""
    seeded = _seed_technique(db)
    record = Test(
        technique_id=seeded.id,
        name="Classification test",
        created_by=red_lead_user.id,
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    assert record.data_classification == "internal"
def test_admin_can_update_classification(client, db, admin_user, admin_token, red_lead_user):
    """An admin PATCH to the classification endpoint succeeds and is persisted."""
    seeded = _seed_technique(db)
    record = Test(
        technique_id=seeded.id,
        name="Classify me",
        created_by=red_lead_user.id,
        state=TestState.draft,
    )
    db.add(record)
    db.commit()

    url = f"/api/v1/tests/{record.id}/classification"
    payload = {"data_classification": "sensitive"}
    auth_headers = {"Authorization": f"Bearer {admin_token}"}
    response = client.patch(url, json=payload, headers=auth_headers)

    # The endpoint reports the new value...
    assert response.status_code == 200
    assert response.json()["data_classification"] == "sensitive"
    # ...and the stored row reflects it after a reload.
    db.refresh(record)
    assert record.data_classification == "sensitive"
def test_non_admin_cannot_update_classification(client, db, admin_user, red_lead_token, red_lead_user):
    """A PATCH authenticated with the red-lead token is rejected with 403."""
    seeded = _seed_technique(db)
    record = Test(
        technique_id=seeded.id,
        name="Protected",
        created_by=red_lead_user.id,
    )
    db.add(record)
    db.commit()

    url = f"/api/v1/tests/{record.id}/classification"
    payload = {"data_classification": "restricted"}
    auth_headers = {"Authorization": f"Bearer {red_lead_token}"}
    response = client.patch(url, json=payload, headers=auth_headers)

    assert response.status_code == 403