feat(compliance): data classification fields and retention policies job [FASE-3.5]
Some checks failed
Aegis CI / lint-and-test (push) Has been cancelled

This commit is contained in:
2026-05-18 14:17:29 +02:00
parent 3e854b7b79
commit bdeeed54e1
10 changed files with 213 additions and 1 deletions

View File

@@ -0,0 +1,58 @@
"""Phase 3: audit trail columns and data classification fields.
Revision ID: b029phase3
Revises: b028phase0
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers read by Alembic to place this migration in the chain.
revision: str = "b029phase3"
# Parent revision (matches "Revises" in the module docstring).
down_revision: Union[str, None] = "b028phase0"
# No branch label and no cross-revision dependency are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def _column_names(table: str) -> set[str]:
    """Return the names of the columns currently present on ``table``.

    The schema is reflected through the connection the migration is bound
    to, so the result describes the database as it is right now.
    """
    inspector = sa.inspect(op.get_bind())
    return {column["name"] for column in inspector.get_columns(table)}
def upgrade() -> None:
    """Add the audit-trail columns and the ``data_classification`` field.

    Every column is added only when it is not already present on its table,
    so columns that already exist are left untouched.
    """
    # Nullable string columns for ``audit_logs``: (column name, max length).
    audit_columns = (
        ("ip_address", 45),
        ("user_agent", 500),
        ("integrity_hash", 64),
        ("session_id", 100),
    )
    # Column names are read once, before any of the additions below.
    existing = _column_names("audit_logs")
    for name, length in audit_columns:
        if name in existing:
            continue
        op.add_column(
            "audit_logs",
            sa.Column(name, sa.String(length), nullable=True),
        )

    # NOT NULL column; the server default supplies "internal" for rows that
    # do not provide a value.
    for table in ("tests", "evidences", "campaigns"):
        if "data_classification" in _column_names(table):
            continue
        classification = sa.Column(
            "data_classification",
            sa.String(20),
            nullable=False,
            server_default="internal",
        )
        op.add_column(table, classification)
def downgrade() -> None:
    """Drop the columns added by ``upgrade``, in the reverse order.

    A column is dropped only when it is still present on its table.
    """
    for table in ("campaigns", "evidences", "tests"):
        if "data_classification" not in _column_names(table):
            continue
        op.drop_column(table, "data_classification")

    # Column names are read once, before any of the drops below.
    present = _column_names("audit_logs")
    candidates = ("session_id", "integrity_hash", "user_agent", "ip_address")
    for column in (name for name in candidates if name in present):
        op.drop_column("audit_logs", column)

View File

@@ -35,3 +35,10 @@ class TestResult(str, enum.Enum):
detected = "detected"
not_detected = "not_detected"
partially_detected = "partially_detected"
class DataClassification(str, enum.Enum):
    """Data classification labels that can be assigned to a record.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``DataClassification.internal == "internal"``).
    """

    public = "public"
    internal = "internal"
    sensitive = "sensitive"
    restricted = "restricted"

View File

@@ -23,6 +23,7 @@ from app.services.campaign_scheduler_service import check_and_run_recurring_camp
from app.jobs.jira_sync_job import sync_all_jira_links
from app.services.osint_enrichment_service import enrich_all_techniques
from app.services.stale_detection_service import detect_stale_coverage
from app.jobs.retention_job import run_retention_job
logger = logging.getLogger(__name__)
@@ -217,10 +218,19 @@ def start_scheduler() -> None:
name="Stale coverage detection (daily)",
replace_existing=True,
)
scheduler.add_job(
run_retention_job,
trigger="interval",
hours=24,
id="retention_policies",
name="Data retention policies (daily)",
replace_existing=True,
)
scheduler.start()
logger.info(
"Background scheduler started — mitre_sync (24h), intel_scan (7d), "
"notification_cleanup (24h), weekly_snapshot (Sundays 00:00), "
"recurring_campaigns (daily), jira_sync (1h), "
"osint_enrichment (weekly), stale_detection (daily)"
"osint_enrichment (weekly), stale_detection (daily), "
"retention_policies (daily)"
)

View File

@@ -0,0 +1,53 @@
"""Data retention policies — scheduled cleanup of aged records."""
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
from sqlalchemy.orm import Session
from app.database import SessionLocal
from app.models.audit import AuditLog
from app.services.notification_service import cleanup_old_notifications
logger = logging.getLogger(__name__)
# Audit-log retention window in days (730 = 2 x 365). Rows whose timestamp is
# older than this are deleted by the retention policies below.
AUDIT_LOG_RETENTION_DAYS = 730
def apply_retention_policies(
    db: Session,
    *,
    audit_log_days: int | None = None,
    notification_days: int | None = None,
) -> dict[str, int]:
    """Apply the retention rules and commit the session before returning.

    Deletes audit logs whose ``timestamp`` is older than the audit window and
    delegates notification cleanup to ``cleanup_old_notifications``.

    Args:
        db: Session used for the deletes; it is committed on success.
        audit_log_days: Audit-log retention window in days. ``None`` (the
            default) uses ``AUDIT_LOG_RETENTION_DAYS``.
        notification_days: Retention window passed to the notification
            cleanup. ``None`` (the default) uses 90 days.

    Returns:
        A mapping with ``audit_logs_deleted`` and ``notifications_deleted``.

    Raises:
        ValueError: If either window is negative. A negative window would put
            the cutoff in the future and delete every record.
    """
    # Defaults are resolved at call time so the module constant stays the
    # single source of truth for the audit window.
    if audit_log_days is None:
        audit_log_days = AUDIT_LOG_RETENTION_DAYS
    if notification_days is None:
        notification_days = 90

    # Validate both windows before touching the database: this job deletes
    # data, so a bad argument must never reach the delete.
    windows = (
        ("audit_log_days", audit_log_days),
        ("notification_days", notification_days),
    )
    for label, days in windows:
        if days < 0:
            raise ValueError(f"{label} must be non-negative, got {days}")

    cutoff = datetime.now(timezone.utc) - timedelta(days=audit_log_days)
    # Bulk delete; the in-session state is not synchronized because nothing
    # in this function reads AuditLog objects afterwards.
    deleted_audit = (
        db.query(AuditLog)
        .filter(AuditLog.timestamp < cutoff)
        .delete(synchronize_session=False)
    )
    if deleted_audit:
        logger.info(
            "Retention: deleted %d audit logs older than %d days",
            deleted_audit,
            audit_log_days,
        )

    deleted_notifications = cleanup_old_notifications(db, days=notification_days)
    db.commit()
    return {
        "audit_logs_deleted": deleted_audit,
        "notifications_deleted": deleted_notifications,
    }
def run_retention_job() -> None:
    """Run the retention policies once in a dedicated database session.

    This is the callable registered with the scheduler. A failure is logged
    and rolled back instead of being propagated, and the session is closed
    whether the run succeeds or not.
    """
    logger.info("Scheduled retention job starting...")
    session = SessionLocal()
    try:
        outcome = apply_retention_policies(session)
        logger.info("Retention job finished — %s", outcome)
    except Exception:
        # Job boundary: record the traceback, then undo any uncommitted work.
        logger.exception("Retention job failed")
        session.rollback()
    finally:
        session.close()

View File

@@ -53,6 +53,7 @@ class Campaign(Base):
target_platform = Column(String, nullable=True)
tags = Column(JSONB, nullable=True, default=[])
created_at = Column(DateTime(timezone=True), server_default=func.now())
data_classification = Column(String(20), nullable=False, server_default="internal")
# Recurring scheduling fields
is_recurring = Column(Boolean, default=False)

View File

@@ -6,6 +6,7 @@ working with ``from app.models.enums import ...``.
"""
from app.domain.enums import ( # noqa: F401
DataClassification,
TeamSide,
TechniqueStatus,
TestResult,

View File

@@ -28,6 +28,7 @@ class Evidence(Base):
uploaded_at = Column(DateTime(timezone=True), server_default=func.now())
team = Column(Enum(TeamSide, name="teamside"), nullable=False, default=TeamSide.red)
notes = Column(Text, nullable=True)
data_classification = Column(String(20), nullable=False, server_default="internal")
# Relationships
test = relationship("Test", back_populates="evidences")

View File

@@ -62,6 +62,7 @@ class Test(Base):
# ── Re-test fields ────────────────────────────────────────────
retest_of = Column(UUID(as_uuid=True), ForeignKey("tests.id"), nullable=True)
retest_count = Column(Integer, default=0)
data_classification = Column(String(20), nullable=False, server_default="internal")
# ── Relationships ───────────────────────────────────────────────
technique = relationship("Technique", back_populates="tests")

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from pydantic import BaseModel, ConfigDict
from app.domain.enums import DataClassification
from app.models.enums import TestResult, TestState
@@ -25,6 +26,12 @@ class TestCreate(BaseModel):
# ── Update (general) ───────────────────────────────────────────────
class TestClassificationUpdate(BaseModel):
    """Admin-only payload for changing data classification."""

    # Typed as the enum, so only its declared values are accepted.
    data_classification: DataClassification
class TestUpdate(BaseModel):
"""Payload for partially updating an existing test.
Every field is optional so callers send only what changed."""
@@ -152,6 +159,7 @@ class TestOut(BaseModel):
# Re-test fields
retest_of: uuid.UUID | None = None
retest_count: int = 0
data_classification: str = "internal"
# Technique info (populated when joined)
technique_mitre_id: str | None = None

View File

@@ -0,0 +1,72 @@
"""Tests for data classification fields and admin updates."""
from app.models.enums import TestState
from app.models.test import Test
from app.models.technique import Technique
def _seed_technique(db) -> Technique:
    """Persist a placeholder technique and return it for tests to reference."""
    fields = {
        "mitre_id": "T9999",
        "name": "Test Technique",
        "tactic": "test",
        "platforms": ["linux"],
    }
    seeded = Technique(**fields)
    db.add(seeded)
    db.commit()
    # Reload the row after the commit; callers read ``.id`` from the result.
    db.refresh(seeded)
    return seeded
def test_new_test_defaults_to_internal(db, red_lead_user):
    """A test saved without a classification is stored as ``internal``."""
    parent = _seed_technique(db)
    created = Test(
        technique_id=parent.id,
        name="Classification test",
        created_by=red_lead_user.id,
    )
    db.add(created)
    db.commit()
    # Reload so the value comes from the database rather than from Python.
    db.refresh(created)

    assert created.data_classification == "internal"
def test_admin_can_update_classification(client, db, admin_user, admin_token, red_lead_user):
    """An admin PATCH to the classification endpoint changes the stored value."""
    parent = _seed_technique(db)
    target = Test(
        technique_id=parent.id,
        name="Classify me",
        created_by=red_lead_user.id,
        state=TestState.draft,
    )
    db.add(target)
    db.commit()

    endpoint = f"/api/v1/tests/{target.id}/classification"
    payload = {"data_classification": "sensitive"}
    auth_headers = {"Authorization": f"Bearer {admin_token}"}
    response = client.patch(endpoint, json=payload, headers=auth_headers)

    # The response body must reflect the new classification...
    assert response.status_code == 200
    assert response.json()["data_classification"] == "sensitive"
    # ...and so must the row once it is reloaded from the database.
    db.refresh(target)
    assert target.data_classification == "sensitive"
def test_non_admin_cannot_update_classification(client, db, admin_user, red_lead_token, red_lead_user):
    """A non-admin PATCH is rejected with 403 and leaves the value untouched."""
    technique = _seed_technique(db)
    test = Test(
        technique_id=technique.id,
        name="Protected",
        created_by=red_lead_user.id,
    )
    db.add(test)
    db.commit()

    response = client.patch(
        f"/api/v1/tests/{test.id}/classification",
        json={"data_classification": "restricted"},
        headers={"Authorization": f"Bearer {red_lead_token}"},
    )

    assert response.status_code == 403
    # A rejected request must have no side effect: reload the row and check
    # that it still carries the default classification.
    db.refresh(test)
    assert test.data_classification == "internal"