From bdeeed54e1c2f4e0027db3ad73844b446c9298b0 Mon Sep 17 00:00:00 2001
From: Kitos
Date: Mon, 18 May 2026 14:17:29 +0200
Subject: [PATCH] feat(compliance): data classification fields and retention policies job [FASE-3.5]

---
Notes (reviewer annotations; this section is not part of the commit message):
  - Comments and docstrings on added lines were extended without changing
    any hunk line counts; the post-image blob ids on the "index" lines
    predate those annotations.
  - backend/tests/test_data_classification.py calls
    PATCH /api/v1/tests/{id}/classification, but none of the hunks below
    adds a route. NOTE(review): confirm the endpoint exists on the base.
  - retention_job.py calls cleanup_old_notifications(db, days=90) while the
    scheduler's startup log already lists notification_cleanup (24h).
    NOTE(review): confirm that running both is intended.
  - data_classification is stored as String(20) with no DB-level constraint
    and TestOut exposes it as str; only TestClassificationUpdate validates
    the value against DataClassification.

 .../b029_phase3_audit_and_classification.py   | 58 +++++++++++++++
 backend/app/domain/enums.py                   |  7 ++
 backend/app/jobs/mitre_sync_job.py            | 12 +++-
 backend/app/jobs/retention_job.py             | 53 ++++++++++++++
 backend/app/models/campaign.py                |  1 +
 backend/app/models/enums.py                   |  1 +
 backend/app/models/evidence.py                |  1 +
 backend/app/models/test.py                    |  1 +
 backend/app/schemas/test.py                   |  8 +++
 backend/tests/test_data_classification.py     | 72 +++++++++++++++++++
 10 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 backend/alembic/versions/b029_phase3_audit_and_classification.py
 create mode 100644 backend/app/jobs/retention_job.py
 create mode 100644 backend/tests/test_data_classification.py

diff --git a/backend/alembic/versions/b029_phase3_audit_and_classification.py b/backend/alembic/versions/b029_phase3_audit_and_classification.py
new file mode 100644
index 0000000..4224f00
--- /dev/null
+++ b/backend/alembic/versions/b029_phase3_audit_and_classification.py
@@ -0,0 +1,58 @@
+"""Phase 3: audit trail columns and data classification fields.
+
+Revision ID: b029phase3
+Revises: b028phase0
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+revision: str = "b029phase3"
+down_revision: Union[str, None] = "b028phase0"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def _column_names(table: str) -> set[str]:  # column names currently present on `table`
+    bind = op.get_bind()
+    insp = sa.inspect(bind)
+    return {c["name"] for c in insp.get_columns(table)}
+
+
+def upgrade() -> None:  # each column is added only if it is not already present
+    audit_cols = _column_names("audit_logs")
+    if "ip_address" not in audit_cols:
+        op.add_column("audit_logs", sa.Column("ip_address", sa.String(45), nullable=True))
+    if "user_agent" not in audit_cols:
+        op.add_column("audit_logs", sa.Column("user_agent", sa.String(500), nullable=True))
+    if "integrity_hash" not in audit_cols:
+        op.add_column("audit_logs", sa.Column("integrity_hash", sa.String(64), nullable=True))
+    if "session_id" not in audit_cols:
+        op.add_column("audit_logs", sa.Column("session_id", sa.String(100), nullable=True))
+
+    for table in ("tests", "evidences", "campaigns"):
+        cols = _column_names(table)
+        if "data_classification" not in cols:
+            op.add_column(
+                table,
+                sa.Column(
+                    "data_classification",
+                    sa.String(20),
+                    nullable=False,
+                    server_default="internal",  # default for existing rows, so NOT NULL can hold
+                ),
+            )
+
+
+def downgrade() -> None:  # drops the same columns, in reverse order, when present
+    for table in ("campaigns", "evidences", "tests"):
+        cols = _column_names(table)
+        if "data_classification" in cols:
+            op.drop_column(table, "data_classification")
+
+    audit_cols = _column_names("audit_logs")
+    for col in ("session_id", "integrity_hash", "user_agent", "ip_address"):
+        if col in audit_cols:
+            op.drop_column("audit_logs", col)
diff --git a/backend/app/domain/enums.py b/backend/app/domain/enums.py
index 8e7afa2..0f8405a 100644
--- a/backend/app/domain/enums.py
+++ b/backend/app/domain/enums.py
@@ -35,3 +35,10 @@ class TestResult(str, enum.Enum):
     detected = "detected"
     not_detected = "not_detected"
     partially_detected = "partially_detected"
+
+
+class DataClassification(str, enum.Enum):
+    public = "public"
+    internal = "internal"
+    sensitive = "sensitive"
+    restricted = "restricted"
diff --git a/backend/app/jobs/mitre_sync_job.py b/backend/app/jobs/mitre_sync_job.py
index 133e37a..45a9680 100644
--- a/backend/app/jobs/mitre_sync_job.py
+++ b/backend/app/jobs/mitre_sync_job.py
@@ -23,6 +23,7 @@ from app.services.campaign_scheduler_service import check_and_run_recurring_camp
 from app.jobs.jira_sync_job import sync_all_jira_links
 from app.services.osint_enrichment_service import enrich_all_techniques
 from app.services.stale_detection_service import detect_stale_coverage
+from app.jobs.retention_job import run_retention_job
 
 logger = logging.getLogger(__name__)
 
@@ -217,10 +218,19 @@ def start_scheduler() -> None:
         name="Stale coverage detection (daily)",
         replace_existing=True,
     )
+    scheduler.add_job(
+        run_retention_job,
+        trigger="interval",
+        hours=24,
+        id="retention_policies",
+        name="Data retention policies (daily)",
+        replace_existing=True,
+    )
     scheduler.start()
     logger.info(
         "Background scheduler started — mitre_sync (24h), intel_scan (7d), "
         "notification_cleanup (24h), weekly_snapshot (Sundays 00:00), "
         "recurring_campaigns (daily), jira_sync (1h), "
-        "osint_enrichment (weekly), stale_detection (daily)"
+        "osint_enrichment (weekly), stale_detection (daily), "
+        "retention_policies (daily)"
     )
diff --git a/backend/app/jobs/retention_job.py b/backend/app/jobs/retention_job.py
new file mode 100644
index 0000000..1cd056e
--- /dev/null
+++ b/backend/app/jobs/retention_job.py
@@ -0,0 +1,53 @@
+"""Data retention policies — scheduled cleanup of aged records."""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timedelta, timezone
+
+from sqlalchemy.orm import Session
+
+from app.database import SessionLocal
+from app.models.audit import AuditLog
+from app.services.notification_service import cleanup_old_notifications
+
+logger = logging.getLogger(__name__)
+
+AUDIT_LOG_RETENTION_DAYS = 730  # 2 x 365 days
+
+
+def apply_retention_policies(db: Session) -> dict[str, int]:
+    """Purge aged audit logs and old notifications, commit, and return a summary dict."""
+    cutoff = datetime.now(timezone.utc) - timedelta(days=AUDIT_LOG_RETENTION_DAYS)
+    deleted_audit = (
+        db.query(AuditLog)
+        .filter(AuditLog.timestamp < cutoff)
+        .delete(synchronize_session=False)  # bulk DELETE; returns the matched row count
+    )
+    if deleted_audit:
+        logger.info(
+            "Retention: deleted %d audit logs older than %d days",
+            deleted_audit,
+            AUDIT_LOG_RETENTION_DAYS,
+        )
+
+    deleted_notifications = cleanup_old_notifications(db, days=90)  # NOTE(review): 90 is hard-coded
+    db.commit()
+    return {
+        "audit_logs_deleted": deleted_audit,
+        "notifications_deleted": deleted_notifications,
+    }
+
+
+def run_retention_job() -> None:
+    """Scheduler entry point: apply retention policies in a new session; failures are logged, not raised."""
+    logger.info("Scheduled retention job starting...")
+    db = SessionLocal()
+    try:
+        summary = apply_retention_policies(db)
+        logger.info("Retention job finished — %s", summary)
+    except Exception:  # top-level job boundary: log, roll back, do not propagate
+        logger.exception("Retention job failed")
+        db.rollback()
+    finally:
+        db.close()
diff --git a/backend/app/models/campaign.py b/backend/app/models/campaign.py
index 062f0d7..464972f 100644
--- a/backend/app/models/campaign.py
+++ b/backend/app/models/campaign.py
@@ -53,6 +53,7 @@ class Campaign(Base):
     target_platform = Column(String, nullable=True)
     tags = Column(JSONB, nullable=True, default=[])
     created_at = Column(DateTime(timezone=True), server_default=func.now())
+    data_classification = Column(String(20), nullable=False, server_default="internal")
 
     # Recurring scheduling fields
     is_recurring = Column(Boolean, default=False)
diff --git a/backend/app/models/enums.py b/backend/app/models/enums.py
index 6e1cb32..b941d3e 100644
--- a/backend/app/models/enums.py
+++ b/backend/app/models/enums.py
@@ -6,6 +6,7 @@ working with ``from app.models.enums import ...``.
 """
 
 from app.domain.enums import (  # noqa: F401
+    DataClassification,
     TeamSide,
     TechniqueStatus,
     TestResult,
diff --git a/backend/app/models/evidence.py b/backend/app/models/evidence.py
index 149fca9..0f87db2 100644
--- a/backend/app/models/evidence.py
+++ b/backend/app/models/evidence.py
@@ -28,6 +28,7 @@ class Evidence(Base):
     uploaded_at = Column(DateTime(timezone=True), server_default=func.now())
     team = Column(Enum(TeamSide, name="teamside"), nullable=False, default=TeamSide.red)
     notes = Column(Text, nullable=True)
+    data_classification = Column(String(20), nullable=False, server_default="internal")
 
     # Relationships
     test = relationship("Test", back_populates="evidences")
diff --git a/backend/app/models/test.py b/backend/app/models/test.py
index 166d137..2388495 100644
--- a/backend/app/models/test.py
+++ b/backend/app/models/test.py
@@ -62,6 +62,7 @@ class Test(Base):
     # ── Re-test fields ────────────────────────────────────────────
     retest_of = Column(UUID(as_uuid=True), ForeignKey("tests.id"), nullable=True)
     retest_count = Column(Integer, default=0)
+    data_classification = Column(String(20), nullable=False, server_default="internal")
 
     # ── Relationships ───────────────────────────────────────────────
     technique = relationship("Technique", back_populates="tests")
diff --git a/backend/app/schemas/test.py b/backend/app/schemas/test.py
index 34d5d92..c9369e8 100644
--- a/backend/app/schemas/test.py
+++ b/backend/app/schemas/test.py
@@ -5,6 +5,7 @@ from datetime import datetime
 
 from pydantic import BaseModel, ConfigDict
 
+from app.domain.enums import DataClassification
 from app.models.enums import TestResult, TestState
 
 
@@ -25,6 +26,12 @@ class TestCreate(BaseModel):
 # ── Update (general) ───────────────────────────────────────────────
 
 
+class TestClassificationUpdate(BaseModel):
+    """Admin-only payload for changing data classification."""
+
+    data_classification: DataClassification
+
+
 class TestUpdate(BaseModel):
     """Payload for partially updating an existing test.
     Every field is optional so callers send only what changed."""
@@ -152,6 +159,7 @@ class TestOut(BaseModel):
     # Re-test fields
     retest_of: uuid.UUID | None = None
     retest_count: int = 0
+    data_classification: str = "internal"  # NOTE(review): str here, enum in TestClassificationUpdate
 
     # Technique info (populated when joined)
     technique_mitre_id: str | None = None
diff --git a/backend/tests/test_data_classification.py b/backend/tests/test_data_classification.py
new file mode 100644
index 0000000..45e8b58
--- /dev/null
+++ b/backend/tests/test_data_classification.py
@@ -0,0 +1,72 @@
+"""Tests for data classification fields and admin updates."""
+
+from app.models.enums import TestState
+from app.models.test import Test
+from app.models.technique import Technique
+
+
+def _seed_technique(db) -> Technique:  # inserts, commits and returns one Technique row
+    technique = Technique(
+        mitre_id="T9999",  # NOTE(review): fixed ID in every test; presumably relies on per-test DB isolation
+        name="Test Technique",
+        tactic="test",
+        platforms=["linux"],
+    )
+    db.add(technique)
+    db.commit()
+    db.refresh(technique)
+    return technique
+
+
+def test_new_test_defaults_to_internal(db, red_lead_user):
+    technique = _seed_technique(db)
+    test = Test(
+        technique_id=technique.id,
+        name="Classification test",
+        created_by=red_lead_user.id,
+    )
+    db.add(test)
+    db.commit()
+    db.refresh(test)  # reload so the server-side default is populated on the instance
+    assert test.data_classification == "internal"
+
+
+def test_admin_can_update_classification(client, db, admin_user, admin_token, red_lead_user):
+    technique = _seed_technique(db)
+    test = Test(
+        technique_id=technique.id,
+        name="Classify me",
+        created_by=red_lead_user.id,
+        state=TestState.draft,
+    )
+    db.add(test)
+    db.commit()
+
+    response = client.patch(
+        f"/api/v1/tests/{test.id}/classification",
+        json={"data_classification": "sensitive"},
+        headers={"Authorization": f"Bearer {admin_token}"},
+    )
+    assert response.status_code == 200
+    assert response.json()["data_classification"] == "sensitive"
+
+    db.refresh(test)  # re-read the row to check the change was persisted
+    assert test.data_classification == "sensitive"
+
+
+def test_non_admin_cannot_update_classification(client, db, admin_user, red_lead_token, red_lead_user):
+    technique = _seed_technique(db)
+    test = Test(
+        technique_id=technique.id,
+        name="Protected",
+        created_by=red_lead_user.id,
+    )
+    db.add(test)
+    db.commit()
+
+    response = client.patch(
+        f"/api/v1/tests/{test.id}/classification",
+        json={"data_classification": "restricted"},
+        headers={"Authorization": f"Bearer {red_lead_token}"},
+    )
+    assert response.status_code == 403