# Aegis/backend/app/services/compliance_import_service.py

"""Compliance import service — imports NIST 800-53 to ATT&CK mappings.

Downloads and parses the STIX bundle from the Center for Threat-Informed
Defense's attack_to_nist_mapping repository to create ComplianceFramework,
ComplianceControl, and ComplianceControlMapping records.
"""

import logging
import json
import re
from typing import Optional

import requests
from sqlalchemy.orm import Session

from app.models.compliance import (
    ComplianceFramework,
    ComplianceControl,
    ComplianceControlMapping,
)
from app.models.technique import Technique

logger = logging.getLogger(__name__)

# Raw-GitHub location of the NIST 800-53 Rev 5 to ATT&CK mapping file in the
# center-for-threat-informed-defense/attack_to_nist_mapping repository
# (main branch). The importer expects a JSON STIX bundle containing the
# relationship objects.
NIST_MAPPING_URL = "/".join(
    (
        "https://raw.githubusercontent.com",
        "center-for-threat-informed-defense",
        "attack_to_nist_mapping",
        "main",
        "data",
        "attack-to-nist-rev5.json",
    )
)


def import_nist_800_53_mappings(db: Session) -> dict:
    """Import NIST 800-53 Rev 5 controls and their ATT&CK technique mappings.

    The STIX bundle is fetched from ``NIST_MAPPING_URL`` (the Center for
    Threat-Informed Defense ``attack_to_nist_mapping`` repository). When the
    download fails, the body is not valid JSON, or the payload is not a JSON
    object, the curated sample set from ``_import_sample_nist_mappings`` is
    imported instead and its summary is returned.

    Steps:
    1. Create or get the NIST 800-53 Rev 5 framework
    2. Download the STIX bundle JSON
    3. Parse controls, techniques and "mitigates" relationship objects
    4. Create ComplianceControl records (one per distinct control ID)
    5. Create ComplianceControlMapping records

    Args:
        db: SQLAlchemy session. New rows are flushed and then committed
            before the function returns.

    Returns:
        A summary dict with the keys ``framework``, ``controls_created``,
        ``controls_existing``, ``mappings_created``, ``mappings_skipped``,
        ``total_controls`` and ``total_relationships_found``. Control counts
        refer to distinct control IDs found in the bundle.
    """
    # ── 1. Create or get framework ────────────────────────────────
    framework = (
        db.query(ComplianceFramework)
        .filter(ComplianceFramework.name == "NIST 800-53 Rev 5")
        .first()
    )

    if not framework:
        framework = ComplianceFramework(
            name="NIST 800-53 Rev 5",
            version="5",
            description=(
                "National Institute of Standards and Technology Special Publication "
                "800-53 Revision 5 — Security and Privacy Controls for Information "
                "Systems and Organizations"
            ),
            url="https://csrc.nist.gov/publications/detail/sp/800-53/rev-5/final",
            is_active=True,
        )
        db.add(framework)
        db.flush()
        logger.info("Created NIST 800-53 Rev 5 framework")
    else:
        logger.info("NIST 800-53 Rev 5 framework already exists")

    # ── 2. Download STIX bundle ───────────────────────────────────
    try:
        response = requests.get(NIST_MAPPING_URL, timeout=30)
        response.raise_for_status()
        stix_bundle = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode errors: depending on the installed
        # ``requests`` version, ``Response.json()`` may raise a ValueError
        # subclass that is not a RequestException.
        logger.warning(f"Failed to download STIX bundle: {e}")
        # Fallback: create a sample set of well-known NIST controls
        return _import_sample_nist_mappings(db, framework)

    if not isinstance(stix_bundle, dict):
        # A bundle must be a JSON object; anything else cannot be parsed below.
        logger.warning(
            "STIX bundle is not a JSON object (got %s); using sample data",
            type(stix_bundle).__name__,
        )
        return _import_sample_nist_mappings(db, framework)

    # ── 3. Parse STIX objects ─────────────────────────────────────
    objects = stix_bundle.get("objects") or []

    # Build lookup maps
    control_map = {}       # stix_id -> {control_id, title, description, category}
    technique_map = {}     # stix_id -> mitre_technique_id
    relationships = []     # (source_ref, target_ref) for "mitigates" relationships

    # Control names are expected to look like "AC-2 Account Management".
    control_name_re = re.compile(r"^([A-Z]{2}-\d+(?:\.\d+)?)\s*(.*)")
    family_re = re.compile(r"^([A-Z]{2})")

    for obj in objects:
        if not isinstance(obj, dict):
            continue  # malformed entry; nothing to extract

        obj_type = obj.get("type", "")

        if obj_type == "course-of-action":
            # This is a NIST control
            name = obj.get("name") or ""
            desc = obj.get("description", "")
            stix_id = obj.get("id", "")

            # NOTE(review): when the name does not start with a control
            # identifier, the whole name is used as the control ID. If the
            # published bundle carries the identifier elsewhere (e.g. in
            # external_references), it should be read from there — confirm
            # against the actual data.
            match = control_name_re.match(name)
            if match:
                control_id = match.group(1)
                title = match.group(2) or name
            else:
                control_id = name
                title = name

            # Derive the category from the two-letter control family prefix
            family_match = family_re.match(control_id)
            category = _get_nist_category(family_match.group(1)) if family_match else None

            control_map[stix_id] = {
                "control_id": control_id,
                "title": title,
                # Only the first 500 characters of the description are stored
                "description": desc[:500] if desc else None,
                "category": category,
            }

        elif obj_type == "attack-pattern":
            # This is an ATT&CK technique; its ID lives in the first
            # "mitre-attack" external reference.
            stix_id = obj.get("id", "")
            for ref in obj.get("external_references", []):
                if ref.get("source_name") == "mitre-attack":
                    technique_map[stix_id] = ref.get("external_id", "")
                    break

        elif obj_type == "relationship":
            if obj.get("relationship_type", "") == "mitigates":
                relationships.append(
                    (obj.get("source_ref", ""), obj.get("target_ref", ""))
                )

    # ── 4. Create controls ────────────────────────────────────────
    controls_created = 0
    controls_existing = 0
    control_db_map = {}  # stix_id -> ComplianceControl

    # Load existing controls for this framework
    existing_controls = {
        c.control_id: c
        for c in db.query(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # control_id -> ComplianceControl already resolved in this run. Several
    # STIX objects may resolve to the same control ID; they must share one
    # row instead of each inserting a duplicate control.
    resolved_controls = {}

    for stix_id, info in control_map.items():
        cid = info["control_id"]
        ctrl = resolved_controls.get(cid)
        if ctrl is None:
            ctrl = existing_controls.get(cid)
            if ctrl is not None:
                controls_existing += 1
            else:
                ctrl = ComplianceControl(
                    framework_id=framework.id,
                    control_id=cid,
                    title=info["title"],
                    description=info["description"],
                    category=info["category"],
                )
                db.add(ctrl)
                controls_created += 1
            resolved_controls[cid] = ctrl
        control_db_map[stix_id] = ctrl

    if controls_created:
        # One flush for all new controls so they are written (and any
        # database-generated IDs populated) before the mappings reference
        # ``control.id`` below.
        db.flush()

    # ── 5. Create mappings ────────────────────────────────────────
    mappings_created = 0
    mappings_skipped = 0

    # Build technique DB lookup (mitre_id -> Technique)
    all_techniques = {t.mitre_id: t for t in db.query(Technique).all()}

    # Existing (control, technique) pairs. Only this framework's controls can
    # appear in ``control_db_map``, so mappings of other frameworks are not
    # needed for the duplicate check.
    existing_mappings = {
        (str(m.compliance_control_id), str(m.technique_id))
        for m in (
            db.query(ComplianceControlMapping)
            .join(ComplianceControl)
            .filter(ComplianceControl.framework_id == framework.id)
            .all()
        )
    }

    for source_ref, target_ref in relationships:
        control = control_db_map.get(source_ref)
        mitre_id = technique_map.get(target_ref)

        # Skip relationships whose endpoints are not a known control and a
        # known technique reference.
        if control is None or not mitre_id:
            mappings_skipped += 1
            continue

        technique = all_techniques.get(mitre_id)
        if technique is None:
            # Technique is not present in the local database
            mappings_skipped += 1
            continue

        key = (str(control.id), str(technique.id))
        if key in existing_mappings:
            mappings_skipped += 1
            continue

        db.add(
            ComplianceControlMapping(
                compliance_control_id=control.id,
                technique_id=technique.id,
            )
        )
        existing_mappings.add(key)
        mappings_created += 1

    db.commit()

    summary = {
        "framework": framework.name,
        "controls_created": controls_created,
        "controls_existing": controls_existing,
        "mappings_created": mappings_created,
        "mappings_skipped": mappings_skipped,
        "total_controls": controls_created + controls_existing,
        "total_relationships_found": len(relationships),
    }
    logger.info(f"NIST 800-53 import complete: {summary}")
    return summary


def _import_sample_nist_mappings(db: Session, framework: ComplianceFramework) -> dict:
    """Import a curated sample of NIST 800-53 controls when the download fails.

    This ensures the feature works even without network access.

    Args:
        db: SQLAlchemy session; new rows are flushed/committed here.
        framework: The framework row the sample controls are attached to.

    Returns:
        A summary dict with the same count keys as the download-based import
        plus ``"source": "sample_data"``. ``total_controls`` is the number of
        controls the framework has after the import and ``controls_existing``
        is that number minus the controls created in this call.
        ``mappings_skipped`` counts sample pairs that were already mapped or
        whose technique is not in the database.
    """
    SAMPLE_CONTROLS = [
        {"control_id": "AC-2", "title": "Account Management", "category": "Access Control",
         "techniques": ["T1078", "T1136", "T1098", "T1087", "T1069"]},
        {"control_id": "AC-3", "title": "Access Enforcement", "category": "Access Control",
         "techniques": ["T1078", "T1548", "T1134"]},
        {"control_id": "AC-4", "title": "Information Flow Enforcement", "category": "Access Control",
         "techniques": ["T1048", "T1041", "T1572"]},
        {"control_id": "AC-6", "title": "Least Privilege", "category": "Access Control",
         "techniques": ["T1078", "T1548", "T1134"]},
        {"control_id": "AU-2", "title": "Event Logging", "category": "Audit and Accountability",
         "techniques": ["T1562", "T1070"]},
        {"control_id": "AU-6", "title": "Audit Record Review", "category": "Audit and Accountability",
         "techniques": ["T1562", "T1070", "T1027"]},
        {"control_id": "CA-7", "title": "Continuous Monitoring", "category": "Assessment, Authorization, and Monitoring",
         "techniques": ["T1059", "T1053"]},
        {"control_id": "CM-2", "title": "Baseline Configuration", "category": "Configuration Management",
         "techniques": ["T1574", "T1546"]},
        {"control_id": "CM-6", "title": "Configuration Settings", "category": "Configuration Management",
         "techniques": ["T1574", "T1546", "T1112"]},
        {"control_id": "CM-7", "title": "Least Functionality", "category": "Configuration Management",
         "techniques": ["T1059", "T1218"]},
        {"control_id": "IA-2", "title": "Identification and Authentication", "category": "Identification and Authentication",
         "techniques": ["T1078", "T1110"]},
        {"control_id": "IA-5", "title": "Authenticator Management", "category": "Identification and Authentication",
         "techniques": ["T1078", "T1110", "T1003"]},
        {"control_id": "IR-4", "title": "Incident Handling", "category": "Incident Response",
         "techniques": ["T1059", "T1547"]},
        {"control_id": "RA-5", "title": "Vulnerability Monitoring and Scanning", "category": "Risk Assessment",
         "techniques": ["T1190", "T1203"]},
        {"control_id": "SC-7", "title": "Boundary Protection", "category": "System and Communications Protection",
         "techniques": ["T1048", "T1041", "T1071"]},
        {"control_id": "SC-28", "title": "Protection of Information at Rest", "category": "System and Communications Protection",
         "techniques": ["T1005", "T1114"]},
        {"control_id": "SI-3", "title": "Malicious Code Protection", "category": "System and Information Integrity",
         "techniques": ["T1059", "T1204", "T1566"]},
        {"control_id": "SI-4", "title": "System Monitoring", "category": "System and Information Integrity",
         "techniques": ["T1059", "T1053", "T1547"]},
        {"control_id": "SI-7", "title": "Software, Firmware, and Information Integrity", "category": "System and Information Integrity",
         "techniques": ["T1195", "T1553"]},
        {"control_id": "PM-16", "title": "Threat Awareness Program", "category": "Program Management",
         "techniques": ["T1566", "T1204"]},
    ]

    # Build technique lookup (mitre_id -> Technique)
    all_techniques = {t.mitre_id: t for t in db.query(Technique).all()}

    existing_controls = {
        c.control_id: c
        for c in db.query(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # Existing (control, technique) pairs for this framework's controls; all
    # controls touched below belong to ``framework``.
    existing_mappings = {
        (str(m.compliance_control_id), str(m.technique_id))
        for m in (
            db.query(ComplianceControlMapping)
            .join(ComplianceControl)
            .filter(ComplianceControl.framework_id == framework.id)
            .all()
        )
    }

    controls_created = 0
    mappings_created = 0
    mappings_skipped = 0
    relationships_found = 0

    for sample in SAMPLE_CONTROLS:
        # Create or get control
        control = existing_controls.get(sample["control_id"])
        if control is None:
            control = ComplianceControl(
                framework_id=framework.id,
                control_id=sample["control_id"],
                title=sample["title"],
                category=sample["category"],
            )
            db.add(control)
            # Flush now: ``control.id`` is needed for the mapping keys below
            db.flush()
            existing_controls[sample["control_id"]] = control
            controls_created += 1

        # Create mappings
        for mitre_id in sample["techniques"]:
            relationships_found += 1

            technique = all_techniques.get(mitre_id)
            if technique is None:
                # Parent technique is not in the database: fall back to one of
                # its sub-techniques ("T1078" -> "T1078.001"). Requiring the
                # "." separator avoids matching unrelated IDs that merely
                # share a prefix, and taking the lowest ID keeps the choice
                # independent of database row order.
                sub_prefix = f"{mitre_id}."
                sub_id = min(
                    (
                        known_id
                        for known_id in all_techniques
                        if isinstance(known_id, str) and known_id.startswith(sub_prefix)
                    ),
                    default=None,
                )
                if sub_id is not None:
                    technique = all_techniques[sub_id]
            if technique is None:
                mappings_skipped += 1
                continue

            pair = (str(control.id), str(technique.id))
            if pair in existing_mappings:
                mappings_skipped += 1
                continue

            db.add(
                ComplianceControlMapping(
                    compliance_control_id=control.id,
                    technique_id=technique.id,
                )
            )
            existing_mappings.add(pair)
            mappings_created += 1

    db.commit()

    return {
        "framework": framework.name,
        "controls_created": controls_created,
        "controls_existing": len(existing_controls) - controls_created,
        "mappings_created": mappings_created,
        "mappings_skipped": mappings_skipped,
        "total_controls": len(existing_controls),
        # Same key as the download-based summary so callers can read it
        # regardless of which path produced the result.
        "total_relationships_found": relationships_found,
        "source": "sample_data",
    }


def import_cis_controls_v8_mappings(db: Session) -> dict:
    """Import CIS Controls v8 with ATT&CK technique mappings.

    Uses a curated set of CIS Controls mapped to MITRE ATT&CK techniques
    based on the CIS Controls Navigator and official documentation.

    Each control is stored with its title, category and description. Controls
    that already exist but have an empty description get the curated
    description filled in; other existing fields are left untouched.

    Args:
        db: SQLAlchemy session; new rows are flushed/committed here.

    Returns:
        A summary dict with the keys ``framework``, ``controls_created``,
        ``controls_existing``, ``mappings_created``, ``mappings_skipped`` and
        ``total_controls``. ``mappings_skipped`` counts pairs that were
        already mapped or whose technique is not in the database.
    """
    # ── 1. Create or get framework ────────────────────────────────
    framework = (
        db.query(ComplianceFramework)
        .filter(ComplianceFramework.name == "CIS Controls v8")
        .first()
    )

    if not framework:
        framework = ComplianceFramework(
            name="CIS Controls v8",
            version="8",
            description="Center for Internet Security Critical Security Controls Version 8 — "
                        "a prioritized set of 18 security safeguards organized by Implementation Groups (IG1, IG2, IG3).",
            url="https://www.cisecurity.org/controls/v8",
            is_active=True,
        )
        db.add(framework)
        db.flush()
        logger.info("Created CIS Controls v8 framework")
    else:
        logger.info("CIS Controls v8 framework already exists")

    # ── 2. Control definitions with ATT&CK mappings ───────────────
    CIS_CONTROLS = [
        {
            "control_id": "CIS-1",
            "title": "Inventory and Control of Enterprise Assets",
            "category": "IG1 — Basic",
            "description": (
                "Actively manage all enterprise hardware assets — servers, workstations, mobile "
                "devices, and network equipment — so that only authorised devices are given access. "
                "Attackers routinely perform active scanning (T1595), gather network information "
                "(T1590), and enumerate live hosts (T1018) and system details (T1082) to find "
                "unmanaged or forgotten devices that can serve as entry points. You cannot protect "
                "what you cannot see."
            ),
            "techniques": ["T1595", "T1590", "T1018", "T1082"],
        },
        {
            "control_id": "CIS-2",
            "title": "Inventory and Control of Software Assets",
            "category": "IG1 — Basic",
            "description": (
                "Actively manage all software installed on enterprise assets — only authorised "
                "software should be installed and executed. Unknown or unauthorised software is a "
                "primary indicator of compromise: attackers enumerate installed applications "
                "(T1518), abuse software deployment tools (T1072), and introduce malicious code "
                "via compromised software supply chains (T1195). An allowlist of approved software "
                "makes unauthorised installations immediately detectable."
            ),
            "techniques": ["T1518", "T1072", "T1195"],
        },
        {
            "control_id": "CIS-3",
            "title": "Data Protection",
            "category": "IG1 — Basic",
            "description": (
                "Develop processes to identify, classify, and protect sensitive data throughout "
                "its lifecycle. Data exfiltration is the primary objective of most targeted "
                "attacks: attackers collect local files (T1005), harvest emails (T1114), stage "
                "and compress data for extraction (T1560), and exfiltrate via alternative protocols "
                "(T1048) or C2 channels (T1041). Understanding where sensitive data lives is the "
                "prerequisite for preventing it from leaving."
            ),
            "techniques": ["T1005", "T1114", "T1560", "T1048", "T1041"],
        },
        {
            "control_id": "CIS-4",
            "title": "Secure Configuration of Enterprise Assets and Software",
            "category": "IG1 — Basic",
            "description": (
                "Establish and maintain secure configurations for all enterprise assets and software. "
                "Default or insecure configurations are exploited by attackers to establish "
                "persistence: DLL hijacking and path manipulation (T1574), event-triggered execution "
                "hooks (T1546), registry modifications (T1112), and malicious service installation "
                "(T1543). CIS Benchmarks provide vendor-specific hardening guidance that significantly "
                "raises the bar for attackers."
            ),
            "techniques": ["T1574", "T1546", "T1112", "T1543"],
        },
        {
            "control_id": "CIS-5",
            "title": "Account Management",
            "category": "IG1 — Basic",
            "description": (
                "Use processes and tools to assign and manage authorisation for all accounts — "
                "including credentials, permissions, and lifecycle management. Poorly managed "
                "accounts are the single most exploited attack vector: valid stolen credentials "
                "(T1078), creation of backdoor accounts (T1136), modification of existing account "
                "privileges (T1098), and enumeration of all accounts to identify high-value targets "
                "(T1087). A mature account management programme prevents orphaned, over-privileged, "
                "and shared accounts."
            ),
            "techniques": ["T1078", "T1136", "T1098", "T1087"],
        },
        {
            "control_id": "CIS-6",
            "title": "Access Control Management",
            "category": "IG1 — Basic",
            "description": (
                "Use processes and tools to create, assign, manage, and revoke access credentials "
                "and privileges based on least privilege. After gaining initial access, attackers "
                "need to escalate privileges and move laterally — they abuse valid high-privilege "
                "accounts (T1078), exploit privilege escalation vulnerabilities (T1548), manipulate "
                "access tokens (T1134), and use remote services to reach additional systems (T1021). "
                "Least-privilege access control directly constrains all of these techniques."
            ),
            "techniques": ["T1078", "T1548", "T1134", "T1021"],
        },
        {
            "control_id": "CIS-7",
            "title": "Continuous Vulnerability Management",
            "category": "IG2 — Foundational",
            "description": (
                "Continuously acquire, assess, and take action on new information about "
                "vulnerabilities to remediate and minimise the window of opportunity for attackers. "
                "Unpatched vulnerabilities are a primary attack vector for initial access and "
                "privilege escalation: exploitation of internet-facing applications (T1190), "
                "client-side vulnerabilities (T1203), local privilege escalation flaws (T1068), "
                "and network service vulnerabilities (T1210). The average time between vulnerability "
                "disclosure and exploitation is now less than 15 days."
            ),
            "techniques": ["T1190", "T1203", "T1068", "T1210"],
        },
        {
            "control_id": "CIS-8",
            "title": "Audit Log Management",
            "category": "IG2 — Foundational",
            "description": (
                "Collect, alert, review, and retain audit logs to detect attacks and enable "
                "investigations. Audit logs are the primary resource for incident response — "
                "and therefore the primary target for attacker cleanup: disabling security tools "
                "and logging (T1562), clearing Windows Event Logs, bash history, and syslog "
                "entries (T1070), and using command-line tools to execute without leaving "
                "traces (T1059). A centralised, write-protected log store is essential."
            ),
            "techniques": ["T1562", "T1070", "T1059"],
        },
        {
            "control_id": "CIS-9",
            "title": "Email and Web Browser Protections",
            "category": "IG2 — Foundational",
            "description": (
                "Improve protections and detections of threats from email and web vectors — the "
                "primary delivery mechanisms for malware and social engineering. The majority of "
                "successful breaches begin with a phishing email (T1566), a user clicking a "
                "malicious link or attachment (T1204), a drive-by download from a compromised "
                "site (T1189), or spear-phishing for credentials (T1598). Email security, web "
                "filtering, and user training form the essential first line of defence."
            ),
            "techniques": ["T1566", "T1204", "T1189", "T1598"],
        },
        {
            "control_id": "CIS-10",
            "title": "Malware Defenses",
            "category": "IG2 — Foundational",
            "description": (
                "Prevent or control the installation, spread, and execution of malicious applications, "
                "code, or scripts. Malware executes via scripting engines (T1059), user-initiated "
                "actions (T1204), and uses obfuscation (T1027) and decoding techniques (T1140) to "
                "evade detection tools. Some malware also checks for sandbox environments (T1497) "
                "before activating. Effective malware defence requires layered controls — endpoint "
                "detection, application control, and behaviour-based analysis."
            ),
            "techniques": ["T1059", "T1204", "T1027", "T1140", "T1497"],
        },
        {
            "control_id": "CIS-11",
            "title": "Data Recovery",
            "category": "IG1 — Basic",
            "description": (
                "Establish and maintain data recovery practices sufficient to restore in-scope "
                "enterprise assets to a pre-incident state. Modern ransomware specifically targets "
                "backup infrastructure to maximise extortion leverage: encrypting all accessible "
                "data (T1486), deleting or inhibiting backup and recovery tools (T1490), and "
                "wiping disks entirely (T1561). The 3-2-1 backup rule — three copies, two different "
                "media, one offsite — with immutable storage and regular recovery tests is the "
                "only reliable defence."
            ),
            "techniques": ["T1486", "T1490", "T1561"],
        },
        {
            "control_id": "CIS-12",
            "title": "Network Infrastructure Management",
            "category": "IG2 — Foundational",
            "description": (
                "Establish, implement, and actively manage network infrastructure using a "
                "comprehensive security process. Attackers abuse weak network infrastructure "
                "for man-in-the-middle attacks (T1557), use standard application protocols to "
                "blend command-and-control traffic with normal traffic (T1071), tunnel malicious "
                "traffic through legitimate protocols (T1572), and use non-standard ports to evade "
                "filtering (T1571). Network hardening, firmware management, and network monitoring "
                "are the primary safeguards."
            ),
            "techniques": ["T1557", "T1071", "T1572", "T1571"],
        },
        {
            "control_id": "CIS-13",
            "title": "Network Monitoring and Defense",
            "category": "IG2 — Foundational",
            "description": (
                "Operate processes and tooling to establish and maintain comprehensive network "
                "monitoring and defence against security threats. Without network monitoring, "
                "exfiltration goes undetected: data exfiltrated via web protocols (T1071) or "
                "alternative channels (T1048, T1041), tool transfer to and from attacker "
                "infrastructure (T1105), and protocol tunnelling to bypass controls (T1572). "
                "Network detection and response (NDR) tools, combined with IDS signatures and "
                "anomaly detection, form the core technical controls."
            ),
            "techniques": ["T1071", "T1048", "T1041", "T1105", "T1572"],
        },
        {
            "control_id": "CIS-14",
            "title": "Security Awareness and Skills Training",
            "category": "IG1 — Basic",
            "description": (
                "Establish and maintain a security awareness programme that addresses the full range "
                "of threats facing the organisation. Social engineering remains the most effective "
                "attack vector because it bypasses technical controls: phishing emails (T1566), "
                "malicious attachments (T1204), and credential harvesting via fake login pages "
                "(T1598) succeed because users lack the training to recognise them. Regular, "
                "scenario-based training with simulated phishing campaigns provides measurable "
                "improvement."
            ),
            "techniques": ["T1566", "T1204", "T1598"],
        },
        {
            "control_id": "CIS-15",
            "title": "Service Provider Management",
            "category": "IG2 — Foundational",
            "description": (
                "Develop a process to evaluate service providers who hold sensitive data or are "
                "responsible for critical IT platforms. Supply chain and third-party attacks have "
                "become one of the most impactful threat vectors: adversaries exploit trusted "
                "relationships with managed service providers (T1199) and compromise software "
                "supply chains to reach downstream targets (T1195). Vendor risk assessments, "
                "contractual security requirements, and continuous monitoring are essential."
            ),
            "techniques": ["T1199", "T1195"],
        },
        {
            "control_id": "CIS-16",
            "title": "Application Software Security",
            "category": "IG2 — Foundational",
            "description": (
                "Manage the security lifecycle of in-house developed and acquired software in "
                "order to prevent, detect, and remediate security weaknesses. Application "
                "vulnerabilities are a primary initial access vector: internet-facing application "
                "exploitation (T1190), command execution through application weaknesses (T1059), "
                "and client-side code execution (T1203). A secure software development lifecycle "
                "(SSDLC) with threat modelling, code review, and penetration testing catches "
                "vulnerabilities before they reach production."
            ),
            "techniques": ["T1190", "T1059", "T1203"],
        },
        {
            "control_id": "CIS-17",
            "title": "Incident Response Management",
            "category": "IG2 — Foundational",
            "description": (
                "Establish a programme to develop and maintain an incident response capability — "
                "including a plan, defined roles, training, and exercises. Effective incident "
                "response must counter attacker persistence mechanisms before they re-establish "
                "footholds: scripted commands (T1059), boot or logon persistence (T1547), and "
                "scheduled tasks (T1053) that survive a reboot. A tested incident response plan "
                "reduces average dwell time and limits the damage from any breach."
            ),
            "techniques": ["T1059", "T1547", "T1053"],
        },
        {
            "control_id": "CIS-18",
            "title": "Penetration Testing",
            "category": "IG3 — Organizational",
            "description": (
                "Test the effectiveness of organisational defences (people, processes, technology) "
                "by safely simulating adversary objectives and actions. This is the CIS control "
                "most directly aligned with the Aegis Red Team platform. Penetration tests "
                "simulate reconnaissance (T1595), service discovery (T1046), exploitation of "
                "public-facing applications (T1190), and post-exploitation execution (T1059) to "
                "validate whether defensive controls work in practice. Every test executed in Aegis "
                "directly contributes to evidence for this control."
            ),
            "techniques": ["T1595", "T1046", "T1190", "T1059"],
        },
    ]

    # Build technique lookup (mitre_id -> Technique)
    all_techniques = {t.mitre_id: t for t in db.query(Technique).all()}

    existing_controls = {
        c.control_id: c
        for c in db.query(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # Existing (control, technique) pairs for this framework only
    existing_mappings = set()
    for m in (
        db.query(ComplianceControlMapping)
        .join(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    ):
        existing_mappings.add((str(m.compliance_control_id), str(m.technique_id)))

    controls_created = 0
    mappings_created = 0
    mappings_skipped = 0

    for item in CIS_CONTROLS:
        control = existing_controls.get(item["control_id"])
        if control is None:
            # NOTE(review): the NIST import truncates descriptions to 500
            # characters and some descriptions above are longer — confirm the
            # description column accepts the full text.
            control = ComplianceControl(
                framework_id=framework.id,
                control_id=item["control_id"],
                title=item["title"],
                description=item["description"],
                category=item["category"],
            )
            db.add(control)
            # Flush now: ``control.id`` is needed for the mapping keys below
            db.flush()
            existing_controls[item["control_id"]] = control
            controls_created += 1
        elif not control.description:
            # Existing control without a description: fill in the curated
            # text, never overwriting a description that is already set.
            control.description = item["description"]

        for mitre_id in item["techniques"]:
            technique = all_techniques.get(mitre_id)
            if technique is None:
                # Technique is not present in the local database
                mappings_skipped += 1
                continue
            key = (str(control.id), str(technique.id))
            if key in existing_mappings:
                mappings_skipped += 1
                continue
            db.add(
                ComplianceControlMapping(
                    compliance_control_id=control.id,
                    technique_id=technique.id,
                )
            )
            existing_mappings.add(key)
            mappings_created += 1

    db.commit()

    summary = {
        "framework": framework.name,
        "controls_created": controls_created,
        "controls_existing": len(existing_controls) - controls_created,
        "mappings_created": mappings_created,
        # Reported for parity with the NIST import summaries
        "mappings_skipped": mappings_skipped,
        "total_controls": len(existing_controls),
    }
    logger.info(f"CIS Controls v8 import complete: {summary}")
    return summary


def import_dora_mappings(db: Session) -> dict:
    """Import DORA (Digital Operational Resilience Act) with ATT&CK technique mappings.

    DORA (EU 2022/2554) applies to financial entities and ICT third-party providers.
    Controls map the key cybersecurity articles (Chapters II–VI) to MITRE ATT&CK
    techniques based on ENISA guidance and TIBER-EU threat-led testing framework.

    The import is additive and safe to re-run: the framework, its controls and
    its control-to-technique mappings are only created when they are not
    already present. Controls that already exist are reused as-is (their title
    and category are not updated). Technique IDs that are not present in the
    ``Technique`` table are skipped and reported once in a warning log entry.

    Args:
        db: SQLAlchemy session used for all reads and writes. The function
            flushes while creating rows and commits once at the end.

    Returns:
        A summary dict with the keys ``framework`` (framework name),
        ``controls_created``, ``controls_existing``, ``mappings_created`` and
        ``total_controls``.
    """
    # ── 1. Create or get framework ────────────────────────────────
    framework = (
        db.query(ComplianceFramework)
        .filter(ComplianceFramework.name == "DORA")
        .first()
    )

    if not framework:
        framework = ComplianceFramework(
            name="DORA",
            version="2022/2554",
            description=(
                "Digital Operational Resilience Act (Regulation EU 2022/2554) — "
                "EU regulation establishing ICT risk management, incident reporting, "
                "digital operational resilience testing, and ICT third-party risk "
                "management requirements for financial entities."
            ),
            url="https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32022R2554",
            is_active=True,
        )
        db.add(framework)
        # Flush so framework.id is available for the queries and rows below.
        db.flush()
        logger.info("Created DORA framework")
    else:
        logger.info("DORA framework already exists")

    # ── 2. Control definitions with ATT&CK mappings ───────────────
    # Based on ENISA DORA guidelines and TIBER-EU threat intelligence framework.
    # Each control maps to a DORA article and the ATT&CK techniques it addresses.
    #
    # NOTE(review): the "description" values below are not passed to
    # ComplianceControl when rows are created in this function — confirm
    # whether they are persisted elsewhere or should be stored here.
    # NOTE(review): some control IDs/titles (e.g. Art.15, Art.17–19, Art.42)
    # may not match the article numbering of the final published regulation —
    # verify against EUR-Lex. IDs are left unchanged because they are the
    # lookup key for rows that may already exist in the database.
    DORA_CONTROLS = [
        # ─── Chapter II — ICT Risk Management ────────────────────────────
        {
            "control_id": "DORA-Art.5",
            "title": "Governance and Organisation",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 5 requires the management body of financial entities to define, "
                "approve, and oversee ICT risk management. This means executive responsibility "
                "for cybersecurity — not just delegation to IT. Governance failures enable "
                "credential-based attacks on unmonitored accounts (T1078), creation of "
                "unauthorised accounts (T1136), privilege escalation (T1098), and unchecked "
                "account enumeration (T1087). Regulators (EBA, ESMA, EIOPA) expect evidence "
                "that the board actively monitors ICT risk indicators."
            ),
            "techniques": ["T1078", "T1136", "T1098", "T1087"],
        },
        {
            "control_id": "DORA-Art.6",
            "title": "ICT Risk Management Framework",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 6 requires a comprehensive, documented ICT risk management framework "
                "covering identification, protection, detection, response, and recovery. The "
                "framework must be tested against realistic threats — active scanning (T1595), "
                "network reconnaissance (T1590), employee intelligence gathering (T1589), port "
                "scanning (T1046), host enumeration (T1018), and system profiling (T1082). "
                "Red Team exercises under DORA's TLPT programme are the primary evidence that "
                "the risk framework functions as intended."
            ),
            "techniques": ["T1595", "T1590", "T1589", "T1046", "T1018", "T1082"],
        },
        {
            "control_id": "DORA-Art.7",
            "title": "ICT Systems, Protocols and Tools",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 7 requires financial entities to keep ICT systems up to date, "
                "securely configured, and with minimised attack surface. Attackers exploit "
                "outdated and misconfigured systems: DLL hijacking (T1574), rogue service "
                "installation (T1543), registry manipulation (T1112), event-triggered hooks "
                "(T1546), compromised software components (T1195), and abuse of legitimate "
                "external remote services (T1133). Hardened, maintained systems are a "
                "fundamental DORA compliance requirement."
            ),
            "techniques": ["T1574", "T1543", "T1112", "T1546", "T1195", "T1133"],
        },
        {
            "control_id": "DORA-Art.8",
            "title": "Identification",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 8 requires financial entities to identify and classify all ICT "
                "assets, data, and third-party dependencies that support critical functions. "
                "Attackers perform the same discovery to understand their target: scanning for "
                "live hosts (T1018), querying system information (T1082), finding sensitive "
                "files (T1083), enumerating accounts (T1087), and mapping network and "
                "infrastructure (T1590, T1592). An entity that knows its own assets better than "
                "an attacker does has a fundamental defensive advantage."
            ),
            "techniques": ["T1018", "T1082", "T1083", "T1087", "T1590", "T1592"],
        },
        {
            "control_id": "DORA-Art.9",
            "title": "Protection and Prevention",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 9 requires financial entities to implement continuous protection "
                "measures — access controls, network segmentation, patch management, and change "
                "management — to prevent ICT incidents. Protection must address the full kill "
                "chain: credential abuse (T1078), privilege escalation (T1548, T1134), "
                "application exploitation (T1190), persistence via system modifications (T1574, "
                "T1543), and lateral movement (T1021). DORA supervisors assess protection "
                "effectiveness through scenario-based testing."
            ),
            "techniques": ["T1078", "T1548", "T1134", "T1190", "T1574", "T1543", "T1021"],
        },
        {
            "control_id": "DORA-Art.10",
            "title": "Detection",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 10 requires financial entities to implement mechanisms to promptly "
                "detect anomalous activities. The detection capability must specifically identify "
                "attacker attempts to: disable security tooling (T1562), clear log evidence "
                "(T1070), execute malicious commands (T1059), use scheduled tasks for persistence "
                "(T1053), establish boot-time persistence (T1547), and abuse logon scripts "
                "(T1037). DORA expects mean time to detect (MTTD) to be measured and improved "
                "continuously."
            ),
            "techniques": ["T1562", "T1070", "T1059", "T1053", "T1547", "T1037"],
        },
        {
            "control_id": "DORA-Art.11",
            "title": "Response and Recovery",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 11 requires documented, tested response and recovery plans for ICT "
                "incidents, including defined recovery time objectives (RTO) and recovery point "
                "objectives (RPO) for critical functions. Financial entities must be able to "
                "recover from the most damaging attacks: ransomware (T1486), backup deletion "
                "(T1490), disk wiping (T1561), data destruction (T1485), and exfiltration "
                "(T1048, T1041). DORA supervisors have the power to require live resilience tests."
            ),
            "techniques": ["T1486", "T1490", "T1561", "T1485", "T1048", "T1041"],
        },
        {
            "control_id": "DORA-Art.12",
            "title": "Backup Policies and Recovery Methods",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 12 specifically mandates backup policies for all data, software, "
                "and systems supporting critical functions, with tested recovery procedures. "
                "Ransomware groups are acutely aware of this requirement and deliberately target "
                "backups: encrypting all accessible data (T1486), deleting shadow copies and "
                "inhibiting recovery tools (T1490), wiping disks (T1561), and destroying data "
                "entirely (T1485). DORA requires backups to be isolated from the production "
                "network and tested at least annually."
            ),
            "techniques": ["T1486", "T1490", "T1561", "T1485"],
        },
        {
            "control_id": "DORA-Art.13",
            "title": "Learning and Evolving",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 13 requires financial entities to learn from ICT incidents — both "
                "their own and sector-wide — and continuously improve their ICT risk framework. "
                "This includes threat intelligence consumption: understanding current phishing "
                "campaigns (T1566), employee data harvesting (T1589), infrastructure reconnaissance "
                "(T1590), active scanning of financial sector targets (T1595), and credential "
                "phishing (T1598). Threat intelligence feeds directly inform which Red Team "
                "scenarios are most relevant."
            ),
            "techniques": ["T1566", "T1589", "T1590", "T1595", "T1598"],
        },
        {
            "control_id": "DORA-Art.14",
            "title": "Communication",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 14 requires crisis communication plans for ICT incidents — covering "
                "internal communication, staff notification, and external communication to clients, "
                "counterparties, and regulators. Communication channels are themselves an attack "
                "vector: attackers harvest email content (T1114), use phishing to impersonate "
                "communications (T1566), abuse web services as covert channels (T1102), and use "
                "standard web protocols to blend C2 traffic (T1071). Secure, authenticated "
                "communication channels are therefore also an ICT risk requirement."
            ),
            "techniques": ["T1114", "T1566", "T1102", "T1071"],
        },
        {
            "control_id": "DORA-Art.15",
            "title": "Further Harmonisation of ICT Risk Management Tools",
            "category": "Chapter II — ICT Risk Management",
            "description": (
                "DORA Article 15 mandates that simplified ICT risk management requirements for "
                "smaller financial entities still cover the essential controls. Even simplified "
                "frameworks must address: credential compromise (T1078), exploitation of "
                "internet-facing systems (T1190), remote access abuse (T1133, T1021), and third-party "
                "risk (T1199). The proportionality principle does not reduce the required security "
                "outcomes — only the implementation complexity."
            ),
            "techniques": ["T1078", "T1190", "T1133", "T1021", "T1199"],
        },
        # ─── Chapter III — ICT-related Incident Management ────────────────
        {
            "control_id": "DORA-Art.17",
            "title": "ICT-related Incidents Classification",
            "category": "Chapter III — Incident Management",
            "description": (
                "DORA Article 17 mandates a classification process for ICT incidents based on "
                "criteria including impact on services, clients, and financial stability. "
                "Classification must correctly identify high-severity incidents like: DDoS attacks "
                "impacting service availability (T1499, T1498), ransomware causing business "
                "disruption (T1486), data exfiltration affecting client data (T1041, T1048), and "
                "data manipulation affecting transaction integrity (T1565). Misclassification "
                "leads to delayed regulatory reporting and supervisory sanctions."
            ),
            "techniques": ["T1499", "T1498", "T1486", "T1041", "T1048", "T1565"],
        },
        {
            "control_id": "DORA-Art.18",
            "title": "Major ICT-Related Incidents Reporting",
            "category": "Chapter III — Incident Management",
            "description": (
                "DORA Article 18 requires mandatory reporting of major ICT incidents to competent "
                "authorities (ECB, national regulators) within strict timeframes — initial "
                "notification within 4 hours, intermediate report within 72 hours, final report "
                "within 1 month. Qualifying incidents include ransomware (T1486), significant "
                "data exfiltration (T1041, T1048), and availability-impacting attacks (T1499, "
                "T1498). Non-compliance with reporting timelines carries significant supervisory "
                "and reputational risk."
            ),
            "techniques": ["T1486", "T1041", "T1048", "T1499", "T1498"],
        },
        {
            "control_id": "DORA-Art.19",
            "title": "Harmonisation of Reporting Content and Formats",
            "category": "Chapter III — Incident Management",
            "description": (
                "DORA Article 19 requires standardised incident report formats as specified by "
                "the Joint Committee of ESAs. Reports must contain technical details about the "
                "attack: initial access via phishing (T1566), application vulnerabilities (T1190), "
                "execution mechanisms (T1203, T1059), and the full timeline. This requires that "
                "the organisation maintains high-fidelity detection and logging capabilities to "
                "populate the mandatory report fields accurately."
            ),
            "techniques": ["T1566", "T1190", "T1203", "T1059"],
        },
        # ─── Chapter IV — Digital Operational Resilience Testing ──────────
        {
            "control_id": "DORA-Art.24",
            "title": "General Digital Operational Resilience Testing",
            "category": "Chapter IV — Resilience Testing",
            "description": (
                "DORA Article 24 requires ALL financial entities to conduct a comprehensive "
                "digital operational resilience testing programme annually — covering vulnerability "
                "assessments, network security testing, and scenario-based tests. The testing "
                "programme must validate defences against realistic attacks including command "
                "execution (T1059), application exploitation (T1190), service discovery (T1046), "
                "reconnaissance (T1595), and credential abuse (T1078). Aegis directly supports "
                "this requirement by providing evidence of test coverage and outcomes."
            ),
            "techniques": ["T1059", "T1190", "T1046", "T1595", "T1078"],
        },
        {
            "control_id": "DORA-Art.25",
            "title": "Testing of ICT Tools and Systems",
            "category": "Chapter IV — Resilience Testing",
            "description": (
                "DORA Article 25 requires testing of ICT systems and tools that support critical "
                "and important functions — including penetration testing of production or "
                "representative environments. Testing scope must cover exploitation of systems "
                "(T1059, T1190, T1046, T1595), credential attacks (T1078), privilege escalation "
                "(T1068), and network service exploitation (T1210). Test results must be shared "
                "with competent authorities on request, making a structured testing platform "
                "with evidence retention (like Aegis) a compliance necessity."
            ),
            "techniques": ["T1059", "T1190", "T1046", "T1595", "T1078", "T1068", "T1210"],
        },
        {
            "control_id": "DORA-Art.26",
            "title": "Advanced Testing — Threat-Led Penetration Testing (TLPT)",
            "category": "Chapter IV — Resilience Testing",
            "description": (
                "DORA Article 26 mandates Threat-Led Penetration Testing (TLPT) — equivalent to "
                "TIBER-EU — for significant financial entities every 3 years. TLPT is a full "
                "Red Team simulation based on real threat intelligence, testing the organisation's "
                "ability to detect and respond to a realistic advanced adversary. Techniques "
                "covered include: phishing (T1566), user execution (T1204), process injection "
                "(T1055), command execution (T1059), lateral movement (T1021), credential abuse "
                "(T1078), application exploitation (T1190), service scanning (T1046), privilege "
                "escalation (T1548, T1134), and payload obfuscation (T1027). DORA TLPT is the "
                "most rigorous regulatory cyber resilience test in the financial sector."
            ),
            "techniques": [
                "T1566", "T1204", "T1055", "T1059", "T1021", "T1078",
                "T1190", "T1046", "T1548", "T1134", "T1027",
            ],
        },
        {
            "control_id": "DORA-Art.27",
            "title": "Requirements for Testers Carrying Out TLPT",
            "category": "Chapter IV — Resilience Testing",
            "description": (
                "DORA Article 27 sets requirements for the Red Team providers conducting TLPT — "
                "they must be independent, technically qualified, and approved by competent "
                "authorities. The testing scope must include realistic reconnaissance (T1595), "
                "service enumeration (T1046), exploitation (T1190), post-exploitation execution "
                "(T1059), and abuse of valid credentials (T1078). TLPT providers must follow "
                "TIBER-EU methodology, using actual threat intelligence specific to the target "
                "entity's threat landscape."
            ),
            "techniques": ["T1595", "T1046", "T1190", "T1059", "T1078"],
        },
        # ─── Chapter V — ICT Third-Party Risk Management ──────────────────
        {
            "control_id": "DORA-Art.28",
            "title": "General Principles of ICT Third-Party Risk Management",
            "category": "Chapter V — Third-Party Risk",
            "description": (
                "DORA Article 28 requires a comprehensive ICT third-party risk management "
                "strategy, including registers of all third-party providers and pre-contractual "
                "due diligence. Financial entities have been repeatedly compromised through their "
                "supply chains: trusted relationship exploitation (T1199), software supply chain "
                "compromise (T1195), credential theft for third-party systems (T1078), and abuse "
                "of legitimate remote access tools (T1133). DORA supervisors can examine "
                "third-party risk registers during inspections."
            ),
            "techniques": ["T1199", "T1195", "T1078", "T1133"],
        },
        {
            "control_id": "DORA-Art.30",
            "title": "Key Contractual Provisions for ICT Services",
            "category": "Chapter V — Third-Party Risk",
            "description": (
                "DORA Article 30 specifies mandatory contract clauses for ICT third-party service "
                "agreements — including security requirements, audit rights, incident notification, "
                "and exit strategies. Without contractual security obligations, third parties "
                "become the weakest link: exploitation of trusted relationships (T1199), "
                "compromised software delivered by the vendor (T1195), and credential sharing "
                "that allows lateral movement (T1078). DORA requires financial entities to "
                "actively enforce these clauses, not merely include them."
            ),
            "techniques": ["T1199", "T1195", "T1078"],
        },
        {
            "control_id": "DORA-Art.42",
            "title": "Oversight of Critical ICT Third-Party Providers",
            "category": "Chapter V — Third-Party Risk",
            "description": (
                "DORA Article 42 establishes a new EU-level oversight framework for ICT providers "
                "designated as 'critical' — cloud providers, data centres, and core software "
                "vendors serving multiple financial entities simultaneously. A compromise of a "
                "critical ICT provider would represent systemic risk to financial stability: "
                "exploiting trusted relationships (T1199, T1195), leveraging legitimate remote "
                "access (T1133), abusing shared credentials (T1078), and exploiting the provider's "
                "own internet-facing infrastructure (T1190). DORA Lead Overseers can demand "
                "remediation actions from critical providers directly."
            ),
            "techniques": ["T1199", "T1195", "T1133", "T1078", "T1190"],
        },
        # ─── Chapter VI — Information Sharing ────────────────────────────
        {
            "control_id": "DORA-Art.45",
            "title": "Arrangements for Information Sharing on Cyber Threats",
            "category": "Chapter VI — Information Sharing",
            "description": (
                "DORA Article 45 encourages financial entities to participate in cyber threat "
                "information sharing arrangements — sharing indicators of compromise, attack "
                "patterns, and tactical intelligence with peers and regulators. Shared intelligence "
                "enables the sector to collectively defend against sector-specific threats: "
                "phishing campaigns targeting financial firms (T1566), data harvesting on "
                "employees (T1589), infrastructure reconnaissance of banking networks (T1590), "
                "active scanning of financial sector assets (T1595), and spear-phishing for "
                "credentials (T1598). Participation in CERT-level sharing programmes satisfies "
                "this requirement."
            ),
            "techniques": ["T1566", "T1589", "T1590", "T1595", "T1598"],
        },
    ]

    # ── 3. Load lookups that make the import re-runnable ──────────
    # ATT&CK ID (e.g. "T1078") -> Technique row.
    all_techniques = {t.mitre_id: t for t in db.query(Technique).all()}

    # Controls already stored for this framework, keyed by control_id.
    existing_controls = {
        c.control_id: c
        for c in db.query(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # (control id, technique id) pairs already stored for this framework.
    # Both IDs are stringified so the membership test below compares like
    # with like (presumably guarding against UUID-vs-str differences —
    # confirm against the model definitions).
    existing_mappings = {
        (str(m.compliance_control_id), str(m.technique_id))
        for m in db.query(ComplianceControlMapping)
        .join(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # ── 4. Create missing controls and mappings ───────────────────
    controls_created = 0
    mappings_created = 0
    # Technique IDs referenced above but absent from the Technique table.
    missing_techniques = set()

    for item in DORA_CONTROLS:
        control = existing_controls.get(item["control_id"])
        if control is None:
            control = ComplianceControl(
                framework_id=framework.id,
                control_id=item["control_id"],
                title=item["title"],
                category=item["category"],
            )
            db.add(control)
            # Flush so control.id is populated before it is used in mappings.
            db.flush()
            existing_controls[item["control_id"]] = control
            controls_created += 1

        for mitre_id in item["techniques"]:
            technique = all_techniques.get(mitre_id)
            if not technique:
                # Best-effort: a mapping cannot be created without the
                # technique row, so remember the ID and report it later.
                missing_techniques.add(mitre_id)
                continue
            key = (str(control.id), str(technique.id))
            if key in existing_mappings:
                continue
            db.add(
                ComplianceControlMapping(
                    compliance_control_id=control.id,
                    technique_id=technique.id,
                )
            )
            # Track the new pair so a repeated entry in the data above
            # cannot produce a duplicate mapping within this run.
            existing_mappings.add(key)
            mappings_created += 1

    db.commit()

    if missing_techniques:
        # One aggregated entry instead of one per skipped mapping; without
        # it an import against an empty or partial Technique table would
        # report success with no hint of why mappings are missing.
        logger.warning(
            "DORA import skipped mappings for %d technique ID(s) not found "
            "in the Technique table: %s",
            len(missing_techniques),
            ", ".join(sorted(missing_techniques)),
        )

    # ── 5. Summarise ──────────────────────────────────────────────
    summary = {
        "framework": framework.name,
        "controls_created": controls_created,
        # existing_controls now also holds the rows created above, so
        # subtract them to get the pre-existing count.
        "controls_existing": len(existing_controls) - controls_created,
        "mappings_created": mappings_created,
        "total_controls": len(existing_controls),
    }
    logger.info("DORA import complete: %s", summary)
    return summary


def import_iso_27001_mappings(db: Session) -> dict:
    """Import ISO/IEC 27001:2022 Annex A controls with ATT&CK technique mappings.

    ISO/IEC 27001:2022 has 93 controls in Annex A organised into 4 themes:
    - 5. Organizational controls (37)
    - 6. People controls (8)
    - 7. Physical controls (14)
    - 8. Technological controls (34)

    Mappings follow MITRE ATT&CK Enterprise v14 and published ISO/IEC 27002:2022
    guidance on threat mitigations.

    Returns a summary dict with counts.
    """
    framework = (
        db.query(ComplianceFramework)
        .filter(ComplianceFramework.name == "ISO/IEC 27001:2022")
        .first()
    )

    if not framework:
        framework = ComplianceFramework(
            name="ISO/IEC 27001:2022",
            version="2022",
            description=(
                "ISO/IEC 27001:2022 — International standard for Information Security "
                "Management Systems (ISMS). Annex A contains 93 controls across 4 themes: "
                "Organizational, People, Physical, and Technological."
            ),
            url="https://www.iso.org/standard/27001",
            is_active=True,
        )
        db.add(framework)
        db.flush()
        logger.info("Created ISO/IEC 27001:2022 framework")
    else:
        logger.info("ISO/IEC 27001:2022 framework already exists")

    ISO_27001_CONTROLS = [
        # ── 5. Organizational Controls ──────────────────────────────────────
        {
            "control_id": "5.2",
            "title": "Information Security Roles and Responsibilities",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires that information security responsibilities are clearly defined, allocated, "
                "and communicated. Without clear ownership, attackers exploit gaps between teams — "
                "for example, using valid accounts (T1078) whose owners are unknown, enumerating "
                "accounts (T1087) that no one monitors, or abusing group memberships (T1069) never "
                "reviewed after personnel changes."
            ),
            "techniques": ["T1078", "T1087", "T1069"],
        },
        {
            "control_id": "5.7",
            "title": "Threat Intelligence",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires the organisation to collect, analyse, and act on information about threats "
                "relevant to its assets. This directly counters attackers' reconnaissance activities: "
                "phishing campaigns (T1566), harvesting employee data (T1589), mapping the network "
                "perimeter (T1590), performing active scanning (T1595), and spear-phishing for "
                "credentials (T1598). Effective threat intelligence allows defenders to anticipate "
                "and disrupt these activities before they succeed."
            ),
            "techniques": ["T1566", "T1589", "T1590", "T1595", "T1598"],
        },
        {
            "control_id": "5.9",
            "title": "Inventory of Information and Other Assets",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires maintaining an accurate, up-to-date inventory of all information assets "
                "and their owners. Attackers routinely discover systems that the organisation itself "
                "has forgotten — querying system information (T1082), finding forgotten files (T1083), "
                "scanning for live hosts (T1018), or gathering infrastructure details (T1592). "
                "An asset inventory is the prerequisite for almost every other security control."
            ),
            "techniques": ["T1082", "T1083", "T1018", "T1592"],
        },
        {
            "control_id": "5.14",
            "title": "Information Transfer",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires rules and controls for transferring information — whether via email, "
                "removable media, cloud sharing, or messaging. It directly addresses data exfiltration "
                "paths: non-standard ports (T1048), command-and-control channels (T1041), cloud "
                "storage services (T1567), and standard web protocols used to blend with normal "
                "traffic (T1071). Without these controls, sensitive data can leave the organisation "
                "undetected."
            ),
            "techniques": ["T1048", "T1041", "T1567", "T1071"],
        },
        {
            "control_id": "5.16",
            "title": "Identity Management",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires a full lifecycle process for managing digital identities — from creation "
                "to deletion. Attackers who compromise an identity gain persistent access; they use "
                "valid stolen credentials (T1078), create new accounts (T1136), modify existing "
                "account permissions (T1098), and enumerate accounts to find privileged targets "
                "(T1087). Strong identity management directly reduces the blast radius of any "
                "credential compromise."
            ),
            "techniques": ["T1078", "T1136", "T1098", "T1087"],
        },
        {
            "control_id": "5.17",
            "title": "Authentication Information",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires secure management of all authentication secrets — passwords, tokens, "
                "certificates, and API keys. Weak authentication is the most common initial access "
                "vector: brute force and password spraying (T1110), credential dumping from memory "
                "or disk (T1003), use of stolen credentials (T1078), and harvesting secrets from "
                "configuration files (T1552). Enforcing strong, unique credentials with MFA "
                "neutralises the majority of these attacks."
            ),
            "techniques": ["T1110", "T1003", "T1078", "T1552"],
        },
        {
            "control_id": "5.20",
            "title": "Addressing Information Security in Supplier Agreements",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires that security obligations are contractually embedded in all supplier "
                "relationships. Supply chain attacks — where adversaries compromise a trusted "
                "third-party to reach the target (T1199, T1195) — have caused some of the largest "
                "breaches in recent years (e.g., SolarWinds, 3CX). Security clauses in contracts, "
                "combined with regular supplier audits, create accountability and reduce this risk."
            ),
            "techniques": ["T1199", "T1195"],
        },
        {
            "control_id": "5.23",
            "title": "Information Security for Use of Cloud Services",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires security policies and controls specifically tailored for cloud services, "
                "including roles, data classification, and monitoring. Cloud misconfiguration is "
                "now the leading cause of data breaches — attackers access data from cloud storage "
                "(T1530), exfiltrate to adversary-owned cloud accounts (T1537), abuse cloud "
                "credentials (T1078), and exploit internet-facing cloud APIs (T1190). This control "
                "establishes the governance layer that prevents these exposures."
            ),
            "techniques": ["T1530", "T1537", "T1078", "T1190"],
        },
        {
            "control_id": "5.24",
            "title": "Information Security Incident Management Planning",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires documented, tested incident management procedures so the organisation can "
                "respond effectively when an attack occurs. Without a plan, response is slow and "
                "inconsistent — attackers establish persistent footholds via scheduled tasks or "
                "startup entries (T1059, T1547) or deploy ransomware (T1486) while defenders are "
                "still trying to understand what is happening. A tested plan reduces dwell time "
                "from months to hours."
            ),
            "techniques": ["T1059", "T1547", "T1486"],
        },
        {
            "control_id": "5.26",
            "title": "Response to Information Security Incidents",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires a structured response process — containment, eradication, and recovery — "
                "when incidents are confirmed. Effective response must counter attacker persistence "
                "mechanisms (T1059, T1547), recover evidence before it is wiped (T1070), and "
                "restore disabled monitoring tools (T1562). The Red Team exercises mapped here "
                "validate whether detection and response capabilities actually work under realistic "
                "attack conditions."
            ),
            "techniques": ["T1059", "T1547", "T1070", "T1562"],
        },
        {
            "control_id": "5.28",
            "title": "Collection of Evidence",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires that digital evidence is collected, preserved, and handled in a way that "
                "maintains its integrity for potential legal proceedings or regulatory investigations. "
                "Attackers deliberately destroy logs (T1070) and tamper with or disable security "
                "tools (T1562) to prevent forensic analysis. This control ensures the organisation "
                "can prove what happened and who was responsible."
            ),
            "techniques": ["T1562", "T1070"],
        },
        {
            "control_id": "5.29",
            "title": "Information Security During Disruption",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires maintaining an acceptable security level even during major disruptions "
                "such as disasters, outages, or crises. Ransomware (T1486), disk wipers (T1561), "
                "and backup deletion (T1490) are specifically designed to make recovery impossible "
                "and extort organisations. Controls must ensure backups are immutable and recovery "
                "procedures are tested regularly."
            ),
            "techniques": ["T1486", "T1490", "T1561"],
        },
        {
            "control_id": "5.30",
            "title": "ICT Readiness for Business Continuity",
            "category": "5 — Organizational Controls",
            "description": (
                "Requires ICT infrastructure to be resilient enough to continue critical operations "
                "after a disruptive event. This addresses destructive attacks — ransomware (T1486), "
                "backup inhibition (T1490), and denial-of-service (T1499, T1498) — that aim to "
                "make the organisation unable to operate. Business continuity tests that include "
                "cyber scenarios are the primary validation mechanism for this control."
            ),
            "techniques": ["T1486", "T1490", "T1499", "T1498"],
        },
        # ── 6. People Controls ───────────────────────────────────────────────
        {
            "control_id": "6.1",
            "title": "Screening",
            "category": "6 — People Controls",
            "description": (
                "Requires background checks on employees and contractors proportionate to their "
                "access level. Insider threats are particularly dangerous because malicious insiders "
                "already hold valid credentials (T1078) and can abuse access tokens without "
                "triggering external alerts (T1134). Screening before hiring reduces the risk of "
                "placing a bad actor in a privileged position."
            ),
            "techniques": ["T1078", "T1134"],
        },
        {
            "control_id": "6.3",
            "title": "Information Security Awareness, Education and Training",
            "category": "6 — People Controls",
            "description": (
                "Requires regular, relevant security training for all personnel. The human element "
                "is the most exploited attack surface: phishing emails (T1566), malicious "
                "attachments or links (T1204), and credential harvesting via fake forms (T1598) "
                "succeed primarily because users are not trained to recognise them. Simulated "
                "phishing campaigns are the standard method to validate this control's effectiveness."
            ),
            "techniques": ["T1566", "T1204", "T1598"],
        },
        {
            "control_id": "6.4",
            "title": "Disciplinary Process",
            "category": "6 — People Controls",
            "description": (
                "Requires a formal, communicated disciplinary process for security policy violations. "
                "The existence of clear consequences deters insider misuse of credentials (T1078) "
                "and unauthorised modification of account permissions (T1098). It also provides a "
                "legally defensible framework when disciplinary action is needed after an incident."
            ),
            "techniques": ["T1078", "T1098"],
        },
        # ── 7. Physical Controls ─────────────────────────────────────────────
        {
            "control_id": "7.1",
            "title": "Physical Security Perimeters",
            "category": "7 — Physical Controls",
            "description": (
                "Requires physical barriers — security zones, badge access, locked server rooms — "
                "to prevent unauthorised physical access to information processing facilities. "
                "Physical access enables attacks that are impossible remotely, such as connecting "
                "rogue hardware devices (T1200) — keyloggers, network implants, or rogue access "
                "points — directly to internal systems."
            ),
            "techniques": ["T1200"],
        },
        {
            "control_id": "7.4",
            "title": "Physical Security Monitoring",
            "category": "7 — Physical Controls",
            "description": (
                "Requires surveillance and monitoring of physical access to sensitive areas — CCTV, "
                "access logs, visitor registers. Attackers with physical access can install hardware "
                "implants (T1200) or tamper with authentication components (T1556) that cannot be "
                "detected by purely network-based monitoring. Physical monitoring provides the "
                "detective control for these scenarios."
            ),
            "techniques": ["T1200", "T1556"],
        },
        # ── 8. Technological Controls ────────────────────────────────────────
        {
            "control_id": "8.2",
            "title": "Privileged Access Rights",
            "category": "8 — Technological Controls",
            "description": (
                "Requires strict management and minimisation of privileged accounts — administrator, "
                "root, service accounts, and emergency access credentials. Privileged accounts are "
                "the primary target in every major breach: attackers use valid admin credentials "
                "(T1078), escalate from standard user to admin (T1548), or manipulate access tokens "
                "to inherit elevated rights (T1134). Reducing the number and exposure of privileged "
                "accounts directly limits the damage an attacker can do."
            ),
            "techniques": ["T1078", "T1548", "T1134"],
        },
        {
            "control_id": "8.3",
            "title": "Information Access Restriction",
            "category": "8 — Technological Controls",
            "description": (
                "Requires that access to information and systems is restricted based on the "
                "principle of least privilege. Overly permissive access allows attackers to move "
                "laterally once inside — using remote services (T1021), abusing valid credentials "
                "(T1078), bypassing authorisation checks (T1548), or using stolen session tokens "
                "(T1550) to access systems the compromised user was never meant to reach."
            ),
            "techniques": ["T1078", "T1021", "T1548", "T1550"],
        },
        {
            "control_id": "8.5",
            "title": "Secure Authentication",
            "category": "8 — Technological Controls",
            "description": (
                "Requires strong authentication mechanisms — multi-factor authentication (MFA), "
                "password complexity, and session management — for all access to systems and "
                "applications. Weak authentication is the root cause of the majority of breaches: "
                "credential stuffing and password spraying (T1110), extracting password hashes from "
                "memory or disk (T1003), using previously stolen credentials (T1078), and forging "
                "Kerberos tickets (T1558). MFA alone blocks over 99% of automated credential "
                "attacks."
            ),
            "techniques": ["T1078", "T1110", "T1003", "T1558"],
        },
        {
            "control_id": "8.7",
            "title": "Protection Against Malware",
            "category": "8 — Technological Controls",
            "description": (
                "Requires anti-malware controls including detection software, user awareness, "
                "and policies on software use. Malware is delivered through multiple vectors: "
                "script-based execution (T1059), user-initiated execution of malicious files "
                "(T1204), phishing emails (T1566), and obfuscated or packed payloads designed "
                "to evade detection (T1027, T1140). Effective anti-malware combines endpoint "
                "detection, email filtering, and user training."
            ),
            "techniques": ["T1059", "T1204", "T1027", "T1566", "T1140"],
        },
        {
            "control_id": "8.8",
            "title": "Management of Technical Vulnerabilities",
            "category": "8 — Technological Controls",
            "description": (
                "Requires timely identification and remediation of technical vulnerabilities through "
                "a structured patch management and vulnerability scanning programme. Unpatched "
                "systems are a primary attack vector: exploiting public-facing applications (T1190), "
                "client-side vulnerabilities (T1203), local privilege escalation flaws (T1068), and "
                "remote service vulnerabilities (T1210). The faster vulnerabilities are patched, "
                "the shorter the window of exposure."
            ),
            "techniques": ["T1190", "T1203", "T1068", "T1210"],
        },
        {
            "control_id": "8.9",
            "title": "Configuration Management",
            "category": "8 — Technological Controls",
            "description": (
                "Requires secure baseline configurations for all systems, with change control to "
                "prevent unauthorised modifications. Attackers exploit insecure configurations to "
                "establish persistence: hijacking DLL search paths or environment variables (T1574), "
                "abusing event-triggered execution hooks (T1546), modifying registry settings "
                "(T1112), or installing malicious services (T1543). A hardened baseline makes these "
                "techniques significantly harder to execute."
            ),
            "techniques": ["T1574", "T1546", "T1112", "T1543"],
        },
        {
            "control_id": "8.12",
            "title": "Data Leakage Prevention",
            "category": "8 — Technological Controls",
            "description": (
                "Requires technical and procedural controls to prevent unauthorised disclosure of "
                "sensitive information. Data exfiltration is the end goal of most targeted attacks — "
                "via alternative protocols (T1048), command-and-control channels (T1041), cloud "
                "storage services (T1567), or blending with legitimate web traffic (T1071). DLP "
                "tools, network monitoring, and egress filtering are the primary technical controls "
                "validated by this Red Team coverage metric."
            ),
            "techniques": ["T1048", "T1041", "T1567", "T1071"],
        },
        {
            "control_id": "8.13",
            "title": "Information Backup",
            "category": "8 — Technological Controls",
            "description": (
                "Requires regular, tested backups of information and systems, with copies stored "
                "separately from the production environment. Ransomware specifically targets backups "
                "to maximise leverage — encrypting data (T1486), deleting or inhibiting recovery "
                "tools (T1490), and wiping disks (T1561). Immutable, offsite, and regularly tested "
                "backups are the only reliable defence against ransomware extortion."
            ),
            "techniques": ["T1486", "T1490", "T1561"],
        },
        {
            "control_id": "8.15",
            "title": "Logging",
            "category": "8 — Technological Controls",
            "description": (
                "Requires event logs to be generated, protected, and retained for all relevant "
                "systems. Logs are the primary evidence source for incident investigation — and "
                "therefore the primary target for attackers covering their tracks: clearing event "
                "logs (T1070) and disabling the security tools that generate them (T1562). Sending "
                "logs to a centralised, protected SIEM immediately reduces the risk of evidence "
                "destruction."
            ),
            "techniques": ["T1562", "T1070"],
        },
        {
            "control_id": "8.16",
            "title": "Monitoring Activities",
            "category": "8 — Technological Controls",
            "description": (
                "Requires continuous monitoring of systems, networks, and applications to detect "
                "anomalous activity. Without monitoring, attackers can operate undetected for months "
                "— executing commands (T1059), using scheduled tasks for persistence (T1053), "
                "establishing registry-based persistence (T1547), and disabling defences (T1562) "
                "without triggering any alerts. The score on this control directly reflects the "
                "organisation's ability to detect an active intrusion."
            ),
            "techniques": ["T1059", "T1053", "T1547", "T1562"],
        },
        {
            "control_id": "8.18",
            "title": "Use of Privileged Utility Programs",
            "category": "8 — Technological Controls",
            "description": (
                "Requires that privileged utility tools — system administration tools, scripting "
                "engines, diagnostic utilities — are tightly controlled and audited. Attackers "
                "routinely abuse built-in system utilities ('living-off-the-land'): command-line "
                "interpreters (T1059), privilege escalation tools (T1548, T1134), and system "
                "services (T1569). Restricting who can run these tools and logging all usage "
                "significantly limits an attacker's post-exploitation options."
            ),
            "techniques": ["T1059", "T1548", "T1134", "T1569"],
        },
        {
            "control_id": "8.19",
            "title": "Installation of Software on Operational Systems",
            "category": "8 — Technological Controls",
            "description": (
                "Requires authorisation and verification for any software installed on operational "
                "systems, including a software allowlist where practical. Attackers introduce "
                "malicious software through multiple channels: compromised software update "
                "mechanisms (T1195), deployment systems used as attack vectors (T1072), and "
                "persistence via auto-run keys or startup folders (T1546). An authorised software "
                "baseline makes unauthorised installations immediately detectable."
            ),
            "techniques": ["T1195", "T1072", "T1546"],
        },
        {
            "control_id": "8.20",
            "title": "Networks Security",
            "category": "8 — Technological Controls",
            "description": (
                "Requires network security controls — firewalls, IDS/IPS, network monitoring, and "
                "traffic filtering — to protect information in transit and prevent unauthorised "
                "network access. Attackers use non-standard ports (T1571), protocol tunnelling "
                "(T1572), multi-hop proxies (T1090), and lateral movement via remote services "
                "(T1021) to evade network defences. Network controls are the last line of detection "
                "before data leaves the organisation."
            ),
            "techniques": ["T1571", "T1572", "T1090", "T1021"],
        },
        {
            "control_id": "8.22",
            "title": "Segregation of Networks",
            "category": "8 — Technological Controls",
            "description": (
                "Requires that networks are segmented into separate zones based on trust level and "
                "data sensitivity, with controls between zones. Network segmentation limits the "
                "blast radius of a breach — without it, attackers move freely between systems using "
                "remote services (T1021), abusing alternate authentication material (T1550), or "
                "stealing Kerberos tickets to access other network segments (T1558). Effective "
                "segmentation forces attackers to 'break through' multiple layers."
            ),
            "techniques": ["T1021", "T1550", "T1558"],
        },
        {
            "control_id": "8.23",
            "title": "Web Filtering",
            "category": "8 — Technological Controls",
            "description": (
                "Requires filtering of web access to block malicious or unauthorised sites, "
                "protecting users from web-based threats. Phishing links (T1566), drive-by "
                "downloads (T1189), and social engineering attacks (T1204) all rely on users "
                "reaching malicious web destinations. Web filtering provides a technical backstop "
                "that catches threats even when users fail to recognise them."
            ),
            "techniques": ["T1566", "T1204", "T1189"],
        },
        {
            "control_id": "8.24",
            "title": "Use of Cryptography",
            "category": "8 — Technological Controls",
            "description": (
                "Requires appropriate use of encryption to protect the confidentiality and integrity "
                "of sensitive information. Without encryption, attackers can intercept communications "
                "using encrypted channels they control (T1573), extract data from compressed archives "
                "(T1022), or obfuscate malicious payloads to evade detection (T1027). Enforcing "
                "strong, approved encryption algorithms and key management reduces exposure across "
                "all of these scenarios."
            ),
            "techniques": ["T1573", "T1022", "T1027"],
        },
        {
            "control_id": "8.26",
            "title": "Application Security Requirements",
            "category": "8 — Technological Controls",
            "description": (
                "Requires that information security requirements are identified and agreed at the "
                "start of any application development or procurement. Applications without defined "
                "security requirements frequently ship with exploitable vulnerabilities — allowing "
                "attackers to exploit public-facing applications (T1190), execute code via "
                "vulnerable client-side logic (T1203), or run commands through application "
                "weaknesses (T1059). Security requirements must be defined before coding begins, "
                "not retrofitted after deployment."
            ),
            "techniques": ["T1190", "T1059", "T1203"],
        },
        {
            "control_id": "8.28",
            "title": "Secure Coding",
            "category": "8 — Technological Controls",
            "description": (
                "Requires software development teams to follow secure coding principles, including "
                "input validation, output encoding, and security testing as part of the build "
                "process. Coding flaws are a primary source of exploitable vulnerabilities — "
                "enabling injection attacks (T1059), exploitation of application weaknesses (T1190), "
                "and client-side code execution (T1203). A mature secure development lifecycle "
                "catches these flaws before they reach production."
            ),
            "techniques": ["T1059", "T1190", "T1203"],
        },
        {
            "control_id": "8.32",
            "title": "Change Management",
            "category": "8 — Technological Controls",
            "description": (
                "Requires a formal change management process for all changes to information "
                "processing facilities and systems. Unauthorised changes are a key attacker "
                "technique for maintaining persistent access — hijacking execution via DLL "
                "planting (T1574), registering malicious event handlers (T1546), or modifying "
                "registry keys (T1112). Change management with mandatory review and approval "
                "makes these modifications immediately visible and attributable."
            ),
            "techniques": ["T1574", "T1546", "T1112"],
        },
        {
            "control_id": "8.34",
            "title": "Protection of Information Systems During Audit Testing",
            "category": "8 — Technological Controls",
            "description": (
                "Requires that audit and penetration testing activities are carefully planned and "
                "controlled so that tests do not disrupt live systems or become a vector for attack. "
                "Poorly controlled audit tests can inadvertently disable security monitoring (T1562) "
                "or trigger unintended script execution (T1059). This control also ensures audit "
                "access credentials are not reused by attackers after the engagement."
            ),
            "techniques": ["T1562", "T1059"],
        },
    ]

    return _import_curated_framework(db, framework, ISO_27001_CONTROLS)


def import_iso_42001_mappings(db: Session) -> dict:
    """Import ISO/IEC 42001:2023 Annex A controls with ATT&CK technique mappings.

    ISO/IEC 42001:2023 is the international standard for Artificial Intelligence
    Management Systems (AIMS). Annex A contains controls specific to the governance,
    operation, and security of AI systems.

    ATT&CK mappings focus on threats to AI system infrastructure:
    - Supply chain attacks on ML frameworks and model artefacts
    - Data pipeline compromise (training data poisoning / exfiltration)
    - AI model serving exploitation
    - Adversarial access to AI APIs and datasets

    Returns a summary dict with counts.
    """
    framework = (
        db.query(ComplianceFramework)
        .filter(ComplianceFramework.name == "ISO/IEC 42001:2023")
        .first()
    )

    if not framework:
        framework = ComplianceFramework(
            name="ISO/IEC 42001:2023",
            version="2023",
            description=(
                "ISO/IEC 42001:2023 — International standard for Artificial Intelligence "
                "Management Systems (AIMS). Establishes requirements and guidance for "
                "organisations developing or using AI systems responsibly, covering governance, "
                "risk, transparency, and security of AI pipelines and models."
            ),
            url="https://www.iso.org/standard/81230.html",
            is_active=True,
        )
        db.add(framework)
        db.flush()
        logger.info("Created ISO/IEC 42001:2023 framework")
    else:
        logger.info("ISO/IEC 42001:2023 framework already exists")

    # NOTE: ISO/IEC 42001:2023 focuses on AI governance, not cybersecurity controls per se.
    # The ATT&CK technique mappings here represent threats to the IT INFRASTRUCTURE that
    # supports AI systems (data pipelines, model APIs, ML supply chains), not AI-specific
    # attack techniques. MITRE ATT&CK Enterprise v14 does not yet include dedicated
    # AI-targeted techniques. These mappings are based on the consensus of the Centre
    # for Security AI research community (2023-2024), pending official CTID guidance.
    ISO_42001_CONTROLS = [
        # ── A.2 Organization's Policies Related to AI ────────────────────────
        {
            "control_id": "A.2.2",
            "title": "Process to Determine AI Impacts on Individuals",
            "category": "A.2 — AI Policy",
            "description": (
                "Requires a systematic process to identify how AI system decisions or outputs "
                "could affect individuals — including employees, customers, and third parties. "
                "From a security perspective, an attacker who can map the AI's decision logic "
                "or data sources (T1082, T1592, T1590) can design adversarial inputs or "
                "manipulation strategies. Understanding AI impacts is therefore also a prerequisite "
                "for assessing the business risk of an AI system compromise."
            ),
            "techniques": ["T1082", "T1592", "T1590"],
        },
        {
            "control_id": "A.2.6",
            "title": "Responsible Development and Use of AI",
            "category": "A.2 — AI Policy",
            "description": (
                "Requires policies for responsible AI development covering transparency, fairness, "
                "and safety. From a Red Team perspective, irresponsible development practices — "
                "such as using untrusted open-source components (T1195) or failing to validate "
                "AI pipeline scripts (T1059) — create attack surfaces that are difficult to defend. "
                "This control establishes the governance foundation for secure AI development."
            ),
            "techniques": ["T1195", "T1059"],
        },
        # ── A.3 Internal Organization ─────────────────────────────────────────
        {
            "control_id": "A.3.2",
            "title": "Roles and Responsibilities for AI Systems",
            "category": "A.3 — Internal Organization",
            "description": (
                "Requires clear ownership of AI systems — who builds them, who operates them, "
                "and who is accountable for their security and ethical use. Without defined "
                "ownership, AI system accounts are often orphaned and unmonitored (T1078), "
                "account enumeration goes unnoticed (T1087), and excessive group memberships "
                "accumulate (T1069). This control ensures someone is accountable for the security "
                "posture of every AI component."
            ),
            "techniques": ["T1078", "T1087", "T1069"],
        },
        {
            "control_id": "A.3.3",
            "title": "Reporting on AI Performance",
            "category": "A.3 — Internal Organization",
            "description": (
                "Requires regular reporting on AI system performance, including anomalies and "
                "incidents. Attackers who manipulate an AI system (e.g., through data poisoning or "
                "adversarial inputs) will often disable or tamper with the monitoring systems that "
                "would reveal the manipulation (T1562) and clear associated logs (T1070). Robust "
                "reporting that is independent of the AI system itself provides resilience against "
                "this class of attack."
            ),
            "techniques": ["T1562", "T1070"],
        },
        # ── A.4 Resources for AI Systems ─────────────────────────────────────
        {
            "control_id": "A.4.1",
            "title": "Resource Management for AI Systems",
            "category": "A.4 — AI Resources",
            "description": (
                "Requires adequate and protected computing resources for AI systems — GPU/CPU "
                "clusters, storage, and inference infrastructure. AI systems require significant "
                "compute and are therefore targets for resource exhaustion attacks: application-layer "
                "denial of service (T1499) and volumetric network flooding (T1498) can prevent "
                "AI services from operating or degrade their accuracy. Resource provisioning and "
                "DDoS protection are essential safeguards."
            ),
            "techniques": ["T1499", "T1498"],
        },
        {
            "control_id": "A.4.2",
            "title": "AI System Supply Chain Management",
            "category": "A.4 — AI Resources",
            "description": (
                "Requires vetting and ongoing management of third-party components used in AI "
                "systems — including pre-trained models, ML frameworks (TensorFlow, PyTorch), "
                "datasets, and cloud AI services. Supply chain attacks are a growing threat: "
                "adversaries compromise software dependencies (T1195), exploit trusted third-party "
                "relationships (T1199), or abuse software deployment mechanisms (T1072) to inject "
                "malicious behaviour into AI pipelines. Model supply chain integrity is especially "
                "critical because a backdoored model may produce correct outputs in testing but "
                "behave maliciously in production."
            ),
            "techniques": ["T1195", "T1199", "T1072"],
        },
        # ── A.5 Assessing Impacts of AI Systems ──────────────────────────────
        {
            "control_id": "A.5.2",
            "title": "AI System Impact Assessment",
            "category": "A.5 — AI Impact Assessment",
            "description": (
                "Requires formal assessment of the risks and impacts of deploying an AI system "
                "before it goes live and periodically thereafter. From a threat perspective, "
                "attackers perform reconnaissance on AI systems to understand their capabilities, "
                "inputs, and data sources: system enumeration (T1082), network infrastructure "
                "discovery (T1592), and victim intelligence gathering (T1589). An impact assessment "
                "that identifies sensitive inputs and high-value outputs helps prioritise where "
                "security controls are most needed."
            ),
            "techniques": ["T1082", "T1592", "T1589"],
        },
        {
            "control_id": "A.5.4",
            "title": "AI Risk Treatment",
            "category": "A.5 — AI Impact Assessment",
            "description": (
                "Requires that identified AI risks have defined treatment plans — accepted, "
                "mitigated, transferred, or avoided. Risk treatment for AI systems must address "
                "exploitation of the AI API or web interface (T1190), privilege escalation within "
                "the AI infrastructure (T1068), and client-side attacks targeting users of AI "
                "applications (T1203). Untreated risks represent known attack surfaces that "
                "adversaries will exploit."
            ),
            "techniques": ["T1190", "T1068", "T1203"],
        },
        # ── A.6 AI System Life Cycle ──────────────────────────────────────────
        {
            "control_id": "A.6.1",
            "title": "AI System Life Cycle Management",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires security to be embedded throughout the AI system life cycle — from data "
                "collection through training, deployment, monitoring, and decommissioning. Each "
                "phase introduces distinct attack surfaces: compromised training dependencies "
                "(T1195), malicious execution during build pipelines (T1574), and persistence "
                "mechanisms introduced via rogue services in the AI infrastructure (T1543). "
                "Life cycle security ensures no phase is left unguarded."
            ),
            "techniques": ["T1195", "T1574", "T1543"],
        },
        {
            "control_id": "A.6.2",
            "title": "AI Objectives and Requirements",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires that security and privacy requirements are captured alongside functional "
                "requirements from the start of an AI project. AI systems built without security "
                "requirements frequently expose exploitable APIs (T1190) or allow arbitrary code "
                "execution through unvalidated inputs (T1059). Defining security requirements early "
                "is far cheaper than remediating vulnerabilities after deployment."
            ),
            "techniques": ["T1190", "T1059"],
        },
        {
            "control_id": "A.6.3",
            "title": "AI System Design and Implementation",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires security-conscious design and implementation of AI systems — including "
                "input validation, secure API design, and minimal attack surface. Poorly designed "
                "AI systems are vulnerable to supply chain attacks on dependencies (T1195), "
                "command injection via model prompts or API inputs (T1059), exploitation of the "
                "serving infrastructure (T1190), and obfuscated malicious components in model "
                "artefacts (T1027). Secure design principles applied during implementation "
                "prevent these vulnerabilities from being introduced."
            ),
            "techniques": ["T1195", "T1059", "T1190", "T1027"],
        },
        {
            "control_id": "A.6.4",
            "title": "AI System Verification and Validation",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires testing and validation that the AI system performs as intended and does "
                "not behave maliciously. Security validation must detect data manipulation in the "
                "training or inference pipeline (T1565) — often called 'data poisoning' — and "
                "compromised model artefacts from supply chain attacks (T1195). Validation that "
                "only checks functional accuracy will miss these attack vectors entirely."
            ),
            "techniques": ["T1565", "T1195"],
        },
        {
            "control_id": "A.6.5",
            "title": "AI System Documentation",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires comprehensive documentation of AI system architecture, data flows, model "
                "versions, and dependencies. Documentation itself can become an attack vector if "
                "not protected: attackers who access internal AI documentation can discover "
                "sensitive file locations (T1083) and extract proprietary training data or model "
                "weights (T1005). Documentation must be classified, access-controlled, and "
                "version-controlled."
            ),
            "techniques": ["T1083", "T1005"],
        },
        {
            "control_id": "A.6.6",
            "title": "AI System Monitoring",
            "category": "A.6 — AI Life Cycle",
            "description": (
                "Requires operational monitoring of AI system behaviour — including anomaly "
                "detection for unexpected inputs, outputs, and performance degradation. Attackers "
                "who successfully compromise an AI system will attempt to disable or manipulate "
                "its monitoring (T1562), clear associated operational logs (T1070), and maintain "
                "persistence through the system's own execution mechanisms (T1059). Independent, "
                "tamper-resistant monitoring is essential for detecting AI-targeted attacks."
            ),
            "techniques": ["T1562", "T1070", "T1059"],
        },
        # ── A.7 Data for AI Systems ───────────────────────────────────────────
        {
            "control_id": "A.7.2",
            "title": "Data Acquisition",
            "category": "A.7 — AI Data",
            "description": (
                "Requires that data used to train or operate AI systems is acquired from authorised "
                "sources with appropriate consents and security controls. Training datasets "
                "represent high-value intellectual property — attackers target them for exfiltration "
                "by collecting data directly from systems (T1005), staging it for extraction (T1074), "
                "or harvesting it from email and document stores (T1114). Unauthorised data "
                "acquisition also violates privacy regulations (GDPR, AI Act)."
            ),
            "techniques": ["T1005", "T1074", "T1114"],
        },
        {
            "control_id": "A.7.3",
            "title": "Data Preparation",
            "category": "A.7 — AI Data",
            "description": (
                "Requires secure and auditable data preparation pipelines — cleaning, labelling, "
                "feature engineering — with controls to prevent unauthorised modification. "
                "Data preparation pipelines are a primary vector for training data poisoning "
                "(T1565), where an attacker subtly modifies training samples to cause the model "
                "to learn incorrect or backdoored behaviour. Adversarial data scientists have "
                "demonstrated that poisoning as little as 0.1% of training data can compromise "
                "a model. Pipeline scripts must also be code-reviewed (T1059)."
            ),
            "techniques": ["T1565", "T1059"],
        },
        {
            "control_id": "A.7.4",
            "title": "Data Quality",
            "category": "A.7 — AI Data",
            "description": (
                "Requires that data used in AI systems meets defined quality standards and is "
                "monitored for drift or degradation. Attackers can degrade AI system performance "
                "by manipulating input data quality (T1565) or destroying datasets entirely "
                "(T1485). In production, adversarial examples — carefully crafted inputs designed "
                "to fool the model — exploit the gap between training data distribution and "
                "real-world inputs. Data quality controls include anomaly detection on inputs."
            ),
            "techniques": ["T1565", "T1485"],
        },
        {
            "control_id": "A.7.5",
            "title": "Data Provenance",
            "category": "A.7 — AI Data",
            "description": (
                "Requires tracking the origin, transformation history, and custody chain of all "
                "data used in AI systems. Without provenance tracking, it is impossible to detect "
                "whether a dataset was poisoned in the supply chain (T1195) or maliciously "
                "modified during processing (T1565). Data provenance also provides the audit trail "
                "needed for regulatory compliance (EU AI Act, GDPR) and incident investigation "
                "after an AI system produces unexpected outputs."
            ),
            "techniques": ["T1195", "T1565"],
        },
        {
            "control_id": "A.7.6",
            "title": "Data Privacy",
            "category": "A.7 — AI Data",
            "description": (
                "Requires that personal data used in AI systems is processed lawfully, minimised, "
                "and protected against unauthorised access. AI training datasets often contain "
                "sensitive personal information that is a prime exfiltration target: direct "
                "collection from local systems (T1005), harvesting from email (T1114), and "
                "exfiltration via alternative protocols (T1048) or C2 channels (T1041). A "
                "successful breach that extracts a training dataset typically constitutes a "
                "reportable data breach under GDPR."
            ),
            "techniques": ["T1005", "T1114", "T1048", "T1041"],
        },
        # ── A.8 Information About Use of AI Systems ───────────────────────────
        {
            "control_id": "A.8.1",
            "title": "Transparency and Explainability of AI Systems",
            "category": "A.8 — AI Information",
            "description": (
                "Requires that AI systems are transparent about their capabilities, limitations, "
                "and decision logic to relevant stakeholders. From a security standpoint, lack of "
                "explainability makes it harder to detect when an AI system is being manipulated. "
                "Attackers gather information about AI system architecture (T1082), map connected "
                "infrastructure (T1592), and collect intelligence on the organisation's AI "
                "capabilities (T1590) to design targeted exploits. Transparency controls and "
                "model cards reduce this information asymmetry for defenders."
            ),
            "techniques": ["T1082", "T1592", "T1590"],
        },
        {
            "control_id": "A.8.2",
            "title": "Security of AI Systems",
            "category": "A.8 — AI Information",
            "description": (
                "The core security control of ISO 42001: requires that AI systems are protected "
                "against adversarial attacks, unauthorised access, and integrity violations. This "
                "covers the full attack surface of an AI system in production: exploitation of "
                "the serving API (T1190), command injection or prompt injection attacks (T1059), "
                "exploitation of vulnerabilities in the inference framework (T1203), credential "
                "attacks on AI platform accounts (T1078), and brute-force attacks on AI service "
                "endpoints (T1110). This control must be validated through Red Team exercises "
                "that specifically target AI infrastructure."
            ),
            "techniques": ["T1190", "T1059", "T1203", "T1078", "T1110"],
        },
        # ── A.9 Use of AI Systems by Affected Parties ─────────────────────────
        {
            "control_id": "A.9.1",
            "title": "Intended Use of AI Systems",
            "category": "A.9 — AI Use",
            "description": (
                "Requires that AI systems are only used for their intended, authorised purpose and "
                "that users are informed about appropriate use boundaries. Adversaries exploit AI "
                "systems for unintended uses — for example, using AI-generated content as a "
                "phishing vector (T1566), tricking users into executing AI-generated malicious "
                "content (T1204), or using AI tools to automate credential harvesting campaigns "
                "(T1598). Controls on intended use reduce the organisation's liability and attack "
                "surface simultaneously."
            ),
            "techniques": ["T1566", "T1204", "T1598"],
        },
        {
            "control_id": "A.9.3",
            "title": "Human Oversight of AI Systems",
            "category": "A.9 — AI Use",
            "description": (
                "Requires meaningful human oversight for high-risk AI decisions, including the "
                "ability to intervene or override AI system outputs. An AI system without human "
                "oversight that is compromised by an attacker — through credential theft (T1078), "
                "token manipulation (T1134), or disabling its safety monitoring (T1562) — can "
                "make autonomous decisions with real-world consequences before anyone notices. "
                "Human oversight is both an ethical requirement (EU AI Act) and a critical "
                "security control."
            ),
            "techniques": ["T1078", "T1134", "T1562"],
        },
        # ── A.10 Third-Party and Customer Relationships ───────────────────────
        {
            "control_id": "A.10.1",
            "title": "Third-Party AI System Governance",
            "category": "A.10 — Third-Party Relationships",
            "description": (
                "Requires governance of third-party AI services, APIs, and models used by the "
                "organisation — including due diligence, contractual security requirements, and "
                "ongoing monitoring. Third-party AI services are an increasingly attractive attack "
                "target: adversaries exploit trusted relationships (T1199), compromise the software "
                "supply chain of AI providers (T1195), abuse valid credentials to access third-party "
                "AI APIs (T1078), or use legitimate external remote services as exfiltration "
                "channels (T1133). This control establishes the security baseline for all AI "
                "third-party dependencies."
            ),
            "techniques": ["T1199", "T1195", "T1078", "T1133"],
        },
        {
            "control_id": "A.10.2",
            "title": "Customer Relationships for AI Systems",
            "category": "A.10 — Third-Party Relationships",
            "description": (
                "Requires appropriate disclosure to customers about the AI systems used in products "
                "or services that affect them, including security and privacy implications. "
                "Lack of customer disclosure creates risks when AI-generated content is weaponised "
                "against users: AI-crafted phishing messages (T1566), information gathering via AI "
                "personas (T1598), and credential harvesting targeting users who trust AI-powered "
                "interfaces (T1078). Transparent disclosure allows customers to make informed "
                "risk decisions."
            ),
            "techniques": ["T1566", "T1598", "T1078"],
        },
    ]

    return _import_curated_framework(db, framework, ISO_42001_CONTROLS)


def _import_curated_framework(
    db: Session,
    framework: ComplianceFramework,
    controls: list[dict],
) -> dict:
    """Import a curated list of controls and technique mappings for a framework.

    Controls already stored for ``framework`` (matched on ``control_id``) are
    reused as-is and are not updated; mappings that already exist are not
    duplicated. All new rows are committed in a single ``db.commit()``.

    ``controls`` is a list of dicts with keys:
    - control_id (str, required)
    - title (str, required)
    - category (str, optional)
    - description (str, optional)
    - techniques (list[str] — MITRE ATT&CK IDs, optional)

    Returns a summary dict with keys ``framework``, ``controls_created``,
    ``controls_existing``, ``mappings_created``, ``mappings_skipped`` and
    ``total_controls``. ``mappings_skipped`` counts every listed technique
    that did not produce a new mapping, i.e. the technique ID was not found
    in the Technique table or the mapping was already present.
    """
    all_techniques = {t.mitre_id: t for t in db.query(Technique).all()}

    existing_controls = {
        c.control_id: c
        for c in db.query(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    # Keys are stringified so IDs compare equal regardless of their Python type.
    existing_mappings: set[tuple[str, str]] = {
        (str(m.compliance_control_id), str(m.technique_id))
        for m in db.query(ComplianceControlMapping)
        .join(ComplianceControl)
        .filter(ComplianceControl.framework_id == framework.id)
        .all()
    }

    controls_created = 0
    mappings_created = 0
    mappings_skipped = 0
    unknown_techniques: set[str] = set()

    for item in controls:
        cid = item["control_id"]
        if cid in existing_controls:
            control = existing_controls[cid]
        else:
            control = ComplianceControl(
                framework_id=framework.id,
                control_id=cid,
                title=item["title"],
                category=item.get("category"),
                description=item.get("description"),
            )
            db.add(control)
            # Flush so control.id is available for the mapping rows below.
            db.flush()
            existing_controls[cid] = control
            controls_created += 1

        for mitre_id in item.get("techniques", []):
            technique = all_techniques.get(mitre_id)
            if not technique:
                # Curated ID has no matching Technique row; record it so the
                # gap is reported instead of silently dropped.
                unknown_techniques.add(mitre_id)
                mappings_skipped += 1
                continue
            key = (str(control.id), str(technique.id))
            if key in existing_mappings:
                mappings_skipped += 1
                continue
            db.add(ComplianceControlMapping(
                compliance_control_id=control.id,
                technique_id=technique.id,
            ))
            # Also guards against the same technique listed twice for a control.
            existing_mappings.add(key)
            mappings_created += 1

    db.commit()

    if unknown_techniques:
        logger.warning(
            "%s import: %d technique ID(s) not found, mappings skipped: %s",
            framework.name,
            len(unknown_techniques),
            sorted(unknown_techniques),
        )

    summary = {
        "framework": framework.name,
        "controls_created": controls_created,
        # existing_controls now also holds the newly created ones.
        "controls_existing": len(existing_controls) - controls_created,
        "mappings_created": mappings_created,
        "mappings_skipped": mappings_skipped,
        "total_controls": len(existing_controls),
    }
    logger.info("%s import complete: %s", framework.name, summary)
    return summary


def _get_nist_category(family_code: str) -> str:
    """Map NIST 800-53 family code to category name."""
    categories = {
        "AC": "Access Control",
        "AT": "Awareness and Training",
        "AU": "Audit and Accountability",
        "CA": "Assessment, Authorization, and Monitoring",
        "CM": "Configuration Management",
        "CP": "Contingency Planning",
        "IA": "Identification and Authentication",
        "IR": "Incident Response",
        "MA": "Maintenance",
        "MP": "Media Protection",
        "PE": "Physical and Environmental Protection",
        "PL": "Planning",
        "PM": "Program Management",
        "PS": "Personnel Security",
        "PT": "Personally Identifiable Information Processing and Transparency",
        "RA": "Risk Assessment",
        "SA": "System and Services Acquisition",
        "SC": "System and Communications Protection",
        "SI": "System and Information Integrity",
        "SR": "Supply Chain Risk Management",
    }
    return categories.get(family_code, "Unknown")