feat(phase-22): add import services for Sigma, LOLBAS, GTFOBins, CALDERA, Elastic and data sources panel (T-203 to T-207)

2026-02-09 16:19:44 +01:00
parent 022c4f2886
commit f4c8cbf768
11 changed files with 2039 additions and 0 deletions
@@ -18,6 +18,7 @@ from app.routers import users as users_router
 from app.routers import audit as audit_router
 from app.routers import notifications as notifications_router
 from app.routers import reports as reports_router
 from app.routers import data_sources as data_sources_router
 from app.storage import ensure_bucket_exists
 from app.jobs.mitre_sync_job import start_scheduler, scheduler
@@ -60,6 +61,7 @@ app.include_router(users_router.router, prefix="/api/v1")
 app.include_router(audit_router.router, prefix="/api/v1")
 app.include_router(notifications_router.router, prefix="/api/v1")
 app.include_router(reports_router.router, prefix="/api/v1")
 app.include_router(data_sources_router.router, prefix="/api/v1")
@app.get("/health")
@@ -0,0 +1,292 @@
 """Data sources management endpoints (admin only).
 Provides a centralized panel for managing all external data sources
 (Atomic Red Team, Sigma, LOLBAS, GTFOBins, CALDERA, Elastic, etc.)
 including sync triggers, enable/disable toggles, and statistics.
 """
 import logging
 from datetime import datetime
 from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy.orm import Session
 from app.database import get_db
 from app.dependencies.auth import require_role
 from app.models.user import User
 from app.models.data_source import DataSource
 from app.services.audit_service import log_action
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/data-sources", tags=["data-sources"])
 # ---------------------------------------------------------------------------
 # Sync dispatcher — maps source name → import function
 # ---------------------------------------------------------------------------
 def _get_sync_handler(source_name: str):
    """Lazily import and return the sync function for *source_name*.
    We import lazily to avoid circular imports and to only load the
    modules that are actually needed.
    """
    handlers = {
        "atomic_red_team": ("app.services.atomic_import_service", "import_atomic_red_team"),
        "sigma": ("app.services.sigma_import_service", "sync"),
        "lolbas": ("app.services.lolbas_import_service", "sync"),
        "gtfobins": ("app.services.lolbas_import_service", "sync_gtfobins"),
        "caldera": ("app.services.caldera_import_service", "sync"),
        "elastic_rules": ("app.services.elastic_import_service", "sync"),
        # d3fend and mitre_cti added in later phases
    }
    if source_name not in handlers:
        return None
    module_path, func_name = handlers[source_name]
    import importlib
    mod = importlib.import_module(module_path)
    return getattr(mod, func_name)
 # ---------------------------------------------------------------------------
 # Endpoints
 # ---------------------------------------------------------------------------
@router.get("")
 def list_data_sources(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
 ):
    """List all registered data sources.
    **Requires** the ``admin`` role.
    """
    sources = db.query(DataSource).order_by(DataSource.name).all()
    return [
        {
            "id": str(s.id),
            "name": s.name,
            "display_name": s.display_name,
            "type": s.type,
            "url": s.url,
            "description": s.description,
            "is_enabled": s.is_enabled,
            "last_sync_at": s.last_sync_at.isoformat() if s.last_sync_at else None,
            "last_sync_status": s.last_sync_status,
            "last_sync_stats": s.last_sync_stats,
            "sync_frequency": s.sync_frequency,
            "config": s.config,
            "created_at": s.created_at.isoformat() if s.created_at else None,
        }
        for s in sources
    ]
@router.patch("/{source_id}")
 def update_data_source(
    source_id: str,
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
 ):
    """Update a data source (enable/disable, change config).
    **Requires** the ``admin`` role.
    Body fields (all optional):
    - ``is_enabled`` (bool)
    - ``sync_frequency`` (str)
    - ``config`` (dict)
    """
    ds = db.query(DataSource).filter(DataSource.id == source_id).first()
    if not ds:
        raise HTTPException(status_code=404, detail="Data source not found")
    if "is_enabled" in body:
        ds.is_enabled = bool(body["is_enabled"])
    if "sync_frequency" in body:
        ds.sync_frequency = body["sync_frequency"]
    if "config" in body:
        ds.config = body["config"]
    db.commit()
    log_action(
        db,
        user_id=current_user.id,
        action="update_data_source",
        entity_type="data_source",
        entity_id=str(ds.id),
        details={"updates": body},
    )
    return {"message": "Data source updated", "id": str(ds.id)}
@router.post("/{source_id}/sync")
 def sync_data_source(
    source_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
 ):
    """Trigger sync/import for a specific data source.
    **Requires** the ``admin`` role.
    """
    ds = db.query(DataSource).filter(DataSource.id == source_id).first()
    if not ds:
        raise HTTPException(status_code=404, detail="Data source not found")
    handler = _get_sync_handler(ds.name)
    if handler is None:
        raise HTTPException(
            status_code=400,
            detail=f"No sync handler available for '{ds.name}'",
        )
    # Mark as in_progress
    ds.last_sync_status = "in_progress"
    db.commit()
    try:
        summary = handler(db)
    except Exception as exc:
        logger.error("Sync failed for %s: %s", ds.name, exc)
        ds.last_sync_status = "error"
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_stats = {"error": str(exc)}
        db.commit()
        raise HTTPException(
            status_code=500,
            detail=f"Sync failed: {str(exc)}",
        )
    # Update DS record (the handler may already have done this,
    # but we ensure it here as well)
    ds.last_sync_at = datetime.utcnow()
    ds.last_sync_status = "success"
    ds.last_sync_stats = summary
    db.commit()
    return {
        "message": f"Sync complete for {ds.display_name}",
        "source": ds.name,
        "stats": summary,
    }
@router.post("/sync-all")
 def sync_all_data_sources(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
 ):
    """Trigger sync for all enabled data sources (sequentially).
    **Requires** the ``admin`` role.
    """
    enabled_sources = (
        db.query(DataSource)
        .filter(DataSource.is_enabled == True)
        .order_by(DataSource.name)
        .all()
    )
    results = []
    for ds in enabled_sources:
        handler = _get_sync_handler(ds.name)
        if handler is None:
            results.append({
                "source": ds.name,
                "status": "skipped",
                "detail": "No sync handler available",
            })
            continue
        ds.last_sync_status = "in_progress"
        db.commit()
        try:
            summary = handler(db)
            ds.last_sync_at = datetime.utcnow()
            ds.last_sync_status = "success"
            ds.last_sync_stats = summary
            db.commit()
            results.append({
                "source": ds.name,
                "status": "success",
                "stats": summary,
            })
        except Exception as exc:
            logger.error("Sync failed for %s: %s", ds.name, exc)
            ds.last_sync_status = "error"
            ds.last_sync_at = datetime.utcnow()
            ds.last_sync_stats = {"error": str(exc)}
            db.commit()
            results.append({
                "source": ds.name,
                "status": "error",
                "detail": str(exc),
            })
    log_action(
        db,
        user_id=current_user.id,
        action="sync_all_data_sources",
        entity_type="data_source",
        entity_id=None,
        details={"results": results},
    )
    return {"message": "Sync all complete", "results": results}
@router.get("/{source_id}/stats")
 def get_data_source_stats(
    source_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_role("admin")),
 ):
    """Get detailed statistics for a specific data source.
    **Requires** the ``admin`` role.
    """
    ds = db.query(DataSource).filter(DataSource.id == source_id).first()
    if not ds:
        raise HTTPException(status_code=404, detail="Data source not found")
    # Count items from this source
    from app.models.test_template import TestTemplate
    from app.models.detection_rule import DetectionRule
    template_count = 0
    rule_count = 0
    if ds.type == "attack_procedure":
        template_count = (
            db.query(TestTemplate)
            .filter(TestTemplate.source == ds.name)
            .count()
        )
    elif ds.type == "detection_rule":
        rule_count = (
            db.query(DetectionRule)
            .filter(DetectionRule.source == ds.name)
            .count()
        )
    return {
        "id": str(ds.id),
        "name": ds.name,
        "display_name": ds.display_name,
        "type": ds.type,
        "is_enabled": ds.is_enabled,
        "last_sync_at": ds.last_sync_at.isoformat() if ds.last_sync_at else None,
        "last_sync_status": ds.last_sync_status,
        "last_sync_stats": ds.last_sync_stats,
        "total_templates": template_count,
        "total_rules": rule_count,
    }
@@ -0,0 +1,274 @@
 """MITRE CALDERA abilities import service.
 Downloads the CALDERA repository ZIP from GitHub, parses the ability YAML
 files under ``data/abilities/{tactic}/``, and creates :class:`TestTemplate`
 records in the database.
 Strategy
 --------
 1. Download the CALDERA repo as a ZIP.
 2. Extract into a temporary directory.
 3. Walk ``data/abilities/{tactic}/*.yml`` files.
 4. For each ability: extract name, description, technique ID, platforms,
   and executor commands.
 5. Create TestTemplate rows keyed by the ability's ``id`` field.
 6. Clean up.
 Idempotency
 -----------
 Running the import twice does **not** create duplicates.  Existing
 templates are identified by ``source = "caldera"`` + ``atomic_test_id``
 (the CALDERA ability ``id``).
 """
 import io
 import logging
 import shutil
 import tempfile
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import requests as _requests
 import yaml
 from sqlalchemy.orm import Session
 from app.models.test_template import TestTemplate
 from app.models.data_source import DataSource
 from app.services.audit_service import log_action
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 CALDERA_ZIP_URL = (
    "https://github.com/mitre/caldera"
    "/archive/refs/heads/master.zip"
 )
 _DOWNLOAD_TIMEOUT = 300
 _ZIP_ROOT_PREFIX = "caldera-master"
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
 def _download_zip(url: str = CALDERA_ZIP_URL) -> bytes:
    """Download the CALDERA ZIP and return raw bytes."""
    logger.info("Downloading CALDERA ZIP from %s …", url)
    resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
    resp.raise_for_status()
    content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
 def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
    """Extract *zip_bytes* into *dest* and return abilities dir."""
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        zf.extractall(dest)
    abilities_dir = Path(dest) / _ZIP_ROOT_PREFIX / "data" / "abilities"
    if not abilities_dir.is_dir():
        raise FileNotFoundError(
            f"Expected abilities directory not found at {abilities_dir}"
        )
    return abilities_dir
 def _extract_commands(platforms_dict: dict) -> str:
    """Extract executor commands from CALDERA platforms dict.
    The structure is typically::
        platforms:
          windows:
            psh:
              command: "whoami"
          linux:
            sh:
              command: "id"
    Returns a formatted string with all commands.
    """
    lines = []
    if not isinstance(platforms_dict, dict):
        return ""
    for os_name, executors in platforms_dict.items():
        if not isinstance(executors, dict):
            continue
        for executor_name, executor_data in executors.items():
            if isinstance(executor_data, dict):
                cmd = executor_data.get("command", "")
                if cmd:
                    lines.append(f"[{os_name}/{executor_name}]\n{cmd}")
            elif isinstance(executor_data, str):
                lines.append(f"[{os_name}/{executor_name}]\n{executor_data}")
    return "\n\n".join(lines)
 def _extract_platforms(platforms_dict: dict) -> str:
    """Extract platform names from CALDERA platforms dict."""
    if not isinstance(platforms_dict, dict):
        return ""
    platform_names = []
    for os_name in platforms_dict:
        normalized = str(os_name).lower().strip()
        if normalized in ("windows", "linux", "darwin", "macos"):
            if normalized == "darwin":
                normalized = "macos"
            if normalized not in platform_names:
                platform_names.append(normalized)
    return ", ".join(platform_names)
 def _parse_abilities(abilities_dir: Path) -> list[dict]:
    """Walk abilities directories and parse all YAML files.
    Returns a flat list of dicts, each representing one ability.
    """
    results: list[dict] = []
    yaml_files = sorted(abilities_dir.rglob("*.yml"))
    logger.info("Found %d ability YAML files", len(yaml_files))
    for yaml_path in yaml_files:
        try:
            with open(yaml_path, "r", encoding="utf-8") as fh:
                data_list = list(yaml.safe_load_all(fh))
        except Exception as exc:
            logger.debug("Failed to parse %s: %s", yaml_path, exc)
            continue
        for data in data_list:
            if not isinstance(data, dict):
                continue
            ability_id = data.get("id", "")
            if not ability_id:
                continue
            name = data.get("name", "").strip()
            description = data.get("description", "").strip()
            tactic = data.get("tactic", "").strip()
            # Extract technique info
            technique = data.get("technique", {})
            if isinstance(technique, dict):
                attack_id = technique.get("attack_id", "")
            else:
                attack_id = ""
            if not attack_id:
                continue
            # Normalise technique ID
            attack_id = str(attack_id).strip().upper()
            if not attack_id.startswith("T"):
                continue
            # Extract platforms and commands
            platforms_dict = data.get("platforms", {})
            commands = _extract_commands(platforms_dict)
            platform_str = _extract_platforms(platforms_dict)
            # Determine executor type
            executors = set()
            if isinstance(platforms_dict, dict):
                for os_executors in platforms_dict.values():
                    if isinstance(os_executors, dict):
                        executors.update(os_executors.keys())
            executor_str = ", ".join(sorted(executors)) if executors else None
            results.append({
                "mitre_technique_id": attack_id,
                "name": f"CALDERA: {name}"[:500] if name else f"CALDERA ability {ability_id}"[:500],
                "description": f"{description}\n\nTactic: {tactic}".strip()[:2000] if description else None,
                "source": "caldera",
                "platform": platform_str,
                "tool_suggested": executor_str,
                "attack_procedure": commands[:4000] if commands else None,
                "atomic_test_id": f"caldera:{ability_id}",
                "source_url": f"https://github.com/mitre/caldera/tree/master/data/abilities/{tactic}",
            })
    logger.info("Parsed %d CALDERA abilities total", len(results))
    return results
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
 def sync(db: Session) -> dict:
    """Download and import CALDERA abilities as TestTemplates.
    Returns a summary dict with ``created``, ``skipped_existing``, ``total_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_caldera_")
    try:
        zip_bytes = _download_zip()
        abilities_dir = _extract_zip(zip_bytes, tmp_dir)
        parsed = _parse_abilities(abilities_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)
    # Pre-load existing for dedup
    existing_ids: set[str] = {
        row[0]
        for row in db.query(TestTemplate.atomic_test_id)
        .filter(TestTemplate.source == "caldera")
        .filter(TestTemplate.atomic_test_id.isnot(None))
        .all()
    }
    created = 0
    skipped = 0
    for item in parsed:
        if item["atomic_test_id"] in existing_ids:
            skipped += 1
            continue
        template = TestTemplate(
            mitre_technique_id=item["mitre_technique_id"],
            name=item["name"],
            description=item["description"],
            source=item["source"],
            source_url=item["source_url"],
            attack_procedure=item["attack_procedure"],
            platform=item["platform"],
            tool_suggested=item["tool_suggested"],
            atomic_test_id=item["atomic_test_id"],
            is_active=True,
        )
        db.add(template)
        existing_ids.add(item["atomic_test_id"])
        created += 1
    db.commit()
    summary = {
        "created": created,
        "skipped_existing": skipped,
        "total_parsed": len(parsed),
    }
    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "caldera").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()
    logger.info("CALDERA import complete — %s", summary)
    log_action(db, user_id=None, action="import_caldera",
               entity_type="test_template", entity_id=None, details=summary)
    return summary
@@ -0,0 +1,321 @@
 """Elastic Detection Rules import service.
 Downloads the Elastic detection-rules repository ZIP from GitHub, parses
 every ``.toml`` rule file under ``rules/``, extracts MITRE ATT&CK
 mappings, and creates :class:`DetectionRule` records in the database.
 Strategy
 --------
 1. Download the full repo as a ZIP archive.
 2. Extract into a temporary directory.
 3. Walk all ``.toml`` files under ``rules/``.
 4. Parse each TOML file — extract rule name, description, query (KQL),
   severity, and MITRE ATT&CK threat mappings.
 5. Create / skip ``DetectionRule`` rows keyed by ``(source, source_id)``.
 6. Clean up.
 Idempotency
 -----------
 Running the import twice does **not** create duplicates.  Existing
 rules are identified by ``source = "elastic"`` + ``source_id`` (the
 TOML filename).
 """
 import io
 import logging
 import shutil
 import tempfile
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import requests as _requests
 from sqlalchemy.orm import Session
 from app.models.detection_rule import DetectionRule
 from app.models.data_source import DataSource
 from app.services.audit_service import log_action
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 ELASTIC_ZIP_URL = (
    "https://github.com/elastic/detection-rules"
    "/archive/refs/heads/main.zip"
 )
 _DOWNLOAD_TIMEOUT = 300
 _ZIP_ROOT_PREFIX = "detection-rules-main"
 # Severity normalisation
 _SEVERITY_MAP = {
    "informational": "informational",
    "low": "low",
    "medium": "medium",
    "high": "high",
    "critical": "critical",
 }
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
 def _download_zip(url: str = ELASTIC_ZIP_URL) -> bytes:
    """Download the Elastic Detection Rules ZIP and return raw bytes."""
    logger.info("Downloading Elastic Detection Rules ZIP from %s …", url)
    resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
    resp.raise_for_status()
    content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
 def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
    """Extract *zip_bytes* into *dest* and return rules/ dir."""
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        zf.extractall(dest)
    rules_dir = Path(dest) / _ZIP_ROOT_PREFIX / "rules"
    if not rules_dir.is_dir():
        raise FileNotFoundError(
            f"Expected rules directory not found at {rules_dir}"
        )
    return rules_dir
 def _parse_toml_safe(path: Path) -> dict | None:
    """Parse a TOML file.  Uses the ``toml`` library."""
    try:
        import toml
        with open(path, "r", encoding="utf-8") as fh:
            return toml.load(fh)
    except Exception as exc:
        logger.debug("Failed to parse %s: %s", path, exc)
        return None
 def _extract_mitre_techniques(threat_list: list) -> list[str]:
    """Extract MITRE technique IDs from Elastic's ``rule.threat`` array.
    Each entry looks like::
        [[rule.threat]]
        framework = "MITRE ATT&CK"
        [rule.threat.tactic]
        name = "Credential Access"
        id = "TA0006"
        [[rule.threat.technique]]
        name = "OS Credential Dumping"
        id = "T1003"
          [[rule.threat.technique.subtechnique]]
          name = "LSASS Memory"
          id = "T1003.001"
    """
    technique_ids = []
    if not isinstance(threat_list, list):
        return technique_ids
    for threat_entry in threat_list:
        if not isinstance(threat_entry, dict):
            continue
        # Skip non-MITRE frameworks
        framework = threat_entry.get("framework", "")
        if "MITRE" not in str(framework).upper():
            continue
        techniques = threat_entry.get("technique", [])
        if not isinstance(techniques, list):
            continue
        for tech in techniques:
            if not isinstance(tech, dict):
                continue
            tech_id = tech.get("id", "")
            if tech_id and str(tech_id).upper().startswith("T"):
                technique_ids.append(str(tech_id).upper())
            # Check subtechniques
            subtechniques = tech.get("subtechnique", [])
            if isinstance(subtechniques, list):
                for subtech in subtechniques:
                    if isinstance(subtech, dict):
                        sub_id = subtech.get("id", "")
                        if sub_id and str(sub_id).upper().startswith("T"):
                            technique_ids.append(str(sub_id).upper())
    return list(set(technique_ids))
 def _parse_elastic_rules(rules_dir: Path) -> list[dict]:
    """Walk the rules directory and parse all TOML files.
    Returns a flat list of dicts, one per (rule, technique) combination.
    """
    results: list[dict] = []
    toml_files = sorted(rules_dir.rglob("*.toml"))
    logger.info("Found %d TOML files to parse", len(toml_files))
    for toml_path in toml_files:
        data = _parse_toml_safe(toml_path)
        if not data:
            continue
        rule = data.get("rule", {})
        if not isinstance(rule, dict):
            continue
        name = rule.get("name", "").strip()
        if not name:
            continue
        # Extract MITRE technique IDs
        threat_list = rule.get("threat", [])
        technique_ids = _extract_mitre_techniques(threat_list)
        if not technique_ids:
            continue
        description = rule.get("description", "")
        query = rule.get("query", "")
        severity = _SEVERITY_MAP.get(str(rule.get("severity", "")).lower())
        rule_type = rule.get("type", "query")  # query, eql, threshold, etc.
        # Determine rule format based on type
        if rule_type == "eql":
            rule_format = "eql"
        elif rule_type == "esql":
            rule_format = "esql"
        else:
            rule_format = "kql"
        # Use filename as source_id
        source_id = toml_path.name
        # Read raw content
        try:
            with open(toml_path, "r", encoding="utf-8") as fh:
                raw_content = fh.read()
        except Exception:
            raw_content = query or str(data)
        # Build source URL
        relative = str(toml_path.relative_to(rules_dir.parent)).replace("\\", "/")
        source_url = (
            f"https://github.com/elastic/detection-rules/blob/main/{relative}"
        )
        # One entry per technique
        for tech_id in technique_ids:
            results.append({
                "mitre_technique_id": tech_id,
                "title": name[:500],
                "description": str(description)[:2000] if description else None,
                "source_id": source_id,
                "source_url": source_url,
                "rule_content": query[:50000] if query else raw_content[:50000],
                "rule_format": rule_format,
                "severity": severity,
                "platforms": _infer_platforms(rules_dir, toml_path),
            })
    logger.info("Parsed %d (rule, technique) pairs total", len(results))
    return results
 def _infer_platforms(rules_dir: Path, toml_path: Path) -> list[str] | None:
    """Infer platforms from the rule's directory structure.
    Elastic organizes rules by OS:  rules/windows/, rules/linux/, etc.
    """
    relative = toml_path.relative_to(rules_dir)
    parts = [p.lower() for p in relative.parts]
    platforms = []
    if "windows" in parts:
        platforms.append("windows")
    if "linux" in parts:
        platforms.append("linux")
    if "macos" in parts:
        platforms.append("macos")
    return platforms if platforms else None
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
 def sync(db: Session) -> dict:
    """Download and import Elastic detection rules.
    Returns a summary dict with ``created``, ``skipped_existing``, ``total_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_elastic_")
    try:
        zip_bytes = _download_zip()
        rules_dir = _extract_zip(zip_bytes, tmp_dir)
        parsed_rules = _parse_elastic_rules(rules_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)
    # Pre-load existing source_ids for dedup
    existing_ids: set[str] = {
        row[0]
        for row in db.query(DetectionRule.source_id)
        .filter(DetectionRule.source == "elastic")
        .filter(DetectionRule.source_id.isnot(None))
        .all()
    }
    created = 0
    skipped = 0
    for item in parsed_rules:
        if item["source_id"] in existing_ids:
            skipped += 1
            continue
        rule = DetectionRule(
            mitre_technique_id=item["mitre_technique_id"],
            title=item["title"],
            description=item["description"],
            source="elastic",
            source_id=item["source_id"],
            source_url=item["source_url"],
            rule_content=item["rule_content"],
            rule_format=item["rule_format"],
            severity=item["severity"],
            platforms=item["platforms"],
            is_active=True,
        )
        db.add(rule)
        existing_ids.add(item["source_id"])
        created += 1
    db.commit()
    summary = {
        "created": created,
        "skipped_existing": skipped,
        "total_parsed": len(parsed_rules),
    }
    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "elastic_rules").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()
    logger.info("Elastic import complete — %s", summary)
    log_action(db, user_id=None, action="import_elastic_rules",
               entity_type="detection_rule", entity_id=None, details=summary)
    return summary
@@ -0,0 +1,375 @@
 """LOLBAS and GTFOBins import service.
 Downloads the LOLBAS (Windows) and GTFOBins (Linux) repositories,
 parses their YAML / Markdown files, and creates :class:`TestTemplate`
 records mapped to MITRE ATT&CK techniques.
 LOLBAS
 ------
 - ZIP from ``LOLBAS-Project/LOLBAS``
 - YAML files in ``yml/OSBinaries/``, ``yml/OSLibraries/``, ``yml/OSScripts/``
 - Each YAML contains: Name, Description, Commands (list with MitreID)
 GTFOBins
 --------
 - ZIP from ``GTFOBins/GTFOBins.github.io``
 - Markdown files in ``_gtfobins/``
 - Each Markdown has YAML front-matter with function names
 - Functions mapped to MITRE via a static dictionary
 Idempotency
 -----------
 Deduplication keys:
 - LOLBAS: ``source + Name + MitreID``  → stored in ``atomic_test_id``
 - GTFOBins: ``source + binary_name + function`` → stored in ``atomic_test_id``
 """
 import io
 import logging
 import re
 import shutil
 import tempfile
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import requests as _requests
 import yaml
 from sqlalchemy.orm import Session
 from app.models.test_template import TestTemplate
 from app.models.data_source import DataSource
 from app.services.audit_service import log_action
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 LOLBAS_ZIP_URL = (
    "https://github.com/LOLBAS-Project/LOLBAS"
    "/archive/refs/heads/master.zip"
 )
 GTFOBINS_ZIP_URL = (
    "https://github.com/GTFOBins/GTFOBins.github.io"
    "/archive/refs/heads/master.zip"
 )
 _DOWNLOAD_TIMEOUT = 300
 # GTFOBins function → MITRE technique mapping
 _GTFOBINS_FUNCTION_MAP: dict[str, str] = {
    "shell": "T1059",
    "command": "T1059",
    "reverse-shell": "T1059",
    "non-interactive-reverse-shell": "T1059",
    "bind-shell": "T1059",
    "non-interactive-bind-shell": "T1059",
    "file-upload": "T1105",
    "file-download": "T1105",
    "file-write": "T1105",
    "file-read": "T1005",
    "library-load": "T1129",
    "sudo": "T1548.003",
    "suid": "T1548.001",
    "capabilities": "T1548",
    "limited-suid": "T1548.001",
 }
 # ---------------------------------------------------------------------------
 # Shared helpers
 # ---------------------------------------------------------------------------
 def _download_zip(url: str) -> bytes:
    """Download a ZIP from *url* and return raw bytes."""
    logger.info("Downloading ZIP from %s …", url)
    resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
    resp.raise_for_status()
    content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
 def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
    """Extract *zip_bytes* into *dest* and return the root directory."""
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        zf.extractall(dest)
    return Path(dest)
 # ---------------------------------------------------------------------------
 # LOLBAS import
 # ---------------------------------------------------------------------------
 def _parse_lolbas(root_dir: Path) -> list[dict]:
    """Parse LOLBAS YAML files and return template dicts."""
    results: list[dict] = []
    lolbas_root = root_dir / "LOLBAS-master"
    yaml_dirs = [
        lolbas_root / "yml" / "OSBinaries",
        lolbas_root / "yml" / "OSLibraries",
        lolbas_root / "yml" / "OSScripts",
    ]
    yaml_files = []
    for d in yaml_dirs:
        if d.is_dir():
            yaml_files.extend(sorted(d.rglob("*.yml")))
    logger.info("LOLBAS: Found %d YAML files", len(yaml_files))
    for yaml_path in yaml_files:
        try:
            with open(yaml_path, "r", encoding="utf-8") as fh:
                data = yaml.safe_load(fh)
        except Exception as exc:
            logger.debug("Failed to parse %s: %s", yaml_path, exc)
            continue
        if not isinstance(data, dict):
            continue
        binary_name = data.get("Name", "").strip()
        if not binary_name:
            continue
        description = data.get("Description", "")
        commands = data.get("Commands", [])
        if not isinstance(commands, list):
            continue
        for cmd_entry in commands:
            if not isinstance(cmd_entry, dict):
                continue
            mitre_id = cmd_entry.get("MitreID")
            if not mitre_id:
                continue
            # Normalise the MITRE ID
            mitre_id = str(mitre_id).strip().upper()
            if not mitre_id.startswith("T"):
                continue
            command = cmd_entry.get("Command", "")
            usecase = cmd_entry.get("Usecase", "")
            cmd_description = cmd_entry.get("Description", "")
            # Dedup key
            dedup_key = f"lolbas:{binary_name}:{mitre_id}"
            procedure = []
            if cmd_description:
                procedure.append(f"Description: {cmd_description}")
            if usecase:
                procedure.append(f"Use case: {usecase}")
            if command:
                procedure.append(f"Command: {command}")
            results.append({
                "mitre_technique_id": mitre_id,
                "name": f"LOLBAS: {binary_name} — {usecase or cmd_description or mitre_id}"[:500],
                "description": f"{description}\n\n{cmd_description}".strip()[:2000] if description else cmd_description[:2000] if cmd_description else None,
                "source": "lolbas",
                "platform": "windows",
                "tool_suggested": binary_name,
                "attack_procedure": "\n".join(procedure)[:4000] if procedure else None,
                "atomic_test_id": dedup_key,
                "source_url": f"https://lolbas-project.github.io/lolbas/Binaries/{binary_name}/",
            })
    logger.info("LOLBAS: Parsed %d templates", len(results))
    return results
 # ---------------------------------------------------------------------------
 # GTFOBins import
 # ---------------------------------------------------------------------------
 def _parse_gtfobins(root_dir: Path) -> list[dict]:
    """Parse GTFOBins markdown files and return template dicts."""
    results: list[dict] = []
    gtfobins_root = root_dir / "GTFOBins.github.io-master" / "_gtfobins"
    if not gtfobins_root.is_dir():
        logger.warning("GTFOBins directory not found at %s", gtfobins_root)
        return results
    md_files = sorted(gtfobins_root.glob("*.md"))
    logger.info("GTFOBins: Found %d markdown files", len(md_files))
    for md_path in md_files:
        binary_name = md_path.stem  # e.g. "awk"
        try:
            with open(md_path, "r", encoding="utf-8") as fh:
                content = fh.read()
        except Exception as exc:
            logger.debug("Failed to read %s: %s", md_path, exc)
            continue
        # Extract YAML front-matter
        front_matter = _extract_front_matter(content)
        if not front_matter:
            continue
        functions = front_matter.get("functions", {})
        if not isinstance(functions, dict):
            continue
        for func_name, func_data in functions.items():
            # Map function to MITRE technique
            mitre_id = _GTFOBINS_FUNCTION_MAP.get(func_name.lower())
            if not mitre_id:
                continue
            # Extract code examples from function data
            examples = []
            if isinstance(func_data, list):
                for entry in func_data:
                    if isinstance(entry, dict):
                        code = entry.get("code", "")
                        if code:
                            examples.append(str(code))
                    elif isinstance(entry, str):
                        examples.append(entry)
            procedure = "\n\n".join(examples) if examples else None
            dedup_key = f"gtfobins:{binary_name}:{func_name}"
            results.append({
                "mitre_technique_id": mitre_id,
                "name": f"GTFOBins: {binary_name} — {func_name}"[:500],
                "description": f"Abuse {binary_name} binary for {func_name} on Linux/Unix."[:2000],
                "source": "gtfobins",
                "platform": "linux",
                "tool_suggested": binary_name,
                "attack_procedure": procedure[:4000] if procedure else None,
                "atomic_test_id": dedup_key,
                "source_url": f"https://gtfobins.github.io/gtfobins/{binary_name}/",
            })
    logger.info("GTFOBins: Parsed %d templates", len(results))
    return results
 def _extract_front_matter(content: str) -> dict | None:
    """Extract YAML front-matter from a markdown file."""
    match = re.match(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return None
    try:
        return yaml.safe_load(match.group(1))
    except Exception:
        return None
 # ---------------------------------------------------------------------------
 # Upsert logic
 # ---------------------------------------------------------------------------
 def _upsert_templates(db: Session, items: list[dict], source_name: str) -> dict:
    """Insert templates, skipping existing ones by atomic_test_id."""
    existing_ids: set[str] = {
        row[0]
        for row in db.query(TestTemplate.atomic_test_id)
        .filter(TestTemplate.source == source_name)
        .filter(TestTemplate.atomic_test_id.isnot(None))
        .all()
    }
    created = 0
    skipped = 0
    for item in items:
        if item["atomic_test_id"] in existing_ids:
            skipped += 1
            continue
        template = TestTemplate(
            mitre_technique_id=item["mitre_technique_id"],
            name=item["name"],
            description=item["description"],
            source=item["source"],
            source_url=item.get("source_url"),
            attack_procedure=item.get("attack_procedure"),
            platform=item["platform"],
            tool_suggested=item.get("tool_suggested"),
            atomic_test_id=item["atomic_test_id"],
            is_active=True,
        )
        db.add(template)
        existing_ids.add(item["atomic_test_id"])
        created += 1
    db.commit()
    return {"created": created, "skipped_existing": skipped, "total_parsed": len(items)}
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
 def sync(db: Session) -> dict:
    """Import LOLBAS templates.
    Returns a summary dict with ``created``, ``skipped_existing``, ``total_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_lolbas_")
    try:
        zip_bytes = _download_zip(LOLBAS_ZIP_URL)
        root_dir = _extract_zip(zip_bytes, tmp_dir)
        parsed = _parse_lolbas(root_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    summary = _upsert_templates(db, parsed, "lolbas")
    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "lolbas").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()
    logger.info("LOLBAS import complete — %s", summary)
    log_action(db, user_id=None, action="import_lolbas",
               entity_type="test_template", entity_id=None, details=summary)
    return summary
 def sync_gtfobins(db: Session) -> dict:
    """Import GTFOBins templates.
    Returns a summary dict with ``created``, ``skipped_existing``, ``total_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_gtfobins_")
    try:
        zip_bytes = _download_zip(GTFOBINS_ZIP_URL)
        root_dir = _extract_zip(zip_bytes, tmp_dir)
        parsed = _parse_gtfobins(root_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    summary = _upsert_templates(db, parsed, "gtfobins")
    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "gtfobins").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()
    logger.info("GTFOBins import complete — %s", summary)
    log_action(db, user_id=None, action="import_gtfobins",
               entity_type="test_template", entity_id=None, details=summary)
    return summary
@@ -0,0 +1,308 @@
 """Sigma Rules import service.
 Downloads the SigmaHQ repository ZIP from GitHub, parses every YAML rule
 file under ``rules/``, extracts MITRE ATT&CK tags, and creates
 :class:`DetectionRule` records in the database.
 Strategy
 --------
 1. Download the full SigmaHQ repo as a ZIP archive.
 2. Extract in a temporary directory.
 3. Walk all ``.yml`` files under ``rules/``.
 4. Parse each YAML file — extract title, description, logsource,
   detection tags, severity (``level``), and the raw YAML content.
 5. Filter: only import rules that have at least one ``attack.tXXXX`` tag.
 6. Create / skip ``DetectionRule`` rows keyed by ``(source, source_id)``.
 7. Clean up the temporary directory.
 Idempotency
 -----------
 Running the import twice does **not** create duplicates.  Existing
 rules are identified by ``source = "sigma"`` + ``source_id`` (relative
 file path) and simply skipped.
 """
 import io
 import logging
 import re
 import shutil
 import tempfile
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import requests as _requests
 import yaml
 from sqlalchemy.orm import Session
 from app.models.detection_rule import DetectionRule
 from app.models.data_source import DataSource
 from app.services.audit_service import log_action
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 SIGMA_ZIP_URL = (
    "https://github.com/SigmaHQ/sigma/archive/refs/heads/main.zip"
 )
 _DOWNLOAD_TIMEOUT = 300
 _ZIP_ROOT_PREFIX = "sigma-main"
 # Regex to extract MITRE ATT&CK technique IDs from Sigma tags
 # e.g. "attack.t1059.001" → "T1059.001"
 _ATTACK_TAG_RE = re.compile(r"attack\.(t\d{4}(?:\.\d{3})?)", re.IGNORECASE)
 # Sigma severity levels
 _SEVERITY_MAP = {
    "informational": "informational",
    "low": "low",
    "medium": "medium",
    "high": "high",
    "critical": "critical",
 }
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
 def _download_zip(url: str = SIGMA_ZIP_URL) -> bytes:
    """Download the SigmaHQ ZIP and return raw bytes."""
    logger.info("Downloading SigmaHQ ZIP from %s …", url)
    resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
    resp.raise_for_status()
    content = resp.content
    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
    return content
 def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
    """Extract *zip_bytes* into *dest* and return the path to rules/ dir."""
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        zf.extractall(dest)
    rules_dir = Path(dest) / _ZIP_ROOT_PREFIX / "rules"
    if not rules_dir.is_dir():
        raise FileNotFoundError(
            f"Expected rules directory not found at {rules_dir}"
        )
    return rules_dir
 def _extract_attack_tags(tags: list) -> list[str]:
    """Extract MITRE technique IDs from Sigma tag list.
    Example input:  ["attack.defense_evasion", "attack.t1059.001", "cve.2021.44228"]
    Example output: ["T1059.001"]
    """
    technique_ids = []
    for tag in tags:
        m = _ATTACK_TAG_RE.match(str(tag).strip())
        if m:
            technique_ids.append(m.group(1).upper())
    return list(set(technique_ids))
 def _parse_sigma_rules(rules_dir: Path) -> list[dict]:
    """Walk the rules directory and parse all Sigma YAML files.
    Returns a flat list of dicts, one per (rule, technique) combination.
    A single Sigma rule tagged with N techniques produces N entries.
    """
    results: list[dict] = []
    yaml_files = sorted(rules_dir.rglob("*.yml"))
    logger.info("Found %d YAML files to parse", len(yaml_files))
    for yaml_path in yaml_files:
        relative_path = str(yaml_path.relative_to(rules_dir.parent))
        try:
            with open(yaml_path, "r", encoding="utf-8") as fh:
                data = yaml.safe_load(fh)
        except Exception as exc:
            logger.debug("Failed to parse %s: %s", yaml_path, exc)
            continue
        if not isinstance(data, dict):
            continue
        title = data.get("title", "").strip()
        if not title:
            continue
        # Extract ATT&CK technique IDs from tags
        tags = data.get("tags", [])
        if not isinstance(tags, list):
            continue
        technique_ids = _extract_attack_tags(tags)
        if not technique_ids:
            continue  # Skip rules without ATT&CK mapping
        description = data.get("description", "")
        level = str(data.get("level", "")).lower()
        severity = _SEVERITY_MAP.get(level)
        # Extract logsource
        logsource = data.get("logsource", {})
        if not isinstance(logsource, dict):
            logsource = {}
        # Read full YAML content for storage
        try:
            with open(yaml_path, "r", encoding="utf-8") as fh:
                raw_content = fh.read()
        except Exception:
            raw_content = yaml.dump(data, default_flow_style=False)
        # False positive assessment
        falsepositives = data.get("falsepositives", [])
        if isinstance(falsepositives, list) and len(falsepositives) > 3:
            fp_rate = "high"
        elif isinstance(falsepositives, list) and len(falsepositives) > 1:
            fp_rate = "medium"
        else:
            fp_rate = "low"
        # Create one entry per technique
        for tech_id in technique_ids:
            source_url = (
                f"https://github.com/SigmaHQ/sigma/blob/main/"
                f"{relative_path.replace(chr(92), '/')}"
            )
            results.append({
                "mitre_technique_id": tech_id,
                "title": title[:500],
                "description": str(description)[:2000] if description else None,
                "source_id": relative_path,
                "source_url": source_url,
                "rule_content": raw_content,
                "severity": severity,
                "log_sources": logsource if logsource else None,
                "false_positive_rate": fp_rate,
                "platforms": _platforms_from_logsource(logsource),
            })
    logger.info("Parsed %d (rule, technique) pairs total", len(results))
    return results
 def _platforms_from_logsource(logsource: dict) -> list[str]:
    """Infer platform list from Sigma logsource."""
    platforms = []
    product = str(logsource.get("product", "")).lower()
    service = str(logsource.get("service", "")).lower()
    if "windows" in product or "windows" in service:
        platforms.append("windows")
    if "linux" in product or "linux" in service:
        platforms.append("linux")
    if "macos" in product or "macos" in service:
        platforms.append("macos")
    # Sysmon → Windows
    if "sysmon" in service and "windows" not in platforms:
        platforms.append("windows")
    return platforms if platforms else None
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
 def sync(db: Session) -> dict:
    """Download and import Sigma detection rules.
    Parameters
    ----------
    db : Session
        Active SQLAlchemy database session.
    Returns
    -------
    dict
        Summary with ``created``, ``skipped_existing``, ``total_parsed``.
    """
    tmp_dir = tempfile.mkdtemp(prefix="aegis_sigma_")
    try:
        zip_bytes = _download_zip()
        rules_dir = _extract_zip(zip_bytes, tmp_dir)
        parsed_rules = _parse_sigma_rules(rules_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.info("Cleaned up temp directory %s", tmp_dir)
    # Pre-load existing source_ids for dedup
    existing_ids: set[str] = {
        row[0]
        for row in db.query(DetectionRule.source_id)
        .filter(DetectionRule.source == "sigma")
        .filter(DetectionRule.source_id.isnot(None))
        .all()
    }
    created = 0
    skipped = 0
    for item in parsed_rules:
        # Dedup key: source_id (relative path). A rule file may produce
        # multiple entries (one per technique), but we deduplicate by
        # source_id so re-runs are safe.  For multi-technique rules we
        # only skip if the exact same source_id is already present.
        dedup_key = f"{item['source_id']}::{item['mitre_technique_id']}"
        if item["source_id"] in existing_ids:
            skipped += 1
            continue
        rule = DetectionRule(
            mitre_technique_id=item["mitre_technique_id"],
            title=item["title"],
            description=item["description"],
            source="sigma",
            source_id=item["source_id"],
            source_url=item["source_url"],
            rule_content=item["rule_content"],
            rule_format="sigma_yaml",
            severity=item["severity"],
            platforms=item["platforms"],
            log_sources=item["log_sources"],
            false_positive_rate=item["false_positive_rate"],
            is_active=True,
        )
        db.add(rule)
        existing_ids.add(item["source_id"])
        created += 1
    db.commit()
    summary = {
        "created": created,
        "skipped_existing": skipped,
        "total_parsed": len(parsed_rules),
    }
    # Update DataSource record
    ds = db.query(DataSource).filter(DataSource.name == "sigma").first()
    if ds:
        ds.last_sync_at = datetime.utcnow()
        ds.last_sync_status = "success"
        ds.last_sync_stats = summary
        db.commit()
    logger.info("Sigma import complete — %s", summary)
    log_action(
        db,
        user_id=None,
        action="import_sigma_rules",
        entity_type="detection_rule",
        entity_id=None,
        details=summary,
    )
    return summary
@@ -10,6 +10,8 @@ boto3
 apscheduler
 requests
 pyyaml
 pySigma
 toml
 taxii2-client
 python-multipart
 pydantic-settings
@@ -11,6 +11,7 @@ import ReportsPage from "./pages/ReportsPage";
 import SystemPage from "./pages/SystemPage";
 import UsersPage from "./pages/UsersPage";
 import AuditLogPage from "./pages/AuditLogPage";
 import DataSourcesPage from "./pages/DataSourcesPage";
 import Layout from "./components/Layout";
 import ProtectedRoute from "./components/ProtectedRoute";
@@ -61,6 +62,14 @@ export default function App() {
            </ProtectedRoute>
          }
        />
        <Route
          path="/data-sources"
          element={
            <ProtectedRoute roles={["admin"]}>
              <DataSourcesPage />
            </ProtectedRoute>
          }
        />
      </Route>
      {/* Catch-all → dashboard */}
@@ -0,0 +1,79 @@
 import client from "./client";
 export interface DataSource {
  id: string;
  name: string;
  display_name: string;
  type: string;
  url: string | null;
  description: string | null;
  is_enabled: boolean;
  last_sync_at: string | null;
  last_sync_status: string | null;
  last_sync_stats: Record<string, unknown> | null;
  sync_frequency: string | null;
  config: Record<string, unknown> | null;
  created_at: string | null;
 }
 export interface SyncResult {
  message: string;
  source: string;
  stats: Record<string, unknown>;
 }
 export interface SyncAllResult {
  message: string;
  results: Array<{
    source: string;
    status: string;
    stats?: Record<string, unknown>;
    detail?: string;
  }>;
 }
 export interface DataSourceStats {
  id: string;
  name: string;
  display_name: string;
  type: string;
  is_enabled: boolean;
  last_sync_at: string | null;
  last_sync_status: string | null;
  last_sync_stats: Record<string, unknown> | null;
  total_templates: number;
  total_rules: number;
 }
 /** List all data sources. */
 export async function getDataSources(): Promise<DataSource[]> {
  const { data } = await client.get<DataSource[]>("/data-sources");
  return data;
 }
 /** Update a data source (enable/disable, config). */
 export async function updateDataSource(
  id: string,
  body: Partial<{ is_enabled: boolean; sync_frequency: string; config: Record<string, unknown> }>
 ): Promise<{ message: string; id: string }> {
  const { data } = await client.patch(`/data-sources/${id}`, body);
  return data;
 }
 /** Trigger sync for a specific data source. */
 export async function syncDataSource(id: string): Promise<SyncResult> {
  const { data } = await client.post<SyncResult>(`/data-sources/${id}/sync`);
  return data;
 }
 /** Trigger sync for all enabled data sources. */
 export async function syncAllDataSources(): Promise<SyncAllResult> {
  const { data } = await client.post<SyncAllResult>("/data-sources/sync-all");
  return data;
 }
 /** Get stats for a specific data source. */
 export async function getDataSourceStats(id: string): Promise<DataSourceStats> {
  const { data } = await client.get<DataSourceStats>(`/data-sources/${id}/stats`);
  return data;
 }
@@ -12,6 +12,7 @@ import {
  ChevronDown,
  ListChecks,
  ClipboardList,
  Database,
 } from "lucide-react";
 import { useAuth } from "../context/AuthContext";
@@ -41,6 +42,7 @@ const mainLinks: NavItem[] = [
 const adminLinks: NavItem[] = [
  { to: "/users", label: "Users", icon: Users },
  { to: "/audit", label: "Audit Log", icon: FileText },
  { to: "/data-sources", label: "Data Sources", icon: Database },
  { to: "/system", label: "System", icon: Settings },
 ];
@@ -0,0 +1,375 @@
 import { useState } from "react";
 import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
 import {
  Loader2,
  RefreshCw,
  Database,
  CheckCircle,
  XCircle,
  AlertCircle,
  Clock,
  ToggleLeft,
  ToggleRight,
  Play,
  ExternalLink,
  Shield,
  Search,
  Swords,
  Bug,
 } from "lucide-react";
 import {
  getDataSources,
  updateDataSource,
  syncDataSource,
  syncAllDataSources,
  type DataSource,
  type SyncAllResult,
 } from "../api/data-sources";
 /** Map source type to visual props. */
 function typeProps(type: string) {
  switch (type) {
    case "attack_procedure":
      return { label: "Attack Procedure", color: "text-red-400 bg-red-900/50 border-red-500/30", icon: Swords };
    case "detection_rule":
      return { label: "Detection Rule", color: "text-blue-400 bg-blue-900/50 border-blue-500/30", icon: Shield };
    case "threat_intel":
      return { label: "Threat Intel", color: "text-purple-400 bg-purple-900/50 border-purple-500/30", icon: Search };
    case "defensive_technique":
      return { label: "Defensive", color: "text-green-400 bg-green-900/50 border-green-500/30", icon: Shield };
    default:
      return { label: type, color: "text-gray-400 bg-gray-800/50 border-gray-600/30", icon: Bug };
  }
 }
 function statusBadge(status: string | null) {
  if (!status) return null;
  switch (status) {
    case "success":
      return (
        <span className="inline-flex items-center gap-1 rounded-full border border-green-500/30 bg-green-900/50 px-2 py-0.5 text-xs font-medium text-green-400">
          <CheckCircle className="h-3 w-3" /> Success
        </span>
      );
    case "error":
      return (
        <span className="inline-flex items-center gap-1 rounded-full border border-red-500/30 bg-red-900/50 px-2 py-0.5 text-xs font-medium text-red-400">
          <XCircle className="h-3 w-3" /> Error
        </span>
      );
    case "in_progress":
      return (
        <span className="inline-flex items-center gap-1 rounded-full border border-yellow-500/30 bg-yellow-900/50 px-2 py-0.5 text-xs font-medium text-yellow-400">
          <Loader2 className="h-3 w-3 animate-spin" /> In Progress
        </span>
      );
    default:
      return (
        <span className="inline-flex rounded-full border border-gray-600/30 bg-gray-800/50 px-2 py-0.5 text-xs text-gray-400">
          {status}
        </span>
      );
  }
 }
 export default function DataSourcesPage() {
  const queryClient = useQueryClient();
  const [syncingId, setSyncingId] = useState<string | null>(null);
  const [syncAllResult, setSyncAllResult] = useState<SyncAllResult | null>(null);
  // ── Queries ─────────────────────────────────────────────────────
  const {
    data: sources,
    isLoading,
    error,
  } = useQuery({
    queryKey: ["data-sources"],
    queryFn: getDataSources,
  });
  // ── Toggle enable/disable ───────────────────────────────────────
  const toggleMutation = useMutation({
    mutationFn: ({ id, enabled }: { id: string; enabled: boolean }) =>
      updateDataSource(id, { is_enabled: enabled }),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ["data-sources"] });
    },
  });
  // ── Sync individual source ──────────────────────────────────────
  const syncMutation = useMutation({
    mutationFn: (id: string) => syncDataSource(id),
    onSuccess: () => {
      setSyncingId(null);
      queryClient.invalidateQueries({ queryKey: ["data-sources"] });
    },
    onError: () => {
      setSyncingId(null);
      queryClient.invalidateQueries({ queryKey: ["data-sources"] });
    },
  });
  // ── Sync all ────────────────────────────────────────────────────
  const syncAllMutation = useMutation({
    mutationFn: syncAllDataSources,
    onSuccess: (data) => {
      setSyncAllResult(data);
      queryClient.invalidateQueries({ queryKey: ["data-sources"] });
    },
  });
  const handleSync = (id: string) => {
    setSyncingId(id);
    syncMutation.mutate(id);
  };
  const formatDate = (dateStr: string | null) => {
    if (!dateStr) return "Never";
    const date = new Date(dateStr);
    return date.toLocaleString("en-US", { dateStyle: "medium", timeStyle: "short" });
  };
  const formatStats = (stats: Record<string, unknown> | null) => {
    if (!stats) return null;
    return Object.entries(stats)
      .filter(([k]) => k !== "error")
      .map(([k, v]) => `${k.replace(/_/g, " ")}: ${v}`)
      .join(" | ");
  };
  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="flex items-center justify-between">
        <div>
          <h1 className="text-2xl font-bold text-white">Data Sources</h1>
          <p className="mt-1 text-sm text-gray-400">
            Manage external data sources for test templates and detection rules
          </p>
        </div>
        <button
          onClick={() => syncAllMutation.mutate()}
          disabled={syncAllMutation.isPending}
          className="flex items-center gap-2 rounded-lg bg-cyan-600 px-4 py-2.5 text-sm font-medium text-white hover:bg-cyan-500 disabled:opacity-50 transition-colors"
        >
          {syncAllMutation.isPending ? (
            <Loader2 className="h-4 w-4 animate-spin" />
          ) : (
            <RefreshCw className="h-4 w-4" />
          )}
          {syncAllMutation.isPending ? "Syncing All..." : "Sync All"}
        </button>
      </div>
      {/* Sync All Result */}
      {syncAllResult && (
        <div className="rounded-xl border border-cyan-500/30 bg-gray-900 p-4">
          <div className="flex items-center justify-between mb-3">
            <h3 className="text-sm font-semibold text-white">Sync All Results</h3>
            <button
              onClick={() => setSyncAllResult(null)}
              className="text-gray-400 hover:text-white text-xs"
            >
              Dismiss
            </button>
          </div>
          <div className="space-y-2">
            {syncAllResult.results.map((r, i) => (
              <div key={i} className="flex items-center gap-3 text-sm">
                {r.status === "success" ? (
                  <CheckCircle className="h-4 w-4 text-green-400 shrink-0" />
                ) : r.status === "error" ? (
                  <XCircle className="h-4 w-4 text-red-400 shrink-0" />
                ) : (
                  <AlertCircle className="h-4 w-4 text-yellow-400 shrink-0" />
                )}
                <span className="text-gray-300 font-medium">{r.source}</span>
                {r.stats && (
                  <span className="text-gray-500 text-xs">
                    {formatStats(r.stats as Record<string, unknown>)}
                  </span>
                )}
                {r.detail && (
                  <span className="text-gray-500 text-xs">{r.detail}</span>
                )}
              </div>
            ))}
          </div>
        </div>
      )}
      {/* Loading */}
      {isLoading && (
        <div className="flex items-center justify-center py-16">
          <Loader2 className="h-8 w-8 animate-spin text-cyan-400" />
        </div>
      )}
      {/* Error */}
      {error && (
        <div className="rounded-xl border border-red-500/30 bg-red-900/20 p-6 text-center">
          <AlertCircle className="mx-auto h-8 w-8 text-red-400" />
          <p className="mt-2 text-sm text-red-400">
            Failed to load data sources: {(error as Error)?.message}
          </p>
        </div>
      )}
      {/* Data Sources Table */}
      {sources && sources.length > 0 && (
        <div className="rounded-xl border border-gray-800 bg-gray-900 overflow-hidden">
          <div className="overflow-x-auto">
            <table className="w-full text-left text-sm">
              <thead>
                <tr className="border-b border-gray-800 bg-gray-900/50">
                  <th className="px-4 py-3 font-medium text-gray-400">Source</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Type</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Status</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Last Sync</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Stats</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Enabled</th>
                  <th className="px-4 py-3 font-medium text-gray-400">Actions</th>
                </tr>
              </thead>
              <tbody>
                {sources.map((src: DataSource) => {
                  const tp = typeProps(src.type);
                  const TypeIcon = tp.icon;
                  const isSyncing = syncingId === src.id;
                  return (
                    <tr
                      key={src.id}
                      className="border-b border-gray-800/50 hover:bg-gray-800/30 transition-colors"
                    >
                      {/* Source */}
                      <td className="px-4 py-3">
                        <div className="flex items-center gap-3">
                          <div className="rounded-lg bg-gray-800 p-2">
                            <Database className="h-4 w-4 text-cyan-400" />
                          </div>
                          <div>
                            <p className="font-medium text-gray-200">{src.display_name}</p>
                            <div className="flex items-center gap-2">
                              <span className="text-xs text-gray-500 font-mono">{src.name}</span>
                              {src.url && (
                                <a
                                  href={src.url}
                                  target="_blank"
                                  rel="noreferrer"
                                  className="text-gray-500 hover:text-cyan-400"
                                >
                                  <ExternalLink className="h-3 w-3" />
                                </a>
                              )}
                            </div>
                          </div>
                        </div>
                      </td>
                      {/* Type */}
                      <td className="px-4 py-3">
                        <span
                          className={`inline-flex items-center gap-1 rounded-full border px-2 py-0.5 text-xs font-medium ${tp.color}`}
                        >
                          <TypeIcon className="h-3 w-3" />
                          {tp.label}
                        </span>
                      </td>
                      {/* Sync Status */}
                      <td className="px-4 py-3">
                        {isSyncing ? statusBadge("in_progress") : statusBadge(src.last_sync_status)}
                      </td>
                      {/* Last Sync */}
                      <td className="px-4 py-3">
                        <div className="flex items-center gap-1.5 text-xs text-gray-400">
                          <Clock className="h-3.5 w-3.5" />
                          {formatDate(src.last_sync_at)}
                        </div>
                        {src.sync_frequency && (
                          <span className="text-[10px] text-gray-600">
                            Frequency: {src.sync_frequency}
                          </span>
                        )}
                      </td>
                      {/* Stats */}
                      <td className="px-4 py-3">
                        {src.last_sync_stats ? (
                          <span className="text-xs text-gray-400">
                            {formatStats(src.last_sync_stats)}
                          </span>
                        ) : (
                          <span className="text-xs text-gray-600">-</span>
                        )}
                      </td>
                      {/* Toggle */}
                      <td className="px-4 py-3">
                        <button
                          onClick={() =>
                            toggleMutation.mutate({
                              id: src.id,
                              enabled: !src.is_enabled,
                            })
                          }
                          disabled={toggleMutation.isPending}
                          className={`flex items-center gap-1 text-xs font-medium transition-colors ${
                            src.is_enabled
                              ? "text-green-400 hover:text-green-300"
                              : "text-gray-500 hover:text-gray-400"
                          }`}
                        >
                          {src.is_enabled ? (
                            <>
                              <ToggleRight className="h-5 w-5" />
                              On
                            </>
                          ) : (
                            <>
                              <ToggleLeft className="h-5 w-5" />
                              Off
                            </>
                          )}
                        </button>
                      </td>
                      {/* Actions */}
                      <td className="px-4 py-3">
                        <button
                          onClick={() => handleSync(src.id)}
                          disabled={isSyncing || !src.is_enabled}
                          className="flex items-center gap-1.5 rounded-lg bg-gray-800 border border-gray-700 px-3 py-1.5 text-xs font-medium text-gray-300 hover:bg-gray-700 hover:text-white disabled:opacity-40 transition-colors"
                        >
                          {isSyncing ? (
                            <Loader2 className="h-3.5 w-3.5 animate-spin" />
                          ) : (
                            <Play className="h-3.5 w-3.5" />
                          )}
                          {isSyncing ? "Syncing..." : "Sync"}
                        </button>
                      </td>
                    </tr>
                  );
                })}
              </tbody>
            </table>
          </div>
        </div>
      )}
      {/* Empty State */}
      {sources && sources.length === 0 && (
        <div className="rounded-xl border border-gray-800 bg-gray-900 p-12 text-center">
          <Database className="mx-auto h-12 w-12 text-gray-600" />
          <h3 className="mt-4 text-lg font-medium text-gray-300">No Data Sources</h3>
          <p className="mt-1 text-sm text-gray-500">
            Run the data sources seed script to register initial sources.
          </p>
        </div>
      )}
    </div>
  );
 }