feat(phase-11): implement Red/Blue business logic services (T-106, T-107, T-108)

T-106: Create test_workflow_service.py with state-machine transitions for the complete test lifecycle (draft -> red_executing -> blue_evaluating -> in_review -> validated/rejected), dual validation by Red/Blue leads, and reopen capability with field cleanup. T-107: Update status_service.py to use detection_result from Blue Team instead of legacy result field, and differentiate between partial progress (some validated) vs all-in-progress states. T-108: Create atomic_import_service.py that downloads the Atomic Red Team repo as a ZIP (avoiding API rate limits), parses all atomics YAML files, and creates idempotent TestTemplate records mapped to MITRE techniques. Includes validation tests for all three tasks (19 checks total).
2026-02-09 09:58:54 +01:00
parent 086cc5c8bc
commit 7af6be10be
23 changed files with 2053 additions and 45 deletions
@@ -0,0 +1,231 @@
+"""Atomic Red Team import service.
+
+Downloads the Atomic Red Team repository ZIP from GitHub, parses every
+``atomics/T*/T*.yaml`` file, and upserts :class:`TestTemplate` records
+into the database.
+
+Strategy
+--------
+The GitHub REST API without authentication only allows 60 req/hour.
+Since the Atomic Red Team repo contains 1 500+ YAML files we avoid
+per-file requests entirely.  Instead we:
+
+1. Download the full repo as a ZIP archive (~40 MB).
+2. Extract in a temporary directory.
+3. Walk ``atomics/T*/T*.yaml`` files parsing them with PyYAML.
+4. Create / update ``TestTemplate`` rows keyed by ``atomic_test_id``.
+5. Clean up the temporary directory.
+
+Idempotency
+-----------
+Running the import twice does **not** create duplicates.  Existing
+templates are identified by their ``atomic_test_id`` and simply skipped.
+"""
+
+import io
+import logging
+import os
+import shutil
+import tempfile
+import zipfile
+from pathlib import Path
+
+import requests as _requests
+import yaml
+from sqlalchemy.orm import Session
+
+from app.models.test_template import TestTemplate
+from app.services.audit_service import log_action
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+ATOMIC_RT_ZIP_URL = (
+    "https://github.com/redcanaryco/atomic-red-team"
+    "/archive/refs/heads/master.zip"
+)
+
+# Request timeout for the ZIP download (seconds)
+_DOWNLOAD_TIMEOUT = 300
+
+# Top-level directory name inside the ZIP
+_ZIP_ROOT_PREFIX = "atomic-red-team-master"
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _download_zip(url: str = ATOMIC_RT_ZIP_URL) -> bytes:
+    """Download the Atomic Red Team ZIP and return its raw bytes."""
+    logger.info("Downloading Atomic Red Team ZIP from %s …", url)
+    resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
+    resp.raise_for_status()
+    content = resp.content
+    logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
+    return content
+
+
+def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
+    """Extract *zip_bytes* into *dest* and return the path to the atomics/ dir."""
+    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
+        zf.extractall(dest)
+    atomics_dir = Path(dest) / _ZIP_ROOT_PREFIX / "atomics"
+    if not atomics_dir.is_dir():
+        raise FileNotFoundError(
+            f"Expected atomics directory not found at {atomics_dir}"
+        )
+    return atomics_dir
+
+
+def _parse_yaml_files(atomics_dir: Path) -> list[dict]:
+    """Walk the atomics directory and parse all technique YAML files.
+
+    Returns a flat list of dicts, each representing a single atomic test
+    with the following keys::
+
+        technique_id, index, name, description, platforms,
+        executor_type, command, source_url
+    """
+    results: list[dict] = []
+    yaml_files = sorted(atomics_dir.glob("T*/T*.yaml"))
+    logger.info("Found %d YAML files to parse", len(yaml_files))
+
+    for yaml_path in yaml_files:
+        technique_id = yaml_path.stem  # e.g. "T1059.001"
+        try:
+            with open(yaml_path, "r", encoding="utf-8") as fh:
+                data = yaml.safe_load(fh)
+        except Exception as exc:
+            logger.warning("Failed to parse %s: %s", yaml_path, exc)
+            continue
+
+        if not data or "atomic_tests" not in data:
+            continue
+
+        for idx, test in enumerate(data["atomic_tests"]):
+            name = test.get("name", "").strip()
+            description = test.get("description", "").strip()
+            platforms = test.get("supported_platforms", [])
+            executor = test.get("executor", {})
+            executor_type = executor.get("name", "") if isinstance(executor, dict) else ""
+            command = executor.get("command", "") if isinstance(executor, dict) else ""
+
+            # Build an atomic_test_id in the format "T1059.001-0"
+            atomic_test_id = f"{technique_id}-{idx}"
+
+            source_url = (
+                f"https://github.com/redcanaryco/atomic-red-team/blob/master"
+                f"/atomics/{technique_id}/{technique_id}.yaml"
+            )
+
+            results.append({
+                "technique_id": technique_id,
+                "index": idx,
+                "atomic_test_id": atomic_test_id,
+                "name": name,
+                "description": description,
+                "platforms": ", ".join(platforms) if isinstance(platforms, list) else str(platforms),
+                "executor_type": executor_type,
+                "command": command[:4000] if command else None,  # cap at 4k chars
+                "source_url": source_url,
+            })
+
+    logger.info("Parsed %d atomic tests total", len(results))
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def import_atomic_red_team(db: Session) -> dict:
+    """Download and import Atomic Red Team tests as TestTemplates.
+
+    Parameters
+    ----------
+    db : Session
+        Active SQLAlchemy database session.
+
+    Returns
+    -------
+    dict
+        Summary with keys ``created``, ``skipped_existing``,
+        ``yaml_files_parsed``, ``total_tests_parsed``.
+    """
+    tmp_dir = tempfile.mkdtemp(prefix="aegis_atomic_")
+    try:
+        zip_bytes = _download_zip()
+        atomics_dir = _extract_zip(zip_bytes, tmp_dir)
+        parsed_tests = _parse_yaml_files(atomics_dir)
+    finally:
+        # Always clean up
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        logger.info("Cleaned up temp directory %s", tmp_dir)
+
+    # Pre-load existing atomic_test_ids for dedup
+    existing_ids: set[str] = {
+        row[0]
+        for row in db.query(TestTemplate.atomic_test_id)
+        .filter(TestTemplate.atomic_test_id.isnot(None))
+        .all()
+    }
+
+    created = 0
+    skipped = 0
+
+    for item in parsed_tests:
+        if item["atomic_test_id"] in existing_ids:
+            skipped += 1
+            continue
+
+        template = TestTemplate(
+            mitre_technique_id=item["technique_id"],
+            name=item["name"][:500] if item["name"] else f"Atomic Test {item['atomic_test_id']}",
+            description=item["description"][:2000] if item["description"] else None,
+            source="atomic_red_team",
+            source_url=item["source_url"],
+            attack_procedure=item["command"],
+            platform=item["platforms"],
+            tool_suggested=item["executor_type"] if item["executor_type"] else None,
+            atomic_test_id=item["atomic_test_id"],
+            is_active=True,
+        )
+        db.add(template)
+        existing_ids.add(item["atomic_test_id"])
+        created += 1
+
+    db.commit()
+
+    # Count distinct YAML files by technique_id
+    yaml_files_count = len({t["technique_id"] for t in parsed_tests})
+
+    summary = {
+        "created": created,
+        "skipped_existing": skipped,
+        "yaml_files_parsed": yaml_files_count,
+        "total_tests_parsed": len(parsed_tests),
+    }
+
+    logger.info(
+        "Atomic Red Team import complete — created=%d, skipped=%d, "
+        "yaml_files=%d, total_tests=%d",
+        created, skipped, yaml_files_count, len(parsed_tests),
+    )
+
+    # Audit log (system action)
+    log_action(
+        db,
+        user_id=None,
+        action="import_atomic_red_team",
+        entity_type="test_template",
+        entity_id=None,
+        details=summary,
+    )
+
+    return summary
@@ -1,36 +1,46 @@
 """Service for recalculating the global status of a Technique
-based on the state and result of its associated tests."""
+based on the state and result of its associated tests.
+
+V2 rules account for dual Red/Blue validation and use
+``detection_result`` (filled by Blue Team) instead of the legacy
+``result`` field.
+"""

 from sqlalchemy.orm import Session

-from app.models.enums import TechniqueStatus
+from app.models.enums import TechniqueStatus, TestState
 from app.models.technique import Technique


 def recalculate_technique_status(db: Session, technique: Technique) -> None:
    """Recompute ``technique.status_global`` from its tests and commit.

-    Rules
-    -----
-    - No tests → ``not_evaluated``
-    - Any test not yet ``validated`` → ``in_progress``
-    - All validated and all ``detected`` → ``validated``
-    - All validated and any ``partially_detected`` → ``partial``
-    - Otherwise → ``not_covered``
+    Rules (v2)
+    ----------
+    1. No tests → ``not_evaluated``
+    2. All tests ``validated`` → look at detection results:
+       - All ``detected`` → ``validated``
+       - Any ``partially_detected`` → ``partial``
+       - Otherwise → ``not_covered``
+    3. Some tests ``validated``, others still in progress → ``partial``
+    4. All tests in intermediate states (no validated) → ``in_progress``
    """
    tests = technique.tests

    if not tests:
        technique.status_global = TechniqueStatus.not_evaluated
-    elif any(t.state != "validated" for t in tests):
-        technique.status_global = TechniqueStatus.in_progress
-    else:
-        results = [t.result for t in tests]
-        if all(r == "detected" for r in results):
+    elif all(t.state == TestState.validated for t in tests):
+        # All validated — inspect detection results
+        results = [t.detection_result for t in tests if t.detection_result]
+        if results and all(str(r) == "detected" or r == "detected" for r in results):
            technique.status_global = TechniqueStatus.validated
-        elif any(r == "partially_detected" for r in results):
+        elif any(str(r) == "partially_detected" or r == "partially_detected" for r in results):
            technique.status_global = TechniqueStatus.partial
        else:
            technique.status_global = TechniqueStatus.not_covered
+    elif any(t.state == TestState.validated for t in tests):
+        technique.status_global = TechniqueStatus.partial
+    else:
+        technique.status_global = TechniqueStatus.in_progress

    db.commit()
@@ -0,0 +1,285 @@
+"""Test workflow service — state-machine transitions for the Red/Blue validation flow.
+
+Controls which state transitions are valid and exposes high-level helpers
+for each step in the test lifecycle:
+
+    draft → red_executing → blue_evaluating → in_review → validated / rejected
+                                                                  ↓
+                                                           rejected → draft
+
+Every public function validates the transition, mutates the test, writes an
+audit-log entry, and commits the session.
+"""
+
+from datetime import datetime
+
+from fastapi import HTTPException, status
+from sqlalchemy.orm import Session
+
+from app.models.enums import TestState
+from app.models.test import Test
+from app.models.user import User
+from app.services.audit_service import log_action
+
+# ---------------------------------------------------------------------------
+# Valid transition map
+# ---------------------------------------------------------------------------
+
+VALID_TRANSITIONS: dict[TestState, list[TestState]] = {
+    TestState.draft:           [TestState.red_executing],
+    TestState.red_executing:   [TestState.blue_evaluating],
+    TestState.blue_evaluating: [TestState.in_review],
+    TestState.in_review:       [TestState.validated, TestState.rejected],
+    TestState.rejected:        [TestState.draft],
+    TestState.validated:       [],  # terminal state
+}
+
+
+# ---------------------------------------------------------------------------
+# Core helpers
+# ---------------------------------------------------------------------------
+
+
+def can_transition(test: Test, target_state: TestState) -> bool:
+    """Return *True* if moving *test* to *target_state* is allowed."""
+    current = test.state if isinstance(test.state, TestState) else TestState(test.state)
+    return target_state in VALID_TRANSITIONS.get(current, [])
+
+
+def transition_state(
+    db: Session,
+    test: Test,
+    target_state: TestState,
+    user: User,
+    *,
+    action_name: str = "transition_state",
+    extra_details: dict | None = None,
+) -> Test:
+    """Validate and perform a state transition, log it, and commit.
+
+    Raises :class:`~fastapi.HTTPException` 400 when the transition is invalid.
+    """
+    if not can_transition(test, target_state):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=(
+                f"Invalid transition: cannot move from "
+                f"'{test.state.value if isinstance(test.state, TestState) else test.state}' "
+                f"to '{target_state.value}'"
+            ),
+        )
+
+    previous_state = test.state.value if isinstance(test.state, TestState) else test.state
+    test.state = target_state
+    db.flush()
+
+    details: dict = {
+        "previous_state": previous_state,
+        "new_state": target_state.value,
+        "test_name": test.name,
+        "technique_id": str(test.technique_id),
+    }
+    if extra_details:
+        details.update(extra_details)
+
+    log_action(
+        db,
+        user_id=user.id,
+        action=action_name,
+        entity_type="test",
+        entity_id=test.id,
+        details=details,
+    )
+
+    return test
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle convenience functions
+# ---------------------------------------------------------------------------
+
+
+def start_execution(db: Session, test: Test, user: User) -> Test:
+    """Move from ``draft`` → ``red_executing``.
+
+    Typically called by a **red_tech** when they begin the attack.
+    """
+    test = transition_state(
+        db, test, TestState.red_executing, user,
+        action_name="start_execution",
+    )
+    test.execution_date = datetime.utcnow()
+    db.commit()
+    return test
+
+
+def submit_red_evidence(db: Session, test: Test, user: User) -> Test:
+    """Move from ``red_executing`` → ``blue_evaluating``.
+
+    Called by **red_tech** once they have finished documenting the attack.
+    """
+    test = transition_state(
+        db, test, TestState.blue_evaluating, user,
+        action_name="submit_red_evidence",
+    )
+    db.commit()
+    return test
+
+
+def submit_blue_evidence(db: Session, test: Test, user: User) -> Test:
+    """Move from ``blue_evaluating`` → ``in_review``.
+
+    Called by **blue_tech** once they have finished documenting detection.
+    """
+    test = transition_state(
+        db, test, TestState.in_review, user,
+        action_name="submit_blue_evidence",
+    )
+    db.commit()
+    return test
+
+
+def validate_as_red_lead(
+    db: Session,
+    test: Test,
+    user: User,
+    validation_status: str,
+    notes: str | None = None,
+) -> Test:
+    """Record Red Lead's validation decision.
+
+    *validation_status* must be ``"approved"`` or ``"rejected"``.
+    After recording the decision, :func:`check_dual_validation` is called
+    to potentially advance the test to ``validated`` or ``rejected``.
+    """
+    if test.state not in (TestState.in_review,):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Cannot validate red side while test is in '{test.state.value if isinstance(test.state, TestState) else test.state}' state (must be in_review)",
+        )
+
+    if validation_status not in ("approved", "rejected"):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="validation_status must be 'approved' or 'rejected'",
+        )
+
+    now = datetime.utcnow()
+    test.red_validation_status = validation_status
+    test.red_validated_by = user.id
+    test.red_validated_at = now
+    test.red_validation_notes = notes
+
+    log_action(
+        db,
+        user_id=user.id,
+        action="validate_as_red_lead",
+        entity_type="test",
+        entity_id=test.id,
+        details={
+            "validation_status": validation_status,
+            "notes": notes,
+            "technique_id": str(test.technique_id),
+        },
+    )
+
+    check_dual_validation(db, test)
+    return test
+
+
+def validate_as_blue_lead(
+    db: Session,
+    test: Test,
+    user: User,
+    validation_status: str,
+    notes: str | None = None,
+) -> Test:
+    """Record Blue Lead's validation decision.
+
+    *validation_status* must be ``"approved"`` or ``"rejected"``.
+    After recording the decision, :func:`check_dual_validation` is called
+    to potentially advance the test to ``validated`` or ``rejected``.
+    """
+    if test.state not in (TestState.in_review,):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Cannot validate blue side while test is in '{test.state.value if isinstance(test.state, TestState) else test.state}' state (must be in_review)",
+        )
+
+    if validation_status not in ("approved", "rejected"):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="validation_status must be 'approved' or 'rejected'",
+        )
+
+    now = datetime.utcnow()
+    test.blue_validation_status = validation_status
+    test.blue_validated_by = user.id
+    test.blue_validated_at = now
+    test.blue_validation_notes = notes
+
+    log_action(
+        db,
+        user_id=user.id,
+        action="validate_as_blue_lead",
+        entity_type="test",
+        entity_id=test.id,
+        details={
+            "validation_status": validation_status,
+            "notes": notes,
+            "technique_id": str(test.technique_id),
+        },
+    )
+
+    check_dual_validation(db, test)
+    return test
+
+
+def check_dual_validation(db: Session, test: Test) -> Test:
+    """Evaluate both leads' decisions and advance the test if both have voted.
+
+    - Both **approved** → ``validated``
+    - Either **rejected** → ``rejected``
+    - Otherwise no state change (waiting for the other lead).
+
+    Commits only when the state actually changes.
+    """
+    red_status = test.red_validation_status
+    blue_status = test.blue_validation_status
+
+    if red_status == "rejected" or blue_status == "rejected":
+        test.state = TestState.rejected
+        db.commit()
+    elif red_status == "approved" and blue_status == "approved":
+        test.state = TestState.validated
+        db.commit()
+    else:
+        # One side hasn't voted yet — stay in_review, just flush
+        db.commit()
+
+    return test
+
+
+def reopen_test(db: Session, test: Test, user: User) -> Test:
+    """Move a ``rejected`` test back to ``draft``, clearing validation fields.
+
+    This allows the teams to redo the test cycle.
+    """
+    test = transition_state(
+        db, test, TestState.draft, user,
+        action_name="reopen_test",
+    )
+
+    # Clear dual-validation fields
+    test.red_validation_status = None
+    test.red_validated_by = None
+    test.red_validated_at = None
+    test.red_validation_notes = None
+
+    test.blue_validation_status = None
+    test.blue_validated_by = None
+    test.blue_validated_at = None
+    test.blue_validation_notes = None
+
+    db.commit()
+    return test