feat(phase-11): implement Red/Blue business logic services (T-106, T-107, T-108)
T-106: Create test_workflow_service.py with state-machine transitions for the complete test lifecycle (draft -> red_executing -> blue_evaluating -> in_review -> validated/rejected), dual validation by Red/Blue leads, and reopen capability with field cleanup. T-107: Update status_service.py to use detection_result from Blue Team instead of legacy result field, and differentiate between partial progress (some validated) vs all-in-progress states. T-108: Create atomic_import_service.py that downloads the Atomic Red Team repo as a ZIP (avoiding API rate limits), parses all atomics YAML files, and creates idempotent TestTemplate records mapped to MITRE techniques. Includes validation tests for all three tasks (19 checks total).
This commit is contained in:
231
backend/app/services/atomic_import_service.py
Normal file
231
backend/app/services/atomic_import_service.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""Atomic Red Team import service.
|
||||
|
||||
Downloads the Atomic Red Team repository ZIP from GitHub, parses every
|
||||
``atomics/T*/T*.yaml`` file, and upserts :class:`TestTemplate` records
|
||||
into the database.
|
||||
|
||||
Strategy
|
||||
--------
|
||||
The GitHub REST API without authentication only allows 60 req/hour.
|
||||
Since the Atomic Red Team repo contains 1 500+ YAML files we avoid
|
||||
per-file requests entirely. Instead we:
|
||||
|
||||
1. Download the full repo as a ZIP archive (~40 MB).
|
||||
2. Extract in a temporary directory.
|
||||
3. Walk ``atomics/T*/T*.yaml`` files parsing them with PyYAML.
|
||||
4. Create / update ``TestTemplate`` rows keyed by ``atomic_test_id``.
|
||||
5. Clean up the temporary directory.
|
||||
|
||||
Idempotency
|
||||
-----------
|
||||
Running the import twice does **not** create duplicates. Existing
|
||||
templates are identified by their ``atomic_test_id`` and simply skipped.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import requests as _requests
|
||||
import yaml
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.test_template import TestTemplate
|
||||
from app.services.audit_service import log_action
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ATOMIC_RT_ZIP_URL = (
|
||||
"https://github.com/redcanaryco/atomic-red-team"
|
||||
"/archive/refs/heads/master.zip"
|
||||
)
|
||||
|
||||
# Request timeout for the ZIP download (seconds)
|
||||
_DOWNLOAD_TIMEOUT = 300
|
||||
|
||||
# Top-level directory name inside the ZIP
|
||||
_ZIP_ROOT_PREFIX = "atomic-red-team-master"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _download_zip(url: str = ATOMIC_RT_ZIP_URL) -> bytes:
|
||||
"""Download the Atomic Red Team ZIP and return its raw bytes."""
|
||||
logger.info("Downloading Atomic Red Team ZIP from %s …", url)
|
||||
resp = _requests.get(url, timeout=_DOWNLOAD_TIMEOUT, stream=True)
|
||||
resp.raise_for_status()
|
||||
content = resp.content
|
||||
logger.info("Downloaded %.1f MB", len(content) / (1024 * 1024))
|
||||
return content
|
||||
|
||||
|
||||
def _extract_zip(zip_bytes: bytes, dest: str) -> Path:
|
||||
"""Extract *zip_bytes* into *dest* and return the path to the atomics/ dir."""
|
||||
with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
|
||||
zf.extractall(dest)
|
||||
atomics_dir = Path(dest) / _ZIP_ROOT_PREFIX / "atomics"
|
||||
if not atomics_dir.is_dir():
|
||||
raise FileNotFoundError(
|
||||
f"Expected atomics directory not found at {atomics_dir}"
|
||||
)
|
||||
return atomics_dir
|
||||
|
||||
|
||||
def _parse_yaml_files(atomics_dir: Path) -> list[dict]:
|
||||
"""Walk the atomics directory and parse all technique YAML files.
|
||||
|
||||
Returns a flat list of dicts, each representing a single atomic test
|
||||
with the following keys::
|
||||
|
||||
technique_id, index, name, description, platforms,
|
||||
executor_type, command, source_url
|
||||
"""
|
||||
results: list[dict] = []
|
||||
yaml_files = sorted(atomics_dir.glob("T*/T*.yaml"))
|
||||
logger.info("Found %d YAML files to parse", len(yaml_files))
|
||||
|
||||
for yaml_path in yaml_files:
|
||||
technique_id = yaml_path.stem # e.g. "T1059.001"
|
||||
try:
|
||||
with open(yaml_path, "r", encoding="utf-8") as fh:
|
||||
data = yaml.safe_load(fh)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to parse %s: %s", yaml_path, exc)
|
||||
continue
|
||||
|
||||
if not data or "atomic_tests" not in data:
|
||||
continue
|
||||
|
||||
for idx, test in enumerate(data["atomic_tests"]):
|
||||
name = test.get("name", "").strip()
|
||||
description = test.get("description", "").strip()
|
||||
platforms = test.get("supported_platforms", [])
|
||||
executor = test.get("executor", {})
|
||||
executor_type = executor.get("name", "") if isinstance(executor, dict) else ""
|
||||
command = executor.get("command", "") if isinstance(executor, dict) else ""
|
||||
|
||||
# Build an atomic_test_id in the format "T1059.001-0"
|
||||
atomic_test_id = f"{technique_id}-{idx}"
|
||||
|
||||
source_url = (
|
||||
f"https://github.com/redcanaryco/atomic-red-team/blob/master"
|
||||
f"/atomics/{technique_id}/{technique_id}.yaml"
|
||||
)
|
||||
|
||||
results.append({
|
||||
"technique_id": technique_id,
|
||||
"index": idx,
|
||||
"atomic_test_id": atomic_test_id,
|
||||
"name": name,
|
||||
"description": description,
|
||||
"platforms": ", ".join(platforms) if isinstance(platforms, list) else str(platforms),
|
||||
"executor_type": executor_type,
|
||||
"command": command[:4000] if command else None, # cap at 4k chars
|
||||
"source_url": source_url,
|
||||
})
|
||||
|
||||
logger.info("Parsed %d atomic tests total", len(results))
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def import_atomic_red_team(db: Session) -> dict:
|
||||
"""Download and import Atomic Red Team tests as TestTemplates.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db : Session
|
||||
Active SQLAlchemy database session.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
Summary with keys ``created``, ``skipped_existing``,
|
||||
``yaml_files_parsed``, ``total_tests_parsed``.
|
||||
"""
|
||||
tmp_dir = tempfile.mkdtemp(prefix="aegis_atomic_")
|
||||
try:
|
||||
zip_bytes = _download_zip()
|
||||
atomics_dir = _extract_zip(zip_bytes, tmp_dir)
|
||||
parsed_tests = _parse_yaml_files(atomics_dir)
|
||||
finally:
|
||||
# Always clean up
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
logger.info("Cleaned up temp directory %s", tmp_dir)
|
||||
|
||||
# Pre-load existing atomic_test_ids for dedup
|
||||
existing_ids: set[str] = {
|
||||
row[0]
|
||||
for row in db.query(TestTemplate.atomic_test_id)
|
||||
.filter(TestTemplate.atomic_test_id.isnot(None))
|
||||
.all()
|
||||
}
|
||||
|
||||
created = 0
|
||||
skipped = 0
|
||||
|
||||
for item in parsed_tests:
|
||||
if item["atomic_test_id"] in existing_ids:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
template = TestTemplate(
|
||||
mitre_technique_id=item["technique_id"],
|
||||
name=item["name"][:500] if item["name"] else f"Atomic Test {item['atomic_test_id']}",
|
||||
description=item["description"][:2000] if item["description"] else None,
|
||||
source="atomic_red_team",
|
||||
source_url=item["source_url"],
|
||||
attack_procedure=item["command"],
|
||||
platform=item["platforms"],
|
||||
tool_suggested=item["executor_type"] if item["executor_type"] else None,
|
||||
atomic_test_id=item["atomic_test_id"],
|
||||
is_active=True,
|
||||
)
|
||||
db.add(template)
|
||||
existing_ids.add(item["atomic_test_id"])
|
||||
created += 1
|
||||
|
||||
db.commit()
|
||||
|
||||
# Count distinct YAML files by technique_id
|
||||
yaml_files_count = len({t["technique_id"] for t in parsed_tests})
|
||||
|
||||
summary = {
|
||||
"created": created,
|
||||
"skipped_existing": skipped,
|
||||
"yaml_files_parsed": yaml_files_count,
|
||||
"total_tests_parsed": len(parsed_tests),
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Atomic Red Team import complete — created=%d, skipped=%d, "
|
||||
"yaml_files=%d, total_tests=%d",
|
||||
created, skipped, yaml_files_count, len(parsed_tests),
|
||||
)
|
||||
|
||||
# Audit log (system action)
|
||||
log_action(
|
||||
db,
|
||||
user_id=None,
|
||||
action="import_atomic_red_team",
|
||||
entity_type="test_template",
|
||||
entity_id=None,
|
||||
details=summary,
|
||||
)
|
||||
|
||||
return summary
|
||||
Reference in New Issue
Block a user