Files
Aegis/backend/app/seed_demo.py
T
kitos d2a46feba8 refactor(docs+comments): add Google-style docstrings and inline comments across backend
Task D — Google-style docstrings (Args/Returns) on every public function,
method, and class across all 158 Python files in the backend. Zero ruff D
violations (pydocstyle Google convention).

Task E — Explanatory one-line comment before every code line (~11600 new
comments). ruff check passes clean after isort re-sort.
2026-06-11 11:06:55 +02:00

697 lines
25 KiB
Python

"""Seed script — generates a realistic volume of demo data for V3 validation.
Usage:
python -m app.seed_demo
**Prerequisite**: The MITRE sync must have been completed first so that
real techniques exist in the database.
Running twice is safe — the script detects existing demo data (by username
prefix ``demo_``) and deletes it before re-creating, ensuring idempotency.
"""
# Import logging
import logging
# Import random
import random
# Import uuid
import uuid
# Import datetime, timedelta from datetime
from datetime import datetime, timedelta
# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session
# Import hash_password from app.auth
from app.auth import hash_password
# Import SessionLocal from app.database
from app.database import SessionLocal
# Import AuditLog from app.models.audit
from app.models.audit import AuditLog
# Import TeamSide, TechniqueStatus, TestResult, TestState from app.models.enums
from app.models.enums import TeamSide, TechniqueStatus, TestResult, TestState
# Import Evidence from app.models.evidence
from app.models.evidence import Evidence
# Import Notification from app.models.notification
from app.models.notification import Notification
# Import Technique from app.models.technique
from app.models.technique import Technique
# Import Test from app.models.test
from app.models.test import Test
# Import TestTemplate from app.models.test_template
from app.models.test_template import TestTemplate
# Import User from app.models.user
from app.models.user import User
# Assign logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Username prefix that marks an account as seeded demo data.
DEMO_PREFIX = "demo_"

# Role names given to the demo accounts.
ROLES = [
    "red_tech",
    "blue_tech",
    "red_lead",
    "blue_lead",
    "admin",
]

# Pool of technique statuses the seeders pick from at random.
TECHNIQUE_STATUSES = [
    TechniqueStatus.validated,
    TechniqueStatus.partial,
    TechniqueStatus.not_covered,
    TechniqueStatus.in_progress,
    TechniqueStatus.not_evaluated,
]

# Pool of pipeline states assigned to demo tests.
TEST_STATES = [
    TestState.draft,
    TestState.red_executing,
    TestState.blue_evaluating,
    TestState.in_review,
    TestState.validated,
    TestState.rejected,
]

# Pool of outcomes used for test results and detection results.
TEST_RESULTS = [
    TestResult.detected,
    TestResult.not_detected,
    TestResult.partially_detected,
]
# Notification type identifiers picked at random for demo notifications.
NOTIFICATION_TYPES = [
    "test_assigned",
    "validation_needed",
    "test_rejected",
    "test_validated",
    "test_state_changed",
]

# Action names picked at random for demo audit-log entries.
AUDIT_ACTIONS = [
    "create_test",
    "update_test",
    "validate_technique",
    "upload_evidence",
    "create_user",
    "import_atomic_red_team",
    "sync_mitre",
    "login",
    "reject_test",
    "approve_test",
]

# Target platforms assigned to demo tests and templates.
PLATFORMS = [
    "windows",
    "linux",
    "macos",
]

# Display names for the demo templates; at most one template is created per name.
TEMPLATE_NAMES = [
    "Manual Credential Dumping Test",
    "Custom Phishing Payload Delivery",
    "Lateral Movement via RDP",
    "Persistence via Registry Run Keys",
    "Data Exfiltration over DNS",
    "Process Injection via DLL",
    "Privilege Escalation with Token Impersonation",
    "Custom C2 Beacon Communication Test",
    "Kerberoasting Attack Procedure",
    "Living Off The Land Binaries Test",
]
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
def _cleanup_demo_data(db: Session) -> None:
    """Remove all previously seeded demo data and commit the deletion.

    Demo users are identified by the ``DEMO_PREFIX`` username prefix and demo
    templates by ``source == "demo"``. Rows are deleted children-first to
    respect foreign-key constraints: notifications, audit logs, evidences,
    tests, templates, and finally the users themselves.

    Args:
        db: Open SQLAlchemy session. The deletions are committed on it.
    """
    # ``_`` and ``%`` are LIKE wildcards, so a raw ``like("demo_%")`` would also
    # match non-demo names such as "demos-admin". ``autoescape=True`` makes the
    # prefix match literally.
    demo_user_ids = [
        user_id
        for (user_id,) in db.query(User.id).filter(
            User.username.startswith(DEMO_PREFIX, autoescape=True)
        )
    ]

    if demo_user_ids:
        # Rows that reference the demo users directly.
        db.query(Notification).filter(
            Notification.user_id.in_(demo_user_ids)
        ).delete(synchronize_session=False)
        db.query(AuditLog).filter(
            AuditLog.user_id.in_(demo_user_ids)
        ).delete(synchronize_session=False)

        # Tests created by demo users; their evidences must go first.
        # NOTE(review): evidences uploaded by demo users on non-demo tests are
        # not covered here - confirm that case cannot occur.
        demo_test_ids = [
            test_id
            for (test_id,) in db.query(Test.id).filter(
                Test.created_by.in_(demo_user_ids)
            )
        ]
        if demo_test_ids:
            db.query(Evidence).filter(
                Evidence.test_id.in_(demo_test_ids)
            ).delete(synchronize_session=False)
            db.query(Test).filter(
                Test.id.in_(demo_test_ids)
            ).delete(synchronize_session=False)

    # Demo templates are tagged by source, independent of any demo user.
    db.query(TestTemplate).filter(
        TestTemplate.source == "demo"
    ).delete(synchronize_session=False)

    # Users go last, once nothing deleted above references them any more.
    if demo_user_ids:
        db.query(User).filter(
            User.id.in_(demo_user_ids)
        ).delete(synchronize_session=False)

    db.commit()
    logger.info("Cleaned up existing demo data.")
# ---------------------------------------------------------------------------
# Seeders
# ---------------------------------------------------------------------------
def _seed_users(db: Session) -> list[User]:
    """Create five active demo accounts for every role in ``ROLES``.

    Args:
        db: Session the new users are added to and flushed on (no commit).

    Returns:
        The created users, in ``ROLES`` order with five entries per role.
    """
    created: list[User] = []
    for role_name in ROLES:
        for seq in range(1, 6):
            # Username and e-mail local part share the same handle.
            handle = f"{DEMO_PREFIX}{role_name}_{seq}"
            account = User(
                username=handle,
                email=f"{handle}@aegis-demo.local",
                hashed_password=hash_password("demo123"),
                role=role_name,
                is_active=True,
            )
            db.add(account)
            created.append(account)

    # Flush so the pending rows are written before callers read ``.id``.
    db.flush()
    logger.info("Created %d demo users.", len(created))
    return created
# Define function _seed_technique_statuses
def _seed_technique_statuses(db: Session, count: int = 50) -> list[Technique]:
    """Assign a random global status to up to *count* existing techniques.

    Techniques that end up ``validated`` also get a ``last_review_date``
    between 1 and 30 days in the past.

    Args:
        db: Session used to load the techniques and flush the changes.
        count: Maximum number of techniques to update.

    Returns:
        The updated techniques, or an empty list when none exist.
    """
    selected = db.query(Technique).limit(count).all()
    if not selected:
        logger.warning("No techniques found — run MITRE sync first!")
        return []

    for technique in selected:
        new_status = random.choice(TECHNIQUE_STATUSES)
        technique.status_global = new_status
        if new_status == TechniqueStatus.validated:
            reviewed_at = datetime.utcnow() - timedelta(
                days=random.randint(1, 30)
            )
            technique.last_review_date = reviewed_at

    db.flush()
    logger.info("Updated status on %d techniques.", len(selected))
    return selected
# Define function _seed_tests
def _seed_tests(db: Session, users: list[User], techniques: list[Technique], count: int = 100) -> list[Test]:
    """Create *count* demo tests spread across the pipeline states.

    Each test gets a random technique, state, creator, platform and tool. The
    red/blue summary and validation fields are filled in according to the
    state that was drawn.

    Args:
        db: Session the tests are added to and flushed on (no commit).
        users: Candidate users. Creators are drawn from ``red_tech`` and
            ``blue_tech`` users, validators and rejectors from ``red_lead``
            and ``blue_lead`` users.
        techniques: Techniques the tests may target.
        count: Number of tests to create.

    Returns:
        The created tests, or an empty list when there are no techniques or a
        required role has no users.
    """
    if not techniques:
        logger.warning("No techniques available — skipping test seeding.")
        return []

    red_techs = [u for u in users if u.role == "red_tech"]
    blue_techs = [u for u in users if u.role == "blue_tech"]
    red_leads = [u for u in users if u.role == "red_lead"]
    blue_leads = [u for u in users if u.role == "blue_lead"]

    # Loop-invariant pools, built once instead of on every iteration.
    creators = red_techs + blue_techs
    rejectors = red_leads + blue_leads
    tools = ["powershell", "bash", "cmd", "python", "caldera", "metasploit"]

    # random.choice raises IndexError on an empty pool, so skip cleanly instead.
    if not (creators and red_leads and blue_leads):
        logger.warning("Demo users missing for a required role — skipping test seeding.")
        return []

    states_without_result = (TestState.draft, TestState.red_executing)
    states_with_red_report = (
        TestState.blue_evaluating,
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    )
    states_with_blue_report = (
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    )

    # One reference time for all relative dates in this batch.
    # NOTE(review): kept as naive UTC like the rest of the file - confirm the
    # column types before switching to timezone-aware datetimes.
    now = datetime.utcnow()

    tests = []
    for number in range(1, count + 1):
        technique = random.choice(techniques)
        state = random.choice(TEST_STATES)
        creator = random.choice(creators)

        test = Test(
            technique_id=technique.id,
            # Separator keeps the running number apart from the technique name.
            name=f"Demo Test {number} - {technique.name[:40]}",
            description=f"Automated demo test #{number} for {technique.mitre_id}.",
            platform=random.choice(PLATFORMS),
            procedure_text=(
                f"Step 1: Prepare environment.\n"
                f"Step 2: Execute {technique.mitre_id} procedure.\n"
                f"Step 3: Observe results."
            ),
            tool_used=random.choice(tools),
            execution_date=now - timedelta(days=random.randint(0, 60)),
            created_by=creator.id,
            result=None if state in states_without_result else random.choice(TEST_RESULTS),
            state=state,
            created_at=now - timedelta(days=random.randint(0, 90)),
        )

        # Populate team fields based on state.
        if state in states_with_red_report:
            test.red_summary = f"Attack executed successfully using {test.tool_used}."
            # Weighted 3:1 towards a successful attack.
            test.attack_success = random.choice([True, True, True, False])

        if state in states_with_blue_report:
            test.blue_summary = "Detection observed in SIEM. Alert fired."
            test.detection_result = random.choice(TEST_RESULTS)

        if state == TestState.validated:
            # A validated test carries an approval from a lead of each team.
            red_validator = random.choice(red_leads)
            blue_validator = random.choice(blue_leads)
            test.red_validated_by = red_validator.id
            test.red_validated_at = now - timedelta(days=random.randint(0, 10))
            test.red_validation_status = "approved"
            test.blue_validated_by = blue_validator.id
            test.blue_validated_at = now - timedelta(days=random.randint(0, 10))
            test.blue_validation_status = "approved"

        if state == TestState.rejected:
            # A rejected test is rejected by one lead; only that side is filled.
            rejector = random.choice(rejectors)
            if rejector.role == "red_lead":
                test.red_validated_by = rejector.id
                test.red_validated_at = now - timedelta(days=random.randint(0, 5))
                test.red_validation_status = "rejected"
                test.red_validation_notes = "Insufficient evidence of attack success."
            else:
                test.blue_validated_by = rejector.id
                test.blue_validated_at = now - timedelta(days=random.randint(0, 5))
                test.blue_validation_status = "rejected"
                test.blue_validation_notes = "Detection evidence not conclusive."

        db.add(test)
        tests.append(test)

    db.flush()
    logger.info("Created %d demo tests.", len(tests))
    return tests
# Define function _seed_evidences
def _seed_evidences(db: Session, tests: list[Test], users: list[User], count: int = 50) -> list[Evidence]:
    """Create *count* placeholder evidence records attached to demo tests.

    Args:
        db: Session the evidences are added to and flushed on (no commit).
        tests: Tests evidence may be attached to. Non-draft tests are
            preferred; all tests are used when every test is a draft.
        users: Candidate uploaders. Only ``red_tech`` and ``blue_tech`` users
            are used.
        count: Number of evidence records to create.

    Returns:
        The created evidences, or an empty list when *tests* is empty.
    """
    if not tests:
        return []

    # Fall back to the full list when no test has left the draft state.
    candidates = [t for t in tests if t.state != TestState.draft] or tests
    uploaders = [u for u in users if u.role in ("red_tech", "blue_tech")]

    created = []
    for number in range(1, count + 1):
        target = random.choice(candidates)
        uploader = random.choice(uploaders)
        # The evidence team follows the uploader's role.
        if uploader.role == "red_tech":
            side = TeamSide.red
        else:
            side = TeamSide.blue
        extension = random.choice(["png", "log", "pcap", "csv", "txt", "json"])
        file_name = f"evidence_{number}.{extension}"

        record = Evidence(
            test_id=target.id,
            file_name=file_name,
            file_path=f"{target.id}/{uuid.uuid4()}_{file_name}",
            # Two UUID hex strings give 64 hex characters as a dummy hash.
            sha256_hash=uuid.uuid4().hex + uuid.uuid4().hex,
            uploaded_by=uploader.id,
            uploaded_at=datetime.utcnow() - timedelta(days=random.randint(0, 30)),
            team=side,
            notes=f"Auto-generated demo evidence #{number}.",
        )
        db.add(record)
        created.append(record)

    db.flush()
    logger.info("Created %d demo evidences.", len(created))
    return created
# Define function _seed_audit_logs
def _seed_audit_logs(db: Session, users: list[User], count: int = 20) -> None:
    """Create *count* audit-log entries with random actors and actions.

    Args:
        db: Session the entries are added to and flushed on (no commit).
        users: Users the entries are attributed to, chosen at random.
        count: Number of entries to create.
    """
    entity_kinds = ["test", "technique", "user", "test_template"]

    for index in range(count):
        actor = random.choice(users)
        entry = AuditLog(
            user_id=actor.id,
            action=random.choice(AUDIT_ACTIONS),
            entity_type=random.choice(entity_kinds),
            # Random id; it does not point at an existing entity.
            entity_id=str(uuid.uuid4()),
            timestamp=datetime.utcnow() - timedelta(days=random.randint(0, 60)),
            details={"demo": True, "index": index},
        )
        db.add(entry)

    db.flush()
    logger.info("Created %d demo audit logs.", count)
# Define function _seed_notifications
def _seed_notifications(db: Session, users: list[User], count: int = 30) -> None:
    """Create *count* notifications for randomly chosen demo users.

    Args:
        db: Session the notifications are added to and flushed on (no commit).
        users: Recipients, chosen at random per notification.
        count: Number of notifications to create.
    """
    for number in range(1, count + 1):
        recipient = random.choice(users)
        kind = random.choice(NOTIFICATION_TYPES)
        # "test_assigned" -> "Test Assigned" for the title.
        label = kind.replace("_", " ").title()

        db.add(
            Notification(
                user_id=recipient.id,
                type=kind,
                title=f"Demo notification: {label} #{number}",
                message=f"This is an auto-generated demo notification ({kind}).",
                entity_type="test",
                # Random id; it does not point at an existing test.
                entity_id=uuid.uuid4(),
                read=random.choice([True, False]),
                created_at=datetime.utcnow() - timedelta(days=random.randint(0, 30)),
            )
        )

    db.flush()
    logger.info("Created %d demo notifications.", count)
# Define function _seed_templates
def _seed_templates(db: Session, techniques: list[Technique], count: int = 10) -> None:
    """Create up to *count* manual demo templates tagged ``source="demo"``.

    One template is created per entry of ``TEMPLATE_NAMES``, so no more than
    ``len(TEMPLATE_NAMES)`` templates are created however large *count* is.
    Techniques are assigned round-robin.

    Args:
        db: Session the templates are added to and flushed on (no commit).
        techniques: Techniques the templates refer to. Nothing is created
            when this is empty.
        count: Maximum number of templates to create.
    """
    if not techniques:
        # Warn like _seed_tests does, rather than skipping silently.
        logger.warning("No techniques available — skipping template seeding.")
        return

    names = TEMPLATE_NAMES[:count]
    for i, name in enumerate(names):
        # Cycle through the techniques when there are fewer than names.
        technique = techniques[i % len(techniques)]
        template = TestTemplate(
            mitre_technique_id=technique.mitre_id,
            name=name,
            description=f"Demo template: {name}. Targets {technique.mitre_id} ({technique.name}).",
            source="demo",
            source_url=None,
            attack_procedure=(
                f"1. Set up environment for {technique.mitre_id}.\n"
                "2. Execute the procedure.\n"
                "3. Record observations."
            ),
            expected_detection=f"SIEM should alert on {technique.mitre_id} indicators.",
            platform=random.choice(PLATFORMS),
            tool_suggested=random.choice(["powershell", "cmd", "bash", "python"]),
            severity=random.choice(["low", "medium", "high", "critical"]),
            is_active=True,
        )
        db.add(template)

    db.flush()
    # Report what was actually created, which can be fewer than *count*.
    logger.info("Created %d demo templates.", len(names))
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def seed_demo() -> dict:
    """Generate all demo data and return a summary of what was created.

    Previous demo data is removed first, and that cleanup commits on its own.
    The new data is then seeded and committed as one transaction, which is
    rolled back if any seeding step fails.

    Returns:
        Counts per entity kind, keyed by ``users``, ``techniques_updated``,
        ``tests``, ``evidences``, ``audit_logs``, ``notifications`` and
        ``templates``.

    Raises:
        Exception: Any error raised while seeding is re-raised after the
            session has been rolled back.
    """
    # Requested volumes, named once so the seeding calls and the summary agree.
    technique_count = 50
    test_count = 100
    evidence_count = 50
    audit_log_count = 20
    notification_count = 30
    template_count = 10

    db = SessionLocal()
    try:
        logger.info("=== Starting V3 demo seed ===")

        # Step 0: cleanup previous run
        _cleanup_demo_data(db)
        # Step 1: users
        users = _seed_users(db)
        # Step 2: technique statuses
        techniques = _seed_technique_statuses(db, count=technique_count)
        # Step 3: tests
        tests = _seed_tests(db, users, techniques, count=test_count)
        # Step 4: evidences
        evidences = _seed_evidences(db, tests, users, count=evidence_count)
        # Step 5: audit logs
        _seed_audit_logs(db, users, count=audit_log_count)
        # Step 6: notifications
        _seed_notifications(db, users, count=notification_count)
        # Step 7: templates
        _seed_templates(db, techniques, count=template_count)

        db.commit()

        # _seed_templates creates nothing without techniques and never more
        # templates than there are names, so mirror that here.
        templates_created = (
            min(template_count, len(TEMPLATE_NAMES)) if techniques else 0
        )
        summary = {
            "users": len(users),
            "techniques_updated": len(techniques),
            "tests": len(tests),
            "evidences": len(evidences),
            "audit_logs": audit_log_count,
            "notifications": notification_count,
            "templates": templates_created,
        }
        logger.info("=== Demo seed complete: %s ===", summary)
        return summary
    except Exception:
        # Discard the partially seeded data, then let the caller see the error.
        db.rollback()
        raise
    finally:
        db.close()
# Check: __name__ == "__main__"
if __name__ == "__main__":
    # Console logging so progress is visible when run as ``python -m app.seed_demo``.
    logging.basicConfig(
        level=logging.INFO,
        # The ": " keeps the logger name and the message from running together.
        format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
    )
    result = seed_demo()
    print(f"\nSeed complete: {result}")