"""Seed script — generates a realistic volume of demo data for V3 validation. Usage: python -m app.seed_demo **Prerequisite**: The MITRE sync must have been completed first so that real techniques exist in the database. Running twice is safe — the script detects existing demo data (by username prefix ``demo_``) and deletes it before re-creating, ensuring idempotency. """ # Import logging import logging # Import random import random # Import uuid import uuid # Import datetime, timedelta from datetime from datetime import datetime, timedelta # Import Session from sqlalchemy.orm from sqlalchemy.orm import Session # Import hash_password from app.auth from app.auth import hash_password # Import SessionLocal from app.database from app.database import SessionLocal # Import AuditLog from app.models.audit from app.models.audit import AuditLog # Import TeamSide, TechniqueStatus, TestResult, TestState from app.models.enums from app.models.enums import TeamSide, TechniqueStatus, TestResult, TestState # Import Evidence from app.models.evidence from app.models.evidence import Evidence # Import Notification from app.models.notification from app.models.notification import Notification # Import Technique from app.models.technique from app.models.technique import Technique # Import Test from app.models.test from app.models.test import Test # Import TestTemplate from app.models.test_template from app.models.test_template import TestTemplate # Import User from app.models.user from app.models.user import User # Assign logger = logging.getLogger(__name__) logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- DEMO_PREFIX = "demo_" # Assign ROLES = ["red_tech", "blue_tech", "red_lead", "blue_lead", "admin"] ROLES = ["red_tech", "blue_tech", "red_lead", "blue_lead", "admin"] # Assign TECHNIQUE_STATUSES = [ TECHNIQUE_STATUSES = [ 
TechniqueStatus.validated, TechniqueStatus.partial, TechniqueStatus.not_covered, TechniqueStatus.in_progress, TechniqueStatus.not_evaluated, ] # Assign TEST_STATES = [ TEST_STATES = [ TestState.draft, TestState.red_executing, TestState.blue_evaluating, TestState.in_review, TestState.validated, TestState.rejected, ] # Assign TEST_RESULTS = [ TEST_RESULTS = [ TestResult.detected, TestResult.not_detected, TestResult.partially_detected, ] # Assign NOTIFICATION_TYPES = [ NOTIFICATION_TYPES = [ # Literal argument value "test_assigned", # Literal argument value "validation_needed", # Literal argument value "test_rejected", # Literal argument value "test_validated", # Literal argument value "test_state_changed", ] # Assign AUDIT_ACTIONS = [ AUDIT_ACTIONS = [ # Literal argument value "create_test", # Literal argument value "update_test", # Literal argument value "validate_technique", # Literal argument value "upload_evidence", # Literal argument value "create_user", # Literal argument value "import_atomic_red_team", # Literal argument value "sync_mitre", # Literal argument value "login", # Literal argument value "reject_test", # Literal argument value "approve_test", ] # Assign PLATFORMS = ["windows", "linux", "macos"] PLATFORMS = ["windows", "linux", "macos"] # Assign TEMPLATE_NAMES = [ TEMPLATE_NAMES = [ # Literal argument value "Manual Credential Dumping Test", # Literal argument value "Custom Phishing Payload Delivery", # Literal argument value "Lateral Movement via RDP", # Literal argument value "Persistence via Registry Run Keys", # Literal argument value "Data Exfiltration over DNS", # Literal argument value "Process Injection via DLL", # Literal argument value "Privilege Escalation with Token Impersonation", # Literal argument value "Custom C2 Beacon Communication Test", # Literal argument value "Kerberoasting Attack Procedure", # Literal argument value "Living Off The Land Binaries Test", ] # 
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------

def _cleanup_demo_data(db: Session) -> None:
    """Remove all previously seeded demo data and commit the deletion.

    Demo users are identified by the ``DEMO_PREFIX`` username prefix. Their
    notifications and audit logs, the tests they created (with the evidences
    attached to those tests), and finally the users themselves are deleted.
    Templates whose ``source`` is ``"demo"`` are deleted regardless of
    whether any demo user exists.

    Args:
        db: Open session; it is committed before returning.
    """
    # ``_`` is a single-character wildcard in SQL LIKE, so a raw
    # ``LIKE 'demo_%'`` would also match real accounts such as
    # "demonstrator". ``autoescape=True`` makes the prefix match literal.
    demo_user_ids = [
        row.id
        for row in db.query(User.id)
        .filter(User.username.startswith(DEMO_PREFIX, autoescape=True))
        .all()
    ]

    # Children are deleted before their parents to respect FK constraints.
    if demo_user_ids:
        db.query(Notification).filter(
            Notification.user_id.in_(demo_user_ids)
        ).delete(synchronize_session=False)

        db.query(AuditLog).filter(
            AuditLog.user_id.in_(demo_user_ids)
        ).delete(synchronize_session=False)

        demo_test_ids = [
            row.id
            for row in db.query(Test.id)
            .filter(Test.created_by.in_(demo_user_ids))
            .all()
        ]
        if demo_test_ids:
            db.query(Evidence).filter(
                Evidence.test_id.in_(demo_test_ids)
            ).delete(synchronize_session=False)
            db.query(Test).filter(
                Test.id.in_(demo_test_ids)
            ).delete(synchronize_session=False)

    # Demo templates are tagged by source, not owned by a user.
    db.query(TestTemplate).filter(
        TestTemplate.source == "demo"
    ).delete(synchronize_session=False)

    if demo_user_ids:
        db.query(User).filter(
            User.id.in_(demo_user_ids)
        ).delete(synchronize_session=False)

    db.commit()
    logger.info("Cleaned up existing demo data.")


# ---------------------------------------------------------------------------
# Seeders
# ---------------------------------------------------------------------------

def _seed_users(db: Session) -> list[User]:
    """Create 5 users per role (25 total).

    Usernames and e-mails carry ``DEMO_PREFIX`` so a later cleanup can find
    them. Every user gets the password ``demo123``.

    Args:
        db: Open session; new users are added and flushed, not committed.

    Returns:
        The created ``User`` objects, in creation order.
    """
    users = []
    for role in ROLES:
        for i in range(1, 6):
            user = User(
                username=f"{DEMO_PREFIX}{role}_{i}",
                email=f"{DEMO_PREFIX}{role}_{i}@aegis-demo.local",
                hashed_password=hash_password("demo123"),
                role=role,
                is_active=True,
            )
            db.add(user)
            users.append(user)

    # Flush so the later seeders can reference ``user.id``.
    db.flush()
    logger.info("Created %d demo users.", len(users))
    return users


def _seed_technique_statuses(db: Session, count: int = 50) -> list[Technique]:
    """Set varied statuses on up to *count* techniques.

    Each selected technique gets a random ``status_global``; techniques that
    end up ``validated`` also get a ``last_review_date`` 1–30 days in the
    past.

    Args:
        db: Open session; changes are flushed, not committed.
        count: Maximum number of techniques to touch.

    Returns:
        The techniques that were updated, or ``[]`` (with a warning logged)
        when the technique table is empty.
    """
    techniques = db.query(Technique).limit(count).all()
    if not techniques:
        logger.warning("No techniques found — run MITRE sync first!")
        return []

    for tech in techniques:
        tech.status_global = random.choice(TECHNIQUE_STATUSES)
        if tech.status_global == TechniqueStatus.validated:
            tech.last_review_date = datetime.utcnow() - timedelta(
                days=random.randint(1, 30)
            )

    db.flush()
    logger.info("Updated status on %d techniques.", len(techniques))
    return techniques


def _seed_tests(
    db: Session,
    users: list[User],
    techniques: list[Technique],
    count: int = 100,
) -> list[Test]:
    """Create *count* tests in various pipeline states.

    Each test targets a random technique, is created by a random red/blue
    technician, and gets a random state. Team summaries and validation
    fields are filled in only for the states in which they apply.

    Args:
        db: Open session; new tests are added and flushed, not committed.
        users: Demo users to draw creators and validators from.
        techniques: Techniques the tests may target.
        count: Number of tests to create.

    Returns:
        The created ``Test`` objects, or ``[]`` (with a warning logged) when
        *techniques* is empty.
    """
    if not techniques:
        logger.warning("No techniques available — skipping test seeding.")
        return []

    red_techs = [u for u in users if u.role == "red_tech"]
    blue_techs = [u for u in users if u.role == "blue_tech"]
    red_leads = [u for u in users if u.role == "red_lead"]
    blue_leads = [u for u in users if u.role == "blue_lead"]

    # Loop-invariant pools and state groups, built once.
    creators = red_techs + blue_techs
    leads = red_leads + blue_leads
    no_result_states = (TestState.draft, TestState.red_executing)
    red_done_states = (
        TestState.blue_evaluating,
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    )
    blue_done_states = (
        TestState.in_review,
        TestState.validated,
        TestState.rejected,
    )

    tests = []
    for i in range(count):
        technique = random.choice(techniques)
        state = random.choice(TEST_STATES)
        creator = random.choice(creators)

        test = Test(
            technique_id=technique.id,
            name=f"Demo Test {i + 1} — {technique.name[:40]}",
            description=f"Automated demo test #{i + 1} for {technique.mitre_id}.",
            platform=random.choice(PLATFORMS),
            procedure_text=(
                f"Step 1: Prepare environment.\n"
                f"Step 2: Execute {technique.mitre_id} procedure.\n"
                f"Step 3: Observe results."
            ),
            tool_used=random.choice(
                ["powershell", "bash", "cmd", "python", "caldera", "metasploit"]
            ),
            execution_date=datetime.utcnow() - timedelta(days=random.randint(0, 60)),
            created_by=creator.id,
            # A test that has not finished red execution has no result yet.
            result=(
                random.choice(TEST_RESULTS)
                if state not in no_result_states
                else None
            ),
            state=state,
            created_at=datetime.utcnow() - timedelta(days=random.randint(0, 90)),
        )

        # Populate team fields based on how far the test has progressed.
        if state in red_done_states:
            test.red_summary = f"Attack executed successfully using {test.tool_used}."
            test.attack_success = random.choice([True, True, True, False])

        if state in blue_done_states:
            test.blue_summary = "Detection observed in SIEM. Alert fired."
            test.detection_result = random.choice(TEST_RESULTS)

        if state == TestState.validated:
            # A validated test was approved by a lead from each team.
            rv = random.choice(red_leads)
            bv = random.choice(blue_leads)
            test.red_validated_by = rv.id
            test.red_validated_at = datetime.utcnow() - timedelta(
                days=random.randint(0, 10)
            )
            test.red_validation_status = "approved"
            test.blue_validated_by = bv.id
            test.blue_validated_at = datetime.utcnow() - timedelta(
                days=random.randint(0, 10)
            )
            test.blue_validation_status = "approved"

        if state == TestState.rejected:
            # A rejected test is rejected by a single lead; only that
            # lead's side of the validation fields is filled in.
            rejector = random.choice(leads)
            if rejector.role == "red_lead":
                test.red_validated_by = rejector.id
                test.red_validated_at = datetime.utcnow() - timedelta(
                    days=random.randint(0, 5)
                )
                test.red_validation_status = "rejected"
                test.red_validation_notes = "Insufficient evidence of attack success."
            else:
                test.blue_validated_by = rejector.id
                test.blue_validated_at = datetime.utcnow() - timedelta(
                    days=random.randint(0, 5)
                )
                test.blue_validation_status = "rejected"
                test.blue_validation_notes = "Detection evidence not conclusive."

        db.add(test)
        tests.append(test)

    # Flush so evidences can reference ``test.id``.
    db.flush()
    logger.info("Created %d demo tests.", len(tests))
    return tests


def _seed_evidences(
    db: Session,
    tests: list[Test],
    users: list[User],
    count: int = 50,
) -> list[Evidence]:
    """Create *count* dummy evidence records.

    Evidences are attached to random tests that are past the draft state
    (falling back to all tests when every test is a draft) and uploaded by
    random red/blue technicians. No file is written; ``file_path`` and
    ``sha256_hash`` are placeholders.

    Args:
        db: Open session; new evidences are added and flushed, not committed.
        tests: Tests the evidences may be attached to.
        users: Demo users to draw uploaders from.
        count: Number of evidences to create.

    Returns:
        The created ``Evidence`` objects, or ``[]`` when *tests* is empty.
    """
    if not tests:
        return []

    # Prefer tests past draft; fall back to every test if all are drafts.
    eligible = [t for t in tests if t.state != TestState.draft] or tests
    red_blue = [u for u in users if u.role in ("red_tech", "blue_tech")]

    evidences = []
    for i in range(count):
        test = random.choice(eligible)
        uploader = random.choice(red_blue)
        team = TeamSide.red if uploader.role == "red_tech" else TeamSide.blue
        ext = random.choice(["png", "log", "pcap", "csv", "txt", "json"])
        fname = f"evidence_{i + 1}.{ext}"

        evidence = Evidence(
            test_id=test.id,
            file_name=fname,
            file_path=f"{test.id}/{uuid.uuid4()}_{fname}",
            # Dummy hash: two UUID hex strings give 64 hex characters.
            sha256_hash=uuid.uuid4().hex + uuid.uuid4().hex,
            uploaded_by=uploader.id,
            uploaded_at=datetime.utcnow() - timedelta(days=random.randint(0, 30)),
            team=team,
            notes=f"Auto-generated demo evidence #{i + 1}.",
        )
        db.add(evidence)
        evidences.append(evidence)

    db.flush()
    logger.info("Created %d demo evidences.", len(evidences))
    return evidences


def _seed_audit_logs(db: Session, users: list[User], count: int = 20) -> None:
    """Create *count* varied audit log entries.

    Each entry belongs to a random demo user and carries a random action,
    entity type and entity id, with a timestamp up to 60 days in the past.

    Args:
        db: Open session; new entries are added and flushed, not committed.
        users: Demo users to attribute the entries to.
        count: Number of entries to create.
    """
    for i in range(count):
        user = random.choice(users)
        log = AuditLog(
            user_id=user.id,
            action=random.choice(AUDIT_ACTIONS),
            entity_type=random.choice(["test", "technique", "user", "test_template"]),
            entity_id=str(uuid.uuid4()),
            timestamp=datetime.utcnow() - timedelta(days=random.randint(0, 60)),
            details={"demo": True, "index": i},
        )
        db.add(log)

    db.flush()
    logger.info("Created %d demo audit logs.", count)


def _seed_notifications(db: Session, users: list[User], count: int = 30) -> None:
    """Create *count* notifications spread across demo users.

    Each notification goes to a random demo user with a random type and
    read flag, and a creation time up to 30 days in the past.

    Args:
        db: Open session; new notifications are added and flushed, not
            committed.
        users: Demo users to address the notifications to.
        count: Number of notifications to create.
    """
    for i in range(count):
        user = random.choice(users)
        ntype = random.choice(NOTIFICATION_TYPES)
        notif = Notification(
            user_id=user.id,
            type=ntype,
            title=f"Demo notification: {ntype.replace('_', ' ').title()} #{i + 1}",
            message=f"This is an auto-generated demo notification ({ntype}).",
            entity_type="test",
            entity_id=uuid.uuid4(),
            read=random.choice([True, False]),
            created_at=datetime.utcnow() - timedelta(days=random.randint(0, 30)),
        )
        db.add(notif)

    db.flush()
    logger.info("Created %d demo notifications.", count)


def _seed_templates(db: Session, techniques: list[Technique], count: int = 10) -> int:
    """Create up to *count* manual demo templates.

    One template is created per entry of ``TEMPLATE_NAMES[:count]``, so at
    most ``len(TEMPLATE_NAMES)`` templates are produced. Techniques are
    assigned round-robin. Templates are tagged ``source="demo"`` so the
    cleanup step can find them.

    Args:
        db: Open session; new templates are added and flushed, not committed.
        techniques: Techniques the templates may target.
        count: Maximum number of templates to create.

    Returns:
        The number of templates actually created (``0`` when *techniques*
        is empty).
    """
    if not techniques:
        return 0

    created = 0
    for i, name in enumerate(TEMPLATE_NAMES[:count]):
        technique = techniques[i % len(techniques)]
        template = TestTemplate(
            mitre_technique_id=technique.mitre_id,
            name=name,
            description=(
                f"Demo template: {name}. "
                f"Targets {technique.mitre_id} ({technique.name})."
            ),
            source="demo",
            source_url=None,
            attack_procedure=(
                f"1. Set up environment for {technique.mitre_id}.\n"
                "2. Execute the procedure.\n"
                "3. Record observations."
            ),
            expected_detection=f"SIEM should alert on {technique.mitre_id} indicators.",
            platform=random.choice(PLATFORMS),
            tool_suggested=random.choice(["powershell", "cmd", "bash", "python"]),
            severity=random.choice(["low", "medium", "high", "critical"]),
            is_active=True,
        )
        db.add(template)
        created += 1

    db.flush()
    # Log what was really created, which can be fewer than ``count``.
    logger.info("Created %d demo templates.", created)
    return created


# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------

def seed_demo() -> dict:
    """Generate all demo data. Returns a summary dict.

    Previous demo data is removed first (and that removal is committed on
    its own), then users, technique statuses, tests, evidences, audit logs,
    notifications and templates are seeded and committed together.

    Returns:
        A dict with the keys ``users``, ``techniques_updated``, ``tests``,
        ``evidences``, ``audit_logs``, ``notifications`` and ``templates``,
        each holding the number of records actually created or updated.

    Raises:
        Exception: Any error raised while seeding is re-raised after the
            session has been rolled back.
    """
    audit_log_count = 20
    notification_count = 30

    db = SessionLocal()
    try:
        logger.info("=== Starting V3 demo seed ===")

        # Step 0: cleanup previous run
        _cleanup_demo_data(db)

        # Step 1: users
        users = _seed_users(db)

        # Step 2: technique statuses
        techniques = _seed_technique_statuses(db, count=50)

        # Step 3: tests
        tests = _seed_tests(db, users, techniques, count=100)

        # Step 4: evidences
        evidences = _seed_evidences(db, tests, users, count=50)

        # Step 5: audit logs
        _seed_audit_logs(db, users, count=audit_log_count)

        # Step 6: notifications
        _seed_notifications(db, users, count=notification_count)

        # Step 7: templates
        templates_created = _seed_templates(db, techniques, count=10)

        db.commit()

        # Report what was really written rather than the requested sizes.
        summary = {
            "users": len(users),
            "techniques_updated": len(techniques),
            "tests": len(tests),
            "evidences": len(evidences),
            "audit_logs": audit_log_count,
            "notifications": notification_count,
            "templates": templates_created,
        }
        logger.info("=== Demo seed complete: %s ===", summary)
        return summary
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
    )
    result = seed_demo()
    print(f"\nSeed complete: {result}")