"""Seed script — registers all known data sources in the data_sources table. Usage: python -m app.seed_data_sources """ # Import logging import logging # Import SessionLocal from app.database from app.database import SessionLocal # Import DataSource from app.models.data_source from app.models.data_source import DataSource # Assign logger = logging.getLogger(__name__) logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Data source definitions # --------------------------------------------------------------------------- INITIAL_SOURCES = [ { # Literal argument value "name": "atomic_red_team", # Literal argument value "display_name": "Atomic Red Team", # Literal argument value "type": "attack_procedure", # Literal argument value "url": "https://github.com/redcanaryco/atomic-red-team", # Literal argument value "description": "Open-source library of atomic tests mapped to MITRE ATT&CK. " # Literal argument value "Each test is a small, self-contained procedure for validating " # Literal argument value "detection of a specific technique.", # Literal argument value "sync_frequency": "weekly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/redcanaryco/atomic-red-team/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "atomic-red-team-master", # Literal argument value "atomics_dir": "atomics", }, }, { # Literal argument value "name": "sigma", # Literal argument value "display_name": "SigmaHQ Rules", # Literal argument value "type": "detection_rule", # Literal argument value "url": "https://github.com/SigmaHQ/sigma", # Literal argument value "description": "Generic SIEM detection rules in YAML format. " # Literal argument value "3 000+ rules with MITRE ATT&CK mappings.", # Literal argument value "sync_frequency": "weekly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/SigmaHQ/sigma/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "sigma-master", # Literal argument value "rules_dir": "rules", }, }, { # Literal argument value "name": "lolbas", # Literal argument value "display_name": "LOLBAS (Windows)", # Literal argument value "type": "attack_procedure", # Literal argument value "url": "https://github.com/LOLBAS-Project/LOLBAS", # Literal argument value "description": "Living Off The Land Binaries, Scripts, and Libraries — " # Literal argument value "legitimate Windows binaries that can be abused for attacks.", # Literal argument value "sync_frequency": "monthly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/LOLBAS-Project/LOLBAS/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "LOLBAS-master", # Literal argument value "yaml_dirs": ["yml/OSBinaries", "yml/OSLibraries", "yml/OSScripts"], }, }, { # Literal argument value "name": "gtfobins", # Literal argument value "display_name": "GTFOBins (Linux)", # Literal argument value "type": "attack_procedure", # Literal argument value "url": "https://gtfobins.github.io/", # Literal argument value "description": "Unix/Linux binaries that can be exploited for file transfer, " # Literal argument value "shell escape, privilege escalation, and more.", # Literal argument value "sync_frequency": "monthly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/GTFOBins/GTFOBins.github.io/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "GTFOBins.github.io-master", # Literal argument value "gtfobins_dir": "_gtfobins", }, }, { # Literal argument value "name": "caldera", # Literal argument value "display_name": "MITRE CALDERA", # Literal argument value "type": "attack_procedure", # Literal argument value "url": "https://github.com/mitre/stockpile", # Literal argument value "description": "Automated adversary emulation platform by MITRE. " # Literal argument value "400+ abilities (executable actions) mapped to ATT&CK " # Literal argument value "(via the Stockpile plugin).", # Literal argument value "sync_frequency": "monthly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/mitre/stockpile/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "stockpile-master", # Literal argument value "abilities_dir": "data/abilities", }, }, { # Literal argument value "name": "elastic_rules", # Literal argument value "display_name": "Elastic Detection Rules", # Literal argument value "type": "detection_rule", # Literal argument value "url": "https://github.com/elastic/detection-rules", # Literal argument value "description": "Open-source detection rules for Elastic SIEM. " # Literal argument value "1 000+ rules in KQL with MITRE ATT&CK mappings.", # Literal argument value "sync_frequency": "weekly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/elastic/detection-rules/archive/refs/heads/main.zip", # Literal argument value "root_prefix": "detection-rules-main", # Literal argument value "rules_dir": "rules", }, }, { # Literal argument value "name": "d3fend", # Literal argument value "display_name": "MITRE D3FEND", # Literal argument value "type": "defensive_technique", # Literal argument value "url": "https://d3fend.mitre.org/", # Literal argument value "description": "MITRE framework of defensive countermeasures. " # Literal argument value "200+ defensive techniques mapped to ATT&CK.", # Literal argument value "sync_frequency": "monthly", # Literal argument value "config": {}, }, { # Literal argument value "name": "mitre_cti", # Literal argument value "display_name": "MITRE CTI (Groups & Software)", # Literal argument value "type": "threat_intel", # Literal argument value "url": "https://github.com/mitre/cti", # Literal argument value "description": "MITRE ATT&CK STIX 2.0 data — threat actor groups, " # Literal argument value "software, and campaigns with TTP mappings.", # Literal argument value "sync_frequency": "monthly", # Literal argument value "config": { # Literal argument value "zip_url": "https://github.com/mitre/cti/archive/refs/heads/master.zip", # Literal argument value "root_prefix": "cti-master", # Literal argument value "enterprise_dir": "enterprise-attack", }, }, ] # Define function seed_data_sources def seed_data_sources() -> dict: """Register all known data sources. Existing entries are skipped.""" # Assign db = SessionLocal() db = SessionLocal() # Attempt the following; catch errors below try: # Assign created = 0 created = 0 # Assign skipped = 0 skipped = 0 # Assign existing_names = { existing_names = { row[0] for row in db.query(DataSource.name).all() } # Iterate over INITIAL_SOURCES for src in INITIAL_SOURCES: # Check: src["name"] in existing_names if src["name"] in existing_names: # Assign skipped = 1 skipped += 1 # Skip to the next loop iteration continue # Assign ds = DataSource( ds = DataSource( # Keyword argument: name name=src["name"], # Keyword argument: display_name display_name=src["display_name"], # Keyword argument: type type=src["type"], # Keyword argument: url url=src.get("url"), # Keyword argument: description description=src.get("description"), # Keyword argument: sync_frequency sync_frequency=src.get("sync_frequency", "manual"), # Keyword argument: config config=src.get("config"), # Keyword argument: is_enabled is_enabled=True, ) # Stage new record(s) for database insertion db.add(ds) # Assign created = 1 created += 1 # Commit all pending changes to the database db.commit() # Assign summary = {"created": created, "skipped": skipped} summary = {"created": created, "skipped": skipped} # Log info: "Data sources seed: %s", summary logger.info("Data sources seed: %s", summary) # Return summary return summary # Handle Exception except Exception: # Roll back all uncommitted changes db.rollback() # raise raise # Always execute this cleanup block finally: # Close the database session db.close() # Check: __name__ == "__main__" if __name__ == "__main__": # Call logging.basicConfig() logging.basicConfig( # Keyword argument: level level=logging.INFO, # Keyword argument: format format="%(asctime)s %(levelname)-8s %(name)s — %(message)s", ) # Assign result = seed_data_sources() result = seed_data_sources() # Call print() print(f"\nData sources seed complete: {result}")