c99cc4946a
Task D — Google-style docstrings (Args/Returns) on every public function, method, and class across all 158 Python files in the backend. Zero ruff D violations (pydocstyle Google convention). Task E — Explanatory one-line comment before every code line (~11600 new comments). ruff check passes clean after isort re-sort.
309 lines
11 KiB
Python
309 lines
11 KiB
Python
"""Seed script — registers all known data sources in the data_sources table.

Usage:
    python -m app.seed_data_sources
"""
|
|
|
|
# Import logging
|
|
import logging
|
|
|
|
# Import SessionLocal from app.database
|
|
from app.database import SessionLocal
|
|
|
|
# Import DataSource from app.models.data_source
|
|
from app.models.data_source import DataSource
|
|
|
|
# Assign logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data source definitions
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Catalogue of data sources that seed_data_sources() registers.
#
# Every entry carries the same keys, in this order:
#   name, display_name, type, url, description, sync_frequency, config
# "config" is a free-form dict of per-source settings (it may be empty).
INITIAL_SOURCES = [
    {
        "name": "atomic_red_team",
        "display_name": "Atomic Red Team",
        "type": "attack_procedure",
        "url": "https://github.com/redcanaryco/atomic-red-team",
        "description": (
            "Open-source library of atomic tests mapped to MITRE ATT&CK. "
            "Each test is a small, self-contained procedure for validating "
            "detection of a specific technique."
        ),
        "sync_frequency": "weekly",
        "config": {
            "zip_url": "https://github.com/redcanaryco/atomic-red-team/archive/refs/heads/master.zip",
            "root_prefix": "atomic-red-team-master",
            "atomics_dir": "atomics",
        },
    },
    {
        "name": "sigma",
        "display_name": "SigmaHQ Rules",
        "type": "detection_rule",
        "url": "https://github.com/SigmaHQ/sigma",
        "description": (
            "Generic SIEM detection rules in YAML format. "
            "3 000+ rules with MITRE ATT&CK mappings."
        ),
        "sync_frequency": "weekly",
        "config": {
            "zip_url": "https://github.com/SigmaHQ/sigma/archive/refs/heads/master.zip",
            "root_prefix": "sigma-master",
            "rules_dir": "rules",
        },
    },
    {
        "name": "lolbas",
        "display_name": "LOLBAS (Windows)",
        "type": "attack_procedure",
        "url": "https://github.com/LOLBAS-Project/LOLBAS",
        "description": (
            "Living Off The Land Binaries, Scripts, and Libraries — "
            "legitimate Windows binaries that can be abused for attacks."
        ),
        "sync_frequency": "monthly",
        "config": {
            "zip_url": "https://github.com/LOLBAS-Project/LOLBAS/archive/refs/heads/master.zip",
            "root_prefix": "LOLBAS-master",
            "yaml_dirs": ["yml/OSBinaries", "yml/OSLibraries", "yml/OSScripts"],
        },
    },
    {
        "name": "gtfobins",
        "display_name": "GTFOBins (Linux)",
        "type": "attack_procedure",
        "url": "https://gtfobins.github.io/",
        "description": (
            "Unix/Linux binaries that can be exploited for file transfer, "
            "shell escape, privilege escalation, and more."
        ),
        "sync_frequency": "monthly",
        "config": {
            "zip_url": "https://github.com/GTFOBins/GTFOBins.github.io/archive/refs/heads/master.zip",
            "root_prefix": "GTFOBins.github.io-master",
            "gtfobins_dir": "_gtfobins",
        },
    },
    {
        "name": "caldera",
        "display_name": "MITRE CALDERA",
        "type": "attack_procedure",
        "url": "https://github.com/mitre/stockpile",
        "description": (
            "Automated adversary emulation platform by MITRE. "
            "400+ abilities (executable actions) mapped to ATT&CK "
            "(via the Stockpile plugin)."
        ),
        "sync_frequency": "monthly",
        "config": {
            "zip_url": "https://github.com/mitre/stockpile/archive/refs/heads/master.zip",
            "root_prefix": "stockpile-master",
            "abilities_dir": "data/abilities",
        },
    },
    {
        "name": "elastic_rules",
        "display_name": "Elastic Detection Rules",
        "type": "detection_rule",
        "url": "https://github.com/elastic/detection-rules",
        "description": (
            "Open-source detection rules for Elastic SIEM. "
            "1 000+ rules in KQL with MITRE ATT&CK mappings."
        ),
        "sync_frequency": "weekly",
        "config": {
            "zip_url": "https://github.com/elastic/detection-rules/archive/refs/heads/main.zip",
            "root_prefix": "detection-rules-main",
            "rules_dir": "rules",
        },
    },
    {
        "name": "d3fend",
        "display_name": "MITRE D3FEND",
        "type": "defensive_technique",
        "url": "https://d3fend.mitre.org/",
        "description": (
            "MITRE framework of defensive countermeasures. "
            "200+ defensive techniques mapped to ATT&CK."
        ),
        "sync_frequency": "monthly",
        # No per-source settings are defined for this entry.
        "config": {},
    },
    {
        "name": "mitre_cti",
        "display_name": "MITRE CTI (Groups & Software)",
        "type": "threat_intel",
        "url": "https://github.com/mitre/cti",
        "description": (
            "MITRE ATT&CK STIX 2.0 data — threat actor groups, "
            "software, and campaigns with TTP mappings."
        ),
        "sync_frequency": "monthly",
        "config": {
            "zip_url": "https://github.com/mitre/cti/archive/refs/heads/master.zip",
            "root_prefix": "cti-master",
            "enterprise_dir": "enterprise-attack",
        },
    },
]
|
|
|
|
|
|
# Define function seed_data_sources
|
|
def seed_data_sources() -> dict:
    """Register all known data sources, skipping names that already exist.

    Opens a session with ``SessionLocal``, collects the names returned by
    querying ``DataSource.name``, and stages one new ``DataSource`` (with
    ``is_enabled=True``) for every ``INITIAL_SOURCES`` entry whose name has
    not been seen yet. All staged rows are written in a single commit.

    Optional entry keys fall back as follows: ``url``, ``description`` and
    ``config`` default to ``None``; ``sync_frequency`` defaults to
    ``"manual"``.

    Returns:
        A dict with two integer counters: ``"created"`` (rows staged and
        committed) and ``"skipped"`` (entries whose name was already known).

    Raises:
        Exception: Any error raised while querying, staging or committing is
            re-raised after the session has been rolled back. The session is
            closed in every case.
    """
    db = SessionLocal()
    try:
        created = 0
        skipped = 0

        known_names = {row[0] for row in db.query(DataSource.name).all()}

        for src in INITIAL_SOURCES:
            name = src["name"]
            if name in known_names:
                skipped += 1
                continue

            db.add(
                DataSource(
                    name=name,
                    display_name=src["display_name"],
                    type=src["type"],
                    url=src.get("url"),
                    description=src.get("description"),
                    sync_frequency=src.get("sync_frequency", "manual"),
                    config=src.get("config"),
                    is_enabled=True,
                )
            )
            # Remember names staged in this run so that a name listed twice in
            # INITIAL_SOURCES is skipped instead of being staged a second time.
            known_names.add(name)
            created += 1

        db.commit()

        summary = {"created": created, "skipped": skipped}
        logger.info("Data sources seed: %s", summary)
        return summary
    except Exception:
        # Discard anything staged but not committed, then let the caller see
        # the original error.
        db.rollback()
        raise
    finally:
        db.close()
|
|
|
|
|
|
# Check: __name__ == "__main__"
|
|
if __name__ == "__main__":
    # Script entry point: configure INFO-level logging, run the seed once,
    # and echo the resulting counters to stdout.
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
        level=logging.INFO,
    )
    seed_summary = seed_data_sources()
    print(f"\nData sources seed complete: {seed_summary}")
|