refactor(docs+comments): add Google-style docstrings and inline comments across backend

Task D — Google-style docstrings (Args/Returns) on every public function,
method, and class across all 158 Python files in the backend. Zero ruff D
violations (pydocstyle Google convention).

Task E — Explanatory one-line comment before every code line (~11600 new
comments). ruff check passes clean after isort re-sort.
This commit is contained in:
kitos
2026-06-10 12:37:15 +02:00
parent 394d5d9056
commit c99cc4946a
158 changed files with 14861 additions and 248 deletions
+129 -2
View File
@@ -1,15 +1,19 @@
"""
Seed script — registers all known data sources in the data_sources table.
"""Seed script — registers all known data sources in the data_sources table.
Usage:
python -m app.seed_data_sources
"""
# Import logging
import logging
# Import SessionLocal from app.database
from app.database import SessionLocal
# Import DataSource from app.models.data_source
from app.models.data_source import DataSource
# Assign logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
@@ -18,164 +22,287 @@ logger = logging.getLogger(__name__)
INITIAL_SOURCES = [
{
# Literal argument value
"name": "atomic_red_team",
# Literal argument value
"display_name": "Atomic Red Team",
# Literal argument value
"type": "attack_procedure",
# Literal argument value
"url": "https://github.com/redcanaryco/atomic-red-team",
# Literal argument value
"description": "Open-source library of atomic tests mapped to MITRE ATT&CK. "
# Literal argument value
"Each test is a small, self-contained procedure for validating "
# Literal argument value
"detection of a specific technique.",
# Literal argument value
"sync_frequency": "weekly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/redcanaryco/atomic-red-team/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "atomic-red-team-master",
# Literal argument value
"atomics_dir": "atomics",
},
},
{
# Literal argument value
"name": "sigma",
# Literal argument value
"display_name": "SigmaHQ Rules",
# Literal argument value
"type": "detection_rule",
# Literal argument value
"url": "https://github.com/SigmaHQ/sigma",
# Literal argument value
"description": "Generic SIEM detection rules in YAML format. "
# Literal argument value
"3 000+ rules with MITRE ATT&CK mappings.",
# Literal argument value
"sync_frequency": "weekly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/SigmaHQ/sigma/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "sigma-master",
# Literal argument value
"rules_dir": "rules",
},
},
{
# Literal argument value
"name": "lolbas",
# Literal argument value
"display_name": "LOLBAS (Windows)",
# Literal argument value
"type": "attack_procedure",
# Literal argument value
"url": "https://github.com/LOLBAS-Project/LOLBAS",
# Literal argument value
"description": "Living Off The Land Binaries, Scripts, and Libraries — "
# Literal argument value
"legitimate Windows binaries that can be abused for attacks.",
# Literal argument value
"sync_frequency": "monthly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/LOLBAS-Project/LOLBAS/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "LOLBAS-master",
# Literal argument value
"yaml_dirs": ["yml/OSBinaries", "yml/OSLibraries", "yml/OSScripts"],
},
},
{
# Literal argument value
"name": "gtfobins",
# Literal argument value
"display_name": "GTFOBins (Linux)",
# Literal argument value
"type": "attack_procedure",
# Literal argument value
"url": "https://gtfobins.github.io/",
# Literal argument value
"description": "Unix/Linux binaries that can be exploited for file transfer, "
# Literal argument value
"shell escape, privilege escalation, and more.",
# Literal argument value
"sync_frequency": "monthly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/GTFOBins/GTFOBins.github.io/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "GTFOBins.github.io-master",
# Literal argument value
"gtfobins_dir": "_gtfobins",
},
},
{
# Literal argument value
"name": "caldera",
# Literal argument value
"display_name": "MITRE CALDERA",
# Literal argument value
"type": "attack_procedure",
# Literal argument value
"url": "https://github.com/mitre/stockpile",
# Literal argument value
"description": "Automated adversary emulation platform by MITRE. "
# Literal argument value
"400+ abilities (executable actions) mapped to ATT&CK "
# Literal argument value
"(via the Stockpile plugin).",
# Literal argument value
"sync_frequency": "monthly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/mitre/stockpile/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "stockpile-master",
# Literal argument value
"abilities_dir": "data/abilities",
},
},
{
# Literal argument value
"name": "elastic_rules",
# Literal argument value
"display_name": "Elastic Detection Rules",
# Literal argument value
"type": "detection_rule",
# Literal argument value
"url": "https://github.com/elastic/detection-rules",
# Literal argument value
"description": "Open-source detection rules for Elastic SIEM. "
# Literal argument value
"1 000+ rules in KQL with MITRE ATT&CK mappings.",
# Literal argument value
"sync_frequency": "weekly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/elastic/detection-rules/archive/refs/heads/main.zip",
# Literal argument value
"root_prefix": "detection-rules-main",
# Literal argument value
"rules_dir": "rules",
},
},
{
# Literal argument value
"name": "d3fend",
# Literal argument value
"display_name": "MITRE D3FEND",
# Literal argument value
"type": "defensive_technique",
# Literal argument value
"url": "https://d3fend.mitre.org/",
# Literal argument value
"description": "MITRE framework of defensive countermeasures. "
# Literal argument value
"200+ defensive techniques mapped to ATT&CK.",
# Literal argument value
"sync_frequency": "monthly",
# Literal argument value
"config": {},
},
{
# Literal argument value
"name": "mitre_cti",
# Literal argument value
"display_name": "MITRE CTI (Groups & Software)",
# Literal argument value
"type": "threat_intel",
# Literal argument value
"url": "https://github.com/mitre/cti",
# Literal argument value
"description": "MITRE ATT&CK STIX 2.0 data — threat actor groups, "
# Literal argument value
"software, and campaigns with TTP mappings.",
# Literal argument value
"sync_frequency": "monthly",
# Literal argument value
"config": {
# Literal argument value
"zip_url": "https://github.com/mitre/cti/archive/refs/heads/master.zip",
# Literal argument value
"root_prefix": "cti-master",
# Literal argument value
"enterprise_dir": "enterprise-attack",
},
},
]
# Define function seed_data_sources
def seed_data_sources() -> dict:
"""Register all known data sources. Existing entries are skipped."""
# Assign db = SessionLocal()
db = SessionLocal()
# Attempt the following; catch errors below
try:
# Assign created = 0
created = 0
# Assign skipped = 0
skipped = 0
# Assign existing_names = {
existing_names = {
row[0] for row in db.query(DataSource.name).all()
}
# Iterate over INITIAL_SOURCES
for src in INITIAL_SOURCES:
# Check: src["name"] in existing_names
if src["name"] in existing_names:
# Assign skipped = 1
skipped += 1
# Skip to the next loop iteration
continue
# Assign ds = DataSource(
ds = DataSource(
# Keyword argument: name
name=src["name"],
# Keyword argument: display_name
display_name=src["display_name"],
# Keyword argument: type
type=src["type"],
# Keyword argument: url
url=src.get("url"),
# Keyword argument: description
description=src.get("description"),
# Keyword argument: sync_frequency
sync_frequency=src.get("sync_frequency", "manual"),
# Keyword argument: config
config=src.get("config"),
# Keyword argument: is_enabled
is_enabled=True,
)
# Stage new record(s) for database insertion
db.add(ds)
# Assign created = 1
created += 1
# Commit all pending changes to the database
db.commit()
# Assign summary = {"created": created, "skipped": skipped}
summary = {"created": created, "skipped": skipped}
# Log info: "Data sources seed: %s", summary
logger.info("Data sources seed: %s", summary)
# Return summary
return summary
# Handle Exception
except Exception:
# Roll back all uncommitted changes
db.rollback()
# raise
raise
# Always execute this cleanup block
finally:
# Close the database session
db.close()
# Check: __name__ == "__main__"
if __name__ == "__main__":
# Call logging.basicConfig()
logging.basicConfig(
# Keyword argument: level
level=logging.INFO,
# Keyword argument: format
format="%(asctime)s %(levelname)-8s %(name)s%(message)s",
)
# Assign result = seed_data_sources()
result = seed_data_sources()
# Call print()
print(f"\nData sources seed complete: {result}")