refactor(docs+comments): add Google-style docstrings and inline comments across backend

Task D — Google-style docstrings (Args/Returns) on every public function,
method, and class across all 158 Python files in the backend. Zero ruff D
violations (pydocstyle Google convention).

Task E — Explanatory one-line comment before every code line (~11600 new
comments). ruff check passes clean after isort re-sort.
This commit is contained in:
kitos
2026-06-10 12:37:15 +02:00
parent 394d5d9056
commit c99cc4946a
158 changed files with 14861 additions and 248 deletions
+1
View File
@@ -0,0 +1 @@
"""Abstract port interfaces that infrastructure adapters must implement."""
+78 -1
View File
@@ -12,14 +12,19 @@ This satisfies the Open/Closed Principle — the system is open for new
import sources without modifying existing code.
"""
# Enable future language features for compatibility
from __future__ import annotations
# Import Any, Protocol, runtime_checkable from typing
from typing import Any, Protocol, runtime_checkable
# Import Session from sqlalchemy.orm
from sqlalchemy.orm import Session
# Apply the @runtime_checkable decorator
@runtime_checkable
# Define class ImportService
class ImportService(Protocol):
"""Contract for any data-import operation.
@@ -27,62 +32,134 @@ class ImportService(Protocol):
downloads, parses, and upserts records from an external source.
"""
def __call__(self, db: Session) -> dict[str, Any]: ...
# Define function __call__
def __call__(self, db: Session) -> dict[str, Any]:
"""Execute the import operation against the given database session.
Args:
db (Session): Active SQLAlchemy session to use for all DB operations.
Returns:
dict[str, Any]: Summary statistics for the import run (e.g. created,
updated, skipped counts).
"""
# ...
...
# Define class ImportServiceEntry
class ImportServiceEntry:
    """Deferred reference to an import function that lives in another module.

    The target module is imported only when the entry is first invoked; the
    resolved callable is then cached on the instance and reused by every
    later invocation.
    """

    __slots__ = ("_module_path", "_func_name", "_resolved")

    def __init__(self, module_path: str, func_name: str) -> None:
        """Record where the import function lives without importing it yet.

        Args:
            module_path (str): Dotted Python module path, e.g.
                ``"app.services.atomic_import_service"``.
            func_name (str): Name of the callable to fetch from
                *module_path*.
        """
        self._module_path = module_path
        self._func_name = func_name
        # ``None`` means "not resolved yet"; the first __call__ fills it in.
        self._resolved: ImportService | None = None

    def __call__(self, db: Session) -> dict[str, Any]:
        """Invoke the target function with *db*, resolving it on first use.

        Args:
            db (Session): SQLAlchemy session handed through unchanged to the
                underlying import function.

        Returns:
            dict[str, Any]: Whatever the underlying import function returns
                (by the ``ImportService`` contract, import statistics).
        """
        handler = self._resolved
        if handler is None:
            # First use: import the target module now and cache the callable
            # so subsequent calls skip the lookup entirely.
            import importlib

            module = importlib.import_module(self._module_path)
            handler = getattr(module, self._func_name)
            self._resolved = handler
        return handler(db)

    @property
    def source_info(self) -> str:
        """Return a human-readable identifier for this entry.

        Returns:
            str: The function reference formatted as
                ``"<module_path>.<func_name>"``.
        """
        module_path, func_name = self._module_path, self._func_name
        return f"{module_path}.{func_name}"
# Registry of known import sources, keyed by source name. Each value is a
# lazy ImportServiceEntry, so building this mapping only stores dotted paths
# and function names; no service module is imported here.
IMPORT_REGISTRY: dict[str, ImportServiceEntry] = {
    "atomic_red_team": ImportServiceEntry(
        module_path="app.services.atomic_import_service",
        func_name="import_atomic_red_team",
    ),
    "sigma": ImportServiceEntry(
        module_path="app.services.sigma_import_service",
        func_name="sync",
    ),
    "lolbas": ImportServiceEntry(
        module_path="app.services.lolbas_import_service",
        func_name="sync",
    ),
    # GTFOBins is served by a second entry point of the LOLBAS service module.
    "gtfobins": ImportServiceEntry(
        module_path="app.services.lolbas_import_service",
        func_name="sync_gtfobins",
    ),
    "caldera": ImportServiceEntry(
        module_path="app.services.caldera_import_service",
        func_name="sync",
    ),
    "elastic_rules": ImportServiceEntry(
        module_path="app.services.elastic_import_service",
        func_name="sync",
    ),
    "mitre_cti": ImportServiceEntry(
        module_path="app.services.threat_actor_import_service",
        func_name="sync",
    ),
    "d3fend": ImportServiceEntry(
        module_path="app.services.d3fend_import_service",
        func_name="sync",
    ),
}
# Define function get_import_handler
def get_import_handler(source_name: str) -> ImportServiceEntry | None:
    """Fetch the registered import handler for a source name.

    Args:
        source_name (str): Key to look up in ``IMPORT_REGISTRY``.

    Returns:
        ImportServiceEntry | None: The registered entry, or ``None`` when
            no handler is registered under *source_name*.
    """
    handler = IMPORT_REGISTRY.get(source_name)
    return handler
@@ -1,4 +1,9 @@
"""Abstract repository port interfaces for domain entity persistence."""
# Import TechniqueRepository from app.domain.ports.repositories.technique_repository
from app.domain.ports.repositories.technique_repository import TechniqueRepository
# Import TestRepository from app.domain.ports.repositories.test_repository
from app.domain.ports.repositories.test_repository import TestRepository
# Assign __all__ = ["TechniqueRepository", "TestRepository"]
__all__ = ["TechniqueRepository", "TestRepository"]
@@ -4,54 +4,157 @@ This is a domain contract — implementations live in infrastructure/.
The domain layer NEVER imports the implementation.
"""
# Enable future language features for compatibility
from __future__ import annotations
# Import uuid
import uuid
# Import NamedTuple, Protocol, runtime_checkable from typing
from typing import NamedTuple, Protocol, runtime_checkable
# Import TechniqueEntity from app.domain.entities.technique
from app.domain.entities.technique import TechniqueEntity
# Import TechniqueStatus from app.domain.enums
from app.domain.enums import TechniqueStatus
# Define class TechniqueWithCounts
class TechniqueWithCounts(NamedTuple):
    """Technique bundled with pre-aggregated counts for heatmap/scoring.

    Attributes:
        entity (TechniqueEntity): The technique the counts belong to.
        test_count (int): Total number of tests for the technique.
        validated_test_count (int): Number of those tests that are validated.
        detection_rule_count (int): Number of detection rules for the
            technique.
    """

    entity: TechniqueEntity
    test_count: int
    validated_test_count: int
    detection_rule_count: int
# Apply the @runtime_checkable decorator
@runtime_checkable
class TechniqueRepository(Protocol):
    """Data access contract for techniques (one per aggregate root).

    Every method is a protocol stub: it declares the signature an
    implementation must provide and carries no behaviour of its own.
    """

    # -- Single-entity access ----------------------------------------------

    def find_by_id(self, technique_id: uuid.UUID) -> TechniqueEntity | None:
        """Return the technique with the given primary key, or None if absent.

        Args:
            technique_id (uuid.UUID): Primary key of the technique to look up.

        Returns:
            TechniqueEntity | None: The matching entity, or None if not found.
        """
        ...

    def find_by_mitre_id(self, mitre_id: str) -> TechniqueEntity | None:
        """Return the technique matching a MITRE ATT&CK identifier, or None.

        Args:
            mitre_id (str): MITRE ATT&CK ID (e.g. ``"T1059"`` or
                ``"T1059.001"``).

        Returns:
            TechniqueEntity | None: The matching entity, or None if not found.
        """
        ...

    # -- List access -------------------------------------------------------

    def list_all(
        self,
        *,
        tactic: str | None = None,
        status: TechniqueStatus | None = None,
        review_required: bool | None = None,
    ) -> list[TechniqueEntity]:
        """Return all techniques, optionally narrowed by keyword-only filters.

        Args:
            tactic (str | None): When provided, restrict results to this
                tactic category.
            status (TechniqueStatus | None): When provided, restrict results
                to this status.
            review_required (bool | None): When provided, restrict results to
                techniques whose ``review_required`` flag matches this value.

        Returns:
            list[TechniqueEntity]: Matching technique entities; may be empty.
        """
        ...

    def list_by_ids(self, ids: list[uuid.UUID]) -> list[TechniqueEntity]:
        """Return all techniques whose primary keys are in *ids*.

        Args:
            ids (list[uuid.UUID]): Technique UUIDs to retrieve.

        Returns:
            list[TechniqueEntity]: Entities found for the supplied IDs; order
                is not guaranteed and missing IDs are silently omitted.
        """
        ...

    # -- Batch queries (scoring/heatmap performance) -----------------------

    def count_by_status(self) -> dict[TechniqueStatus, int]:
        """Return a count of techniques grouped by their global status.

        Returns:
            dict[TechniqueStatus, int]: Mapping from each status value to the
                number of techniques in that state.
        """
        ...

    def find_all_with_test_counts(self) -> list[TechniqueWithCounts]:
        """Return all techniques with pre-aggregated test and rule counts.

        Returns:
            list[TechniqueWithCounts]: Each element bundles a TechniqueEntity
                with its total, validated, and detection-rule counts for use
                in heatmap and scoring calculations.
        """
        ...

    # -- Mutations ---------------------------------------------------------

    def save(self, technique: TechniqueEntity) -> TechniqueEntity:
        """Persist a technique entity and return the saved state.

        Args:
            technique (TechniqueEntity): The entity to create or update.

        Returns:
            TechniqueEntity: The persisted entity, potentially with updated
                fields (e.g. server-side timestamps).
        """
        ...

    def exists_by_mitre_id(self, mitre_id: str) -> bool:
        """Return True if a technique with the given MITRE ID exists.

        Args:
            mitre_id (str): MITRE ATT&CK ID to check (e.g. ``"T1059"``).

        Returns:
            bool: True if a matching technique is found, False otherwise.
        """
        ...
@@ -3,14 +3,20 @@
This is a domain contract — implementations live in infrastructure/.
"""
# Enable future language features for compatibility
from __future__ import annotations
# Import uuid
import uuid
# Import Protocol from typing
from typing import Protocol
# Import TestState from app.domain.enums
from app.domain.enums import TestState
# Define class TestRepository
class TestRepository(Protocol):
"""Data access contract for tests."""
@@ -22,31 +28,81 @@ class TestRepository(Protocol):
Returns the ORM model directly (not a domain entity) because
the TestEntity is constructed at the service layer via
``TestEntity.from_orm()``.
Args:
test_id (uuid.UUID): Primary key of the test to look up.
Returns:
object | None: The ORM model instance, or None if not found.
"""
# ...
...
# -- List access -------------------------------------------------------
def list_by_technique(self, technique_id: uuid.UUID) -> list[object]:
    """Return all test ORM models associated with the given technique.

    Args:
        technique_id (uuid.UUID): Primary key of the technique whose
            tests to retrieve.

    Returns:
        list[object]: ORM model instances for all tests linked to this
            technique.
    """
    ...
# Define function list_by_state
def list_by_state(self, state: TestState) -> list[object]:
    """Return every test ORM model that is currently in *state*.

    Args:
        state (TestState): State to filter tests by.

    Returns:
        list[object]: ORM model instances for all tests in the given
            state.
    """
    ...
# Define function count_by_technique_and_state
def count_by_technique_and_state(
    self,
    technique_id: uuid.UUID,
) -> dict[TestState, int]:
    """Return test counts grouped by state for a single technique.

    Args:
        technique_id (uuid.UUID): Primary key of the technique whose test
            counts to aggregate.

    Returns:
        dict[TestState, int]: Mapping from each test state to the number
            of tests in that state for the given technique.
    """
    ...
# -- Batch queries -----------------------------------------------------
def get_states_and_results_for_technique(
    self,
    technique_id: uuid.UUID,
) -> list[tuple[str, str | None]]:
    """Return (state, detection_result) pairs for all tests of a technique.

    Used by ``TechniqueEntity.recalculate_status()`` so that it does not
    have to load full test models.

    Args:
        technique_id (uuid.UUID): Primary key of the technique whose test
            data to retrieve.

    Returns:
        list[tuple[str, str | None]]: One tuple per test, holding the test
            state string and the detection result string (or None if not
            yet set).
    """
    ...