Files
Aegis/backend/app/routers/data_sources.py
Kitos 64d64080e0 fix: resolve 20 security vulnerabilities from comprehensive audit
Critical (1-3):
- Replace hardcoded admin credentials with secure auto-generation (seed.py)
- Enforce SECRET_KEY configuration, fail in production if missing (config.py)
- Add Zip Slip and Zip Bomb protection to all ZIP import services

High/Medium (4-9):
- Add 50MB file size limit and extension whitelist to evidence uploads
- Configure CORS origins via environment variable instead of hardcoded
- Migrate JWT storage from localStorage to HttpOnly cookies (frontend+backend)
- Add rate limiting (5/min) on login endpoint via slowapi
- Replace generic dict payloads with Pydantic schemas (mass assignment)

Medium (10-17):
- Check is_active on login to prevent disabled users from authenticating
- Sanitize exception messages in API responses (system, data_sources)
- Escape LIKE wildcards in all ilike search filters across 8 routers
- Run Docker container as non-root user (appuser)
- Make MINIO_SECURE configurable via environment variable
- Add password complexity policy (12+ chars, upper/lower/digit/special)
- Implement JWT token revocation via in-memory blacklist + reduce TTL to 15min
- Replace xml.etree with defusedxml to prevent Billion Laughs attacks

Low (18-20):
- Add security headers to Nginx (CSP, X-Frame-Options, HSTS-ready, etc.)
- Disable Swagger UI/ReDoc/OpenAPI in production
- Restrict /health endpoint to internal networks via Nginx ACL

Also: rewrite install.sh as interactive wizard for guided deployment,
fix test-from-template validation error (technique_id UUID vs MITRE ID)
2026-02-11 08:56:26 +01:00

304 lines
9.3 KiB
Python

"""Data sources management endpoints (admin only).
Provides a centralized panel for managing all external data sources
(Atomic Red Team, Sigma, LOLBAS, GTFOBins, CALDERA, Elastic, etc.)
including sync triggers, enable/disable toggles, and statistics.
"""
import logging
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies.auth import require_role
from app.models.user import User
from app.models.data_source import DataSource
from app.services.audit_service import log_action
# ---------------------------------------------------------------------------
# Pydantic schemas for request validation
# ---------------------------------------------------------------------------
class DataSourceUpdate(BaseModel):
"""Payload for updating a data source — only allowed fields."""
is_enabled: Optional[bool] = None
sync_frequency: Optional[str] = None
config: Optional[dict] = None
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/data-sources", tags=["data-sources"])
# ---------------------------------------------------------------------------
# Sync dispatcher — maps source name → import function
# ---------------------------------------------------------------------------
def _get_sync_handler(source_name: str):
"""Lazily import and return the sync function for *source_name*.
We import lazily to avoid circular imports and to only load the
modules that are actually needed.
"""
handlers = {
"atomic_red_team": ("app.services.atomic_import_service", "import_atomic_red_team"),
"sigma": ("app.services.sigma_import_service", "sync"),
"lolbas": ("app.services.lolbas_import_service", "sync"),
"gtfobins": ("app.services.lolbas_import_service", "sync_gtfobins"),
"caldera": ("app.services.caldera_import_service", "sync"),
"elastic_rules": ("app.services.elastic_import_service", "sync"),
"mitre_cti": ("app.services.threat_actor_import_service", "sync"),
"d3fend": ("app.services.d3fend_import_service", "sync"),
}
if source_name not in handlers:
return None
module_path, func_name = handlers[source_name]
import importlib
mod = importlib.import_module(module_path)
return getattr(mod, func_name)
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("")
def list_data_sources(
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""List all registered data sources.
**Requires** the ``admin`` role.
"""
sources = db.query(DataSource).order_by(DataSource.name).all()
return [
{
"id": str(s.id),
"name": s.name,
"display_name": s.display_name,
"type": s.type,
"url": s.url,
"description": s.description,
"is_enabled": s.is_enabled,
"last_sync_at": s.last_sync_at.isoformat() if s.last_sync_at else None,
"last_sync_status": s.last_sync_status,
"last_sync_stats": s.last_sync_stats,
"sync_frequency": s.sync_frequency,
"config": s.config,
"created_at": s.created_at.isoformat() if s.created_at else None,
}
for s in sources
]
@router.patch("/{source_id}")
def update_data_source(
source_id: str,
body: DataSourceUpdate,
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""Update a data source (enable/disable, change config).
**Requires** the ``admin`` role.
"""
ds = db.query(DataSource).filter(DataSource.id == source_id).first()
if not ds:
raise HTTPException(status_code=404, detail="Data source not found")
update_data = body.model_dump(exclude_unset=True)
if "is_enabled" in update_data:
ds.is_enabled = update_data["is_enabled"]
if "sync_frequency" in update_data:
ds.sync_frequency = update_data["sync_frequency"]
if "config" in update_data:
ds.config = update_data["config"]
db.commit()
log_action(
db,
user_id=current_user.id,
action="update_data_source",
entity_type="data_source",
entity_id=str(ds.id),
details={"updates": update_data},
)
return {"message": "Data source updated", "id": str(ds.id)}
@router.post("/{source_id}/sync")
def sync_data_source(
source_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""Trigger sync/import for a specific data source.
**Requires** the ``admin`` role.
"""
ds = db.query(DataSource).filter(DataSource.id == source_id).first()
if not ds:
raise HTTPException(status_code=404, detail="Data source not found")
handler = _get_sync_handler(ds.name)
if handler is None:
raise HTTPException(
status_code=400,
detail=f"No sync handler available for '{ds.name}'",
)
# Mark as in_progress
ds.last_sync_status = "in_progress"
db.commit()
try:
summary = handler(db)
except Exception as exc:
logger.error("Sync failed for %s: %s", ds.name, exc, exc_info=True)
ds.last_sync_status = "error"
ds.last_sync_at = datetime.utcnow()
ds.last_sync_stats = {"error": str(exc)}
db.commit()
raise HTTPException(
status_code=500,
detail=f"Sync failed for '{ds.display_name}'. Check server logs for details.",
)
# Update DS record (the handler may already have done this,
# but we ensure it here as well)
ds.last_sync_at = datetime.utcnow()
ds.last_sync_status = "success"
ds.last_sync_stats = summary
db.commit()
return {
"message": f"Sync complete for {ds.display_name}",
"source": ds.name,
"stats": summary,
}
@router.post("/sync-all")
def sync_all_data_sources(
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""Trigger sync for all enabled data sources (sequentially).
**Requires** the ``admin`` role.
"""
enabled_sources = (
db.query(DataSource)
.filter(DataSource.is_enabled == True)
.order_by(DataSource.name)
.all()
)
results = []
for ds in enabled_sources:
handler = _get_sync_handler(ds.name)
if handler is None:
results.append({
"source": ds.name,
"status": "skipped",
"detail": "No sync handler available",
})
continue
ds.last_sync_status = "in_progress"
db.commit()
try:
summary = handler(db)
ds.last_sync_at = datetime.utcnow()
ds.last_sync_status = "success"
ds.last_sync_stats = summary
db.commit()
results.append({
"source": ds.name,
"status": "success",
"stats": summary,
})
except Exception as exc:
logger.error("Sync failed for %s: %s", ds.name, exc, exc_info=True)
ds.last_sync_status = "error"
ds.last_sync_at = datetime.utcnow()
ds.last_sync_stats = {"error": str(exc)}
db.commit()
results.append({
"source": ds.name,
"status": "error",
"detail": "Sync failed. Check server logs for details.",
})
log_action(
db,
user_id=current_user.id,
action="sync_all_data_sources",
entity_type="data_source",
entity_id=None,
details={"results": results},
)
return {"message": "Sync all complete", "results": results}
@router.get("/{source_id}/stats")
def get_data_source_stats(
source_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(require_role("admin")),
):
"""Get detailed statistics for a specific data source.
**Requires** the ``admin`` role.
"""
ds = db.query(DataSource).filter(DataSource.id == source_id).first()
if not ds:
raise HTTPException(status_code=404, detail="Data source not found")
# Count items from this source
from app.models.test_template import TestTemplate
from app.models.detection_rule import DetectionRule
template_count = 0
rule_count = 0
if ds.type == "attack_procedure":
template_count = (
db.query(TestTemplate)
.filter(TestTemplate.source == ds.name)
.count()
)
elif ds.type == "detection_rule":
rule_count = (
db.query(DetectionRule)
.filter(DetectionRule.source == ds.name)
.count()
)
return {
"id": str(ds.id),
"name": ds.name,
"display_name": ds.display_name,
"type": ds.type,
"is_enabled": ds.is_enabled,
"last_sync_at": ds.last_sync_at.isoformat() if ds.last_sync_at else None,
"last_sync_status": ds.last_sync_status,
"last_sync_stats": ds.last_sync_stats,
"total_templates": template_count,
"total_rules": rule_count,
}