diff --git a/backend/app/config.py b/backend/app/config.py index 4fad710..604286f 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -7,9 +7,7 @@ from pydantic_settings import BaseSettings # --------------------------------------------------------------------------- # Detect environment: "production" when AEGIS_ENV or common indicators are set # --------------------------------------------------------------------------- -_is_production = os.environ.get("AEGIS_ENV", "").lower() == "production" or bool( - os.environ.get("SECRET_KEY") # having an explicit SECRET_KEY hints prod -) +_is_production = os.environ.get("AEGIS_ENV", "").lower() == "production" class Settings(BaseSettings): diff --git a/docs/FEATURE_ROADMAP.md b/docs/FEATURE_ROADMAP.md new file mode 100644 index 0000000..b9160d4 --- /dev/null +++ b/docs/FEATURE_ROADMAP.md @@ -0,0 +1,282 @@ +# Aegis — Feature Roadmap + +> **Status:** Phase 0 (Foundations) completed. Platform ready for new feature development. +> **Architecture:** Clean Modular Monolith · 367+ tests · CI/CD · Zero tech debt + +--- + +## Vision + +Aegis evolves in three stages: + +1. **Operational Features (Phases 1-7)** — Integrations, reporting, compliance, intelligence +2. **Detection Assurance Platform (Phases 8-13)** — Every detection has a lifecycle, ownership, and measurable health, and the system proactively orchestrates revalidation +3. 
**Enterprise Readiness (Phase 14)** — SSO/SAML and API keys for corporate deployment + +--- + +## Dependency Map + +``` +Phase 0 (DONE) ─────────────────────────────────────────────────── + │ │ + ├──► Phase 1 (Jira + Tempo) │ + │ │ + ├──► Phase 2 (Reporting) │ + │ └──► Phase 6 (Analytics + Webhooks) │ + │ └──► Phase 13 (Intelligent Alerts) │ + │ ▲ │ + ├──► Phase 3 (Compliance) │ + │ └──► Phase 7 (Multi-Channel Notifications) │ + │ │ + ├──► Phase 4 (Intel Auto) │ + │ └──► Phase 12 (Risk Intelligence) │ + │ ▲ │ + ├──► Phase 5 (Advanced Operations) │ + │ │ + ├──► Phase 8 (Detection Lifecycle) ──────────────────────── │ + │ │ │ + │ ├──► Phase 9 (Ownership & Daily Ops) │ + │ │ ├──► Phase 10 (Attack Paths) │ + │ │ └──► Phase 12 (Risk Intelligence) │ + │ │ │ + │ ├──► Phase 11 (Knowledge Management) │ + │ │ │ + │ └──► Phase 13 (Intelligent Alerts) │ + │ │ + └──► Phase 14 (Enterprise SSO + API Keys) │ +``` + +> **Parallelism:** The Phase 1-7 track and Phase 8 can run in parallel. Phases 9-13 all require Phase 8 to be complete (Phase 12 also requires Phase 4, and Phase 13 also requires Phase 6). Phase 14 is independent. 
+ +--- + +## Phase 1 — Jira + Tempo Integration + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| Jira Link Management | Associate any Aegis entity (test, technique, campaign) with a Jira ticket via bidirectional links | Traceability between security testing and project management | +| Jira Issue Search & Auto-creation | Search Jira from Aegis; auto-create tickets from tests/campaigns with pre-filled data | Eliminates context-switching and manual ticket creation | +| Jira Bidirectional Sync | Hourly background sync pulls Jira status/assignee/priority; push test results as Jira comments | Single pane of glass for both teams | +| Tempo Worklog Integration | Automatically log time to Tempo when tests complete, using Jira link | Accurate time tracking without manual entry | +| Internal Audited Worklogs | Immutable internal time registry with SHA256 integrity hash | Compliance-grade time audit trail | +| Frontend: Jira Panel + Worklog Timeline | React components for linking issues and viewing worklog history in detail views | Self-service Jira integration for all team members | + +--- + +## Phase 2 — Professional Reporting Engine + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| Report Template Engine | Jinja2-based HTML rendering with PDF (WeasyPrint) and DOCX (docxtpl) export | Professional branded reports for stakeholders | +| Purple Team Campaign Report | Executive summary, scope, techniques tested, critical findings, coverage evolution | Deliverable for management after every Purple Team exercise | +| Coverage & Executive Summary Reports | Coverage report, quarterly summary, technique detail — PDF/DOCX/HTML | Board-level reporting without spreadsheets | +| BI-Ready Analytics Endpoints | Flat JSON endpoints for coverage, tests, trends, operator metrics | Direct PowerBI/Tableau integration, zero ETL | +| Advanced Metrics | 
Coverage by tactic, never-tested techniques, avg validation time, detection trends | Operational KPIs for security leadership | + +--- + +## Phase 3 — Compliance & Security Hardening + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| Enhanced Audit Trail | IP address, user agent, SHA256 integrity hash, session ID on every audit entry | SOC2 / ISO 27001 audit compliance | +| Login Attempt Auditing | Record all login successes/failures with IP; constant-time comparison | Security monitoring and incident response | +| Password & Username Validation | Minimum 10 chars with complexity; reserved username blocking; character whitelist | Credential hardening | +| Extended Rate Limiting | Per-endpoint limits: sync 2/hr, writes 30/min, uploads 10/min, reports 5/min | DDoS and abuse protection | +| Data Classification & Retention | Labels (public/internal/sensitive/restricted) on tests, evidence, campaigns; automated retention | Data governance compliance | + +--- + +## Phase 4 — Automated Intelligence + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| OSINT Enrichment per Technique | Automatic CVE discovery via NVD API linked to ATT&CK techniques; weekly job | Proactive awareness of exploitable techniques | +| Stale Coverage Detection | Flag techniques with last validated test >12 months old; daily job | Prevents false sense of security from outdated validations | + +> Note: Stale detection is superseded by Phase 8's Decay Engine but serves as a functional stepping stone. 
+ +--- + +## Phase 5 — Advanced Operational Management + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| Mature Composite Scoring | Recency decay factor (1.0 recent → 0.2 if >1yr); DB-persisted configurable weights | Scores reflect actual security posture, not just test count | +| Coverage Evolution & History | Enhanced snapshots with tactic breakdown, stale/never-tested counts; temporal comparison | Track security improvement over months/quarters | + +--- + +## Phase 6 — BI Analytics + Webhooks + +**Dependencies:** Phase 2 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Webhook System | Configurable outbound HTTP on events (test validated, campaign completed, MITRE sync) with HMAC signatures | Real-time integration with Slack, Teams, SOAR, SIEM | +| Webhook Management | CRUD for configs; failure tracking; auto-disable on repeated failures | Self-service integrations for ops team | + +--- + +## Phase 7 — Multi-Channel Notifications + +**Dependencies:** Phase 3 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Email Notifications | SMTP-based dispatch for critical events (test validated, campaign completed, new MITRE techniques) | Reach team members not actively in the platform | +| Per-User Notification Preferences | Configurable preferences per user: email on test validated, campaign completed, etc. | Users control their notification volume | + +--- + +## Phase 8 — Detection Lifecycle Management (DLM) + +**Dependencies:** Phase 0 (done). Can run in parallel with Phases 1-7. + +> This is the transformational phase — Aegis evolves from a MITRE tracker into a Detection Assurance Platform. 
+ +| Feature | Description | User Value | +|---------|-------------|------------| +| Detection Assets | First-class entities for SIEM/EDR/Sigma/YARA/SPL/KQL rules with content hashing, version tracking, log source tracking | Every detection rule is a managed, versioned asset | +| Detection-Technique Mapping | N:M between detection assets and ATT&CK techniques with coverage type and confidence | Know exactly which detections cover which techniques | +| Detection Validations | Immutable records with expiry dates, environment snapshots, integrity hashes | Every detection has a "quality stamp" with an expiration date | +| Decay Engine | Configurable policies per platform/tactic; daily recalculation using recency, coverage, health, diversity factors | Automated detection of degrading security posture | +| Technique Confidence Scores | Composite 0-100 score with 4 factors and risk factor identification | Quantified confidence in detection capability per technique | +| Infrastructure Change Tracking | Log SIEM/EDR updates, parser changes, log source changes; auto-invalidate affected detections | No more silent detection failures after infrastructure changes | +| Configurable Decay Policies | Different decay rates for different platforms, asset types, or tactics | Policy flexibility for different risk appetites | +| DLM Dashboard | Health distribution, confidence distribution, expiring validations, infrastructure changes | Single-view detection health for CISO / SOC Manager | + +--- + +## Phase 9 — Ownership & Daily Operations + +**Dependencies:** Phase 8 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Technique & Detection Ownership | Owner, backup owner, and team on every technique and detection rule | Clear accountability for every detection gap | +| Bulk Ownership Assignment | Assign by tactic, platform, or team; orphan detection report | Quick onboarding of ownership model | +| Revalidation Queue | Auto-generated prioritized queue 
from expired validations, infra changes, OSINT, MITRE updates | Analysts know exactly what to work on each day | +| Analyst Dashboard | Personalized daily view: pending revalidations, expiring validations, active tests, infra changes | "My workday" in one API call | + +--- + +## Phase 10 — Attack Paths & Advanced Purple Team + +**Dependencies:** Phases 8, 9 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Attack Path Modeling | Chained attack scenarios (Initial Access → Execution → Persistence → Lateral Movement → Exfiltration) as first-class entities | Model realistic adversary behavior, not isolated techniques | +| Step-by-Step Execution | Execute attack paths step-by-step with detection tracking at each stage | Measure where in the kill chain detection fails | +| Collaborative Timeline | Real-time Red/Blue action recording with timestamps for MTTD/MTTR | Precise detection and response time measurement | +| Kill Chain Metrics | Auto-calculated detection rate, MTTD, furthest step reached without detection | Quantified Purple Team exercise results | + +--- + +## Phase 11 — Knowledge Management + +**Dependencies:** Phase 8 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Playbooks per Technique | Attack, detect, investigate, respond, hunt — Markdown, versioned, with tools and prerequisites | Institutional knowledge capture; onboarding accelerator | +| Lessons Learned | Immutable records linked to tests, campaigns, attack paths: what happened, root cause, fix | Continuous improvement loop | + +--- + +## Phase 12 — Risk Intelligence & Recommendations + +**Dependencies:** Phases 4, 8, 9 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Technique Risk Score | Multidimensional: exploitability, threat frequency, detection gap, staleness, tactic severity | Prioritize by actual risk, not just coverage status | +| Automated Recommendations | Prioritized actions: 
uncovered critical techniques, silent detections, orphan rules, tactic gaps | Intelligent prioritization out of the box | + +--- + +## Phase 13 — Intelligent Operational Alerts + +**Dependencies:** Phases 6, 8, 9, 12 + +| Feature | Description | User Value | +|---------|-------------|------------| +| Rule-Based Operational Alerts | Configurable rules evaluated hourly; multi-channel dispatch (in-app, email, webhook) | Proactive detection of operational issues | +| Pre-configured Alert Rules | Stale critical techniques, EDR update pending revalidation, new uncovered MITRE techniques, coverage regression, validation expiry wave | Operational intelligence out of the box | + +--- + +## Phase 14 — Enterprise Readiness (SSO + API Keys) + +**Dependencies:** Phase 0 (done) + +| Feature | Description | User Value | +|---------|-------------|------------| +| API Key Management | Scoped API keys for BI tools, SOAR, scripts; SHA256-hashed; shown once on creation | Secure automated integrations without sharing user credentials | +| SSO / SAML 2.0 | Single Sign-On via SAML 2.0 with any IdP (Okta, Azure AD, etc.) 
| Enterprise authentication; eliminates password management | + +--- + +## Phase Summary + +| Phase | Name | Dependencies | +|-------|------|-------------| +| ~~0~~ | ~~Foundations~~ | **DONE** | +| 1 | Jira + Tempo | Phase 0 | +| 2 | Professional Reporting | Phase 0 | +| 3 | Compliance & Security | Phase 0 | +| 4 | Automated Intelligence | Phase 0 | +| 5 | Advanced Operations | Phase 0 | +| 6 | Analytics + Webhooks | Phase 2 | +| 7 | Multi-Channel Notifications | Phase 3 | +| **8** | **Detection Lifecycle (DLM)** | **Phase 0** | +| 9 | Ownership & Daily Ops | Phase 8 | +| 10 | Attack Paths & Purple Team | Phases 8, 9 | +| 11 | Knowledge Management | Phase 8 | +| 12 | Risk Intelligence | Phases 4, 8, 9 | +| 13 | Intelligent Alerts | Phases 6, 8, 9, 12 | +| 14 | Enterprise SSO + API Keys | Phase 0 | + +--- + +## Recommended Additional Features + +| # | Feature | Rationale | Suggested Phase | +|---|---------|-----------|-----------------| +| A1 | Role-customizable dashboard | CISO sees executive metrics, Red Tech sees pending tests | Phase 5 | +| A2 | ATT&CK Navigator layer import/export | Teams already use Navigator externally | Phase 2 | +| A3 | Approval workflow for scoring weight changes | Prevent unsupervised config changes | Phase 3 | +| A4 | Custom tags and fields | Every org has its own taxonomy | Phase 5 | +| A5 | Bulk operations | Validate/reject multiple tests at once, mass campaign assignment | Phases 5, 9 | +| A6 | Markdown in descriptions | Technicians want to format procedures | Phase 11 | +| A7 | Detection Rule Git Sync | Sync rules from corporate Git repo | Phase 8 | +| A8 | Confidence overlay on heatmap | Heatmap shows coverage + confidence as second layer | Phase 8 | +| A9 | Auto Detection Gap → Ticket pipeline | Red breaks something → auto-queue item → assign to Blue | Phase 10 | +| A10 | Navigator export with Confidence | Export layer including confidence level per technique | Phase 8 | +| A11 | Comparative Attack Path Results | Compare same path 
executed on different dates | Phase 10 | +| A12 | SLA Tracking for Detection Gaps | Measure time from gap to rule implementation | Phase 13 | + +--- + +## New Python Dependencies by Phase + +| Phase | Package | Purpose | +|-------|---------|---------| +| 1 | `atlassian-python-api` | Jira REST API | +| 1 | `tempo-api-python-client` | Tempo worklog API | +| 2 | `weasyprint` | HTML → PDF | +| 2 | `docxtpl` | DOCX template rendering | +| 11 | `markdown`, `Pygments` | Markdown rendering + syntax highlighting | +| 14 | `python3-saml` | SAML 2.0 SSO |