Merge pull request #1 from keboola/feature/v2-fastapi-duckdb-docker-cli

feat: multi-instance deployment (14 must-have items)
This commit is contained in:
ZdenekSrotyr 2026-04-10 18:08:03 +02:00 committed by GitHub
commit dbc57d1de3
27 changed files with 7008 additions and 67 deletions

View file

@ -1,8 +1,9 @@
name: Build & Push
# SUPERSEDED by release.yml — CalVer tagging with stable/dev channels.
# Kept for manual trigger only. Automated builds use release.yml.
name: Build & Push (legacy)
on:
push:
branches: [main]
workflow_dispatch: {}
jobs:
test:
@ -24,27 +25,3 @@ jobs:
run: pytest tests/ -v --tb=short
env:
TESTING: "1"
build-and-push:
needs: test
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
- uses: actions/checkout@v5
- name: Log in to GHCR
uses: docker/login-action@v4
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v7
with:
push: true
tags: |
ghcr.io/${{ github.repository }}:latest
ghcr.io/${{ github.repository }}:${{ github.sha }}

148
.github/workflows/release.yml vendored Normal file
View file

@ -0,0 +1,148 @@
name: Release

# Tests, versions (CalVer YYYY.MM.N) and publishes the container image.
#   push to main        -> "stable" channel, followed by a smoke test
#   push to feature/**  -> "dev" channel
on:
  push:
    branches: [main, "feature/**"]

permissions:
  contents: write  # the build job pushes the claimed version tag
  packages: write  # the build job pushes images to GHCR

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Install dependencies
        run: uv pip install --system ".[dev]"
      - name: Run tests
        run: pytest tests/ -v --tb=short
        env:
          TESTING: "1"

  build-and-push:
    needs: test
    runs-on: ubuntu-latest
    outputs:
      image_tag: ${{ steps.meta.outputs.versioned_tag }}
      version: ${{ steps.meta.outputs.version }}
      channel: ${{ steps.meta.outputs.channel }}
    steps:
      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
          fetch-tags: true
      # The script reads the ref and commit from the runner's GITHUB_REF and
      # GITHUB_SHA variables rather than splicing workflow expressions into
      # the shell source: branch names matching feature/** may contain shell
      # metacharacters, and an interpolated ref would be executed as code.
      - name: Claim version tag (with retry to avoid race conditions)
        id: meta
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          YEAR_MONTH=$(date +%Y.%m)
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            CHANNEL="stable"
          else
            CHANNEL="dev"
          fi
          SHORT_SHA="${GITHUB_SHA:0:7}"
          # Claim a unique version by pushing a git tag BEFORE building.
          # Retry up to 5 times if another CI run took our N.
          TAG_CLAIMED=false
          for ATTEMPT in 1 2 3 4 5; do
            git fetch --tags --force
            # Use max(N) not count — safe even if tags are deleted
            MAX_N=$(git tag -l "*-${YEAR_MONTH}.*" | sed 's/.*\.//' | sort -n | tail -1)
            N=$(( ${MAX_N:-0} + 1 ))
            VERSION="${YEAR_MONTH}.${N}"
            TAG="${CHANNEL}-${VERSION}"
            git tag -a "$TAG" -m "Release $TAG"
            # stderr is left visible so a permission or network failure is
            # not mistaken for a lost race.
            if git push origin "$TAG"; then
              echo "Claimed tag $TAG (attempt $ATTEMPT)"
              TAG_CLAIMED=true
              break
            else
              echo "Could not push tag $TAG (likely already claimed), retrying... (attempt $ATTEMPT)"
              git tag -d "$TAG"
              sleep 2
            fi
          done
          if [ "$TAG_CLAIMED" != "true" ]; then
            echo "::error::Failed to claim a unique version tag after 5 attempts"
            exit 1
          fi
          echo "channel=${CHANNEL}" >> "$GITHUB_OUTPUT"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "versioned_tag=${TAG}" >> "$GITHUB_OUTPUT"
          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
          echo "Channel: ${CHANNEL}"
          echo "Version: ${VERSION}"
          echo "Versioned tag: ${TAG}"
      - name: Log in to GHCR
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v7
        with:
          push: true
          build-args: |
            AGNES_VERSION=${{ steps.meta.outputs.version }}
            RELEASE_CHANNEL=${{ steps.meta.outputs.channel }}
          tags: |
            ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.channel }}
            ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.versioned_tag }}
            ghcr.io/${{ github.repository }}:sha-${{ steps.meta.outputs.short_sha }}

  # Runs only for stable builds: boots the image that was just pushed and
  # exercises it with scripts/smoke-test.sh.
  smoke-test:
    needs: build-and-push
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Start Agnes from built image
        run: |
          # Create empty .env (docker-compose.yml requires env_file: .env, gitignored)
          touch .env
          # Use prod compose (GHCR images) + CI overlay (test secrets)
          export AGNES_TAG="${{ needs.build-and-push.outputs.image_tag }}"
          docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml up -d app
          # Wait for healthy (max 60s)
          timeout 60 bash -c 'until curl -sf http://localhost:8000/api/health | python3 -c "import sys,json; d=json.load(sys.stdin); sys.exit(0 if d[\"status\"]!=\"unhealthy\" else 1)"; do sleep 3; done'
      - name: Run smoke tests
        run: bash scripts/smoke-test.sh http://localhost:8000
      - name: Collect logs on failure
        if: failure()
        run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml logs > smoke-test-logs.txt
      - name: Upload logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: smoke-test-logs
          path: smoke-test-logs.txt
      - name: Teardown
        if: always()
        run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml down -v

33
CHANGELOG.md Normal file
View file

@ -0,0 +1,33 @@
# Changelog
All notable changes to Agnes AI Data Analyst are documented in this file.
Format: [CalVer](https://calver.org/) `YYYY.MM.N` with channels `stable` and `dev`.
---
## stable-2026.04.1 (unreleased)
Multi-instance deployment and self-service setup.
### Added
- CalVer versioning with `stable` and `dev` release channels
- `/api/health` now returns `version`, `channel`, and `schema_version`
- Auto-generated JWT and session secrets with file persistence (`/data/state/.jwt_secret`, `/data/state/.session_secret`)
- Pre-migration snapshot of `system.duckdb` before schema upgrades
- `POST /api/admin/configure` for headless data source configuration
- `POST /api/admin/discover-and-register` combined table discovery and registration
- `/setup` web wizard for first-time instance setup
- `scripts/smoke-test.sh` for post-deploy verification
- Smoke test job in CI (starts the freshly built Docker image after every `stable` release on `main`)
- OpenAPI snapshot test for breaking change detection
- Custom connector mount support (`connectors/custom/`)
- Startup banner logging version, channel, and schema version
- Schema migration safety tests (idempotency, data preservation, snapshot)
- `CHANGELOG.md` and release notes template
### Breaking Changes
None.
### Migration Guide
No action required. Existing instances upgrade seamlessly.

View file

@ -154,7 +154,7 @@ Auth providers in `app/auth/` (FastAPI-based):
## Key Implementation Details
### DuckDB Schema (src/db.py)
- Schema v2 with auto-migration from v1
- Schema v3 with auto-migration from v1→v2→v3
- `table_registry`: id, name, source_type, bucket, source_table, query_mode, sync_schedule, etc.
- `sync_state`, `sync_history`: track extraction progress
- `users`, `dataset_permissions`, `audit_log`: auth + RBAC

View file

@ -6,6 +6,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf
# Install uv for fast dependency management
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
ARG AGNES_VERSION=dev
ARG RELEASE_CHANNEL=dev
ENV AGNES_VERSION=${AGNES_VERSION}
ENV RELEASE_CHANNEL=${RELEASE_CHANNEL}
WORKDIR /app
# Copy application code

View file

@ -1,6 +1,6 @@
# Agnes AI Data Analyst — Development Makefile
.PHONY: help test lint dev docker
.PHONY: help test lint dev docker update-openapi-snapshot
help:
@echo "Available targets:"
@ -20,3 +20,7 @@ docker:
lint:
@ruff check . 2>/dev/null || echo "ruff not installed: pip install ruff"
update-openapi-snapshot:
TESTING=1 python scripts/generate_openapi.py > tests/snapshots/openapi.json
@echo "Snapshot updated. Review diff and commit."

View file

@ -1,7 +1,9 @@
"""Admin endpoints — table discovery, registry management."""
"""Admin endpoints — table discovery, registry management, instance configuration."""
import logging
import os
import uuid
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
@ -42,6 +44,16 @@ class UpdateTableRequest(BaseModel):
profile_after_sync: Optional[bool] = None
class ConfigureRequest(BaseModel):
    # Request body for POST /api/admin/configure.

    # Backend to configure. The endpoint answers HTTP 400 for any other value.
    data_source: str # "keboola" | "bigquery" | "local"
    # Both Keboola fields are required by the endpoint when data_source is
    # "keboola". The token is persisted to .env_overlay; instance.yaml only
    # stores the name of the environment variable that holds it.
    keboola_token: Optional[str] = None
    keboola_url: Optional[str] = None
    # Required by the endpoint when data_source is "bigquery".
    bigquery_project: Optional[str] = None
    # The endpoint falls back to "us" when this is omitted or empty.
    bigquery_location: Optional[str] = None
    # Optional; stored under instance.name in instance.yaml when provided.
    instance_name: Optional[str] = None
    # Optional; stored under auth.allowed_domain in instance.yaml when provided.
    allowed_domain: Optional[str] = None
@router.get("/discover-tables")
async def discover_tables(
user: dict = Depends(require_role(Role.ADMIN)),
@ -53,10 +65,12 @@ async def discover_tables(
if source_type == "keboola":
from connectors.keboola.client import KeboolaClient
import os
from app.instance_config import get_value
url = get_value("keboola", "url", default="")
token = os.environ.get(get_value("keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN"), "")
url = get_value("data_source", "keboola", "stack_url", default="")
token_env = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
token = os.environ.get(token_env, "") if token_env else ""
if not token:
token = os.environ.get("KEBOOLA_STORAGE_TOKEN", "")
client = KeboolaClient(token=token, url=url)
tables = client.discover_all_tables()
return {"tables": tables, "count": len(tables), "source": "keboola"}
@ -144,3 +158,208 @@ async def unregister_table(
if not repo.get(table_id):
raise HTTPException(status_code=404, detail="Table not found")
repo.unregister(table_id)
@router.post("/configure")
async def configure_instance(
    request: ConfigureRequest,
    user: dict = Depends(require_role(Role.ADMIN)),
):
    """Configure data source and instance settings via API.

    Writes config to instance.yaml and persists secrets to .env_overlay.
    AI agents and the /setup wizard use this instead of manual file editing.

    Args:
        request: Desired data source plus optional instance settings.
        user: Authenticated admin (enforced by the role dependency).

    Returns:
        ``{"status": "ok", "data_source": ..., "connection": ...}``.

    Raises:
        HTTPException: 400 for an unknown ``data_source``, missing required
            fields, a credential containing line breaks, or a failed Keboola
            connection check.
    """
    import yaml

    if request.data_source not in ("keboola", "bigquery", "local"):
        raise HTTPException(status_code=400, detail="data_source must be 'keboola', 'bigquery', or 'local'")

    # .env_overlay is a line-based KEY=VALUE file. A line break inside a value
    # would add extra variables to it, and NUL cannot be stored in os.environ.
    for field_name in ("keboola_token", "keboola_url"):
        field_value = getattr(request, field_name)
        if field_value and any(ch in field_value for ch in ("\r", "\n", "\x00")):
            raise HTTPException(status_code=400, detail=f"{field_name} must not contain line breaks")

    # Validate credentials if provided
    if request.data_source == "keboola":
        if not request.keboola_token or not request.keboola_url:
            raise HTTPException(status_code=400, detail="keboola_token and keboola_url are required for Keboola data source")
        try:
            from connectors.keboola.client import KeboolaClient
            client = KeboolaClient(token=request.keboola_token, url=request.keboola_url)
            client.test_connection()
        except Exception as e:
            logger.error("Keboola connection validation failed: %s", e)
            raise HTTPException(status_code=400, detail="Keboola connection failed. Check your token and URL.") from e
    elif request.data_source == "bigquery":
        if not request.bigquery_project:
            raise HTTPException(status_code=400, detail="bigquery_project is required for BigQuery data source")

    # Write instance.yaml to DATA_DIR/state/ (writable Docker volume),
    # NOT to CONFIG_DIR which is mounted read-only in Docker.
    state_dir = Path(os.environ.get("DATA_DIR", "./data")) / "state"
    config_path = state_dir / "instance.yaml"

    # Load existing API-generated config, or fall back to read-only CONFIG_DIR config
    if config_path.exists():
        existing = _load_yaml_mapping(config_path)
    else:
        ro_path = Path(os.environ.get("CONFIG_DIR", "./config")) / "instance.yaml"
        existing = _load_yaml_mapping(ro_path) if ro_path.exists() else {}

    # Merge instance settings
    if request.instance_name:
        _mapping_section(existing, "instance")["name"] = request.instance_name
    if request.allowed_domain:
        _mapping_section(existing, "auth")["allowed_domain"] = request.allowed_domain

    # Merge data source config (secrets as env var references)
    existing["data_source"] = {"type": request.data_source}
    if request.data_source == "keboola":
        existing["data_source"]["keboola"] = {
            "stack_url": request.keboola_url,
            "token_env": "KEBOOLA_STORAGE_TOKEN",
        }
    elif request.data_source == "bigquery":
        existing["data_source"]["bigquery"] = {
            "project": request.bigquery_project,
            "location": request.bigquery_location or "us",
        }

    # Write to writable data volume
    config_path.parent.mkdir(parents=True, exist_ok=True)
    config_path.write_text(yaml.dump(existing, default_flow_style=False, sort_keys=False))
    logger.info("Wrote instance config to %s", config_path)

    # Persist secrets to .env_overlay (in data volume, never in git)
    secrets_to_persist = {}
    if request.keboola_token:
        secrets_to_persist["KEBOOLA_STORAGE_TOKEN"] = request.keboola_token
    if request.keboola_url:
        secrets_to_persist["KEBOOLA_STACK_URL"] = request.keboola_url

    if secrets_to_persist:
        overlay_path = state_dir / ".env_overlay"
        overlay_path.parent.mkdir(parents=True, exist_ok=True)

        # Merge with existing overlay
        existing_overlay = {}
        if overlay_path.exists():
            for line in overlay_path.read_text().splitlines():
                if "=" in line and not line.startswith("#"):
                    k, v = line.split("=", 1)
                    existing_overlay[k.strip()] = v.strip()
        existing_overlay.update(secrets_to_persist)

        _write_owner_only(
            overlay_path,
            "\n".join(f"{k}={v}" for k, v in existing_overlay.items()) + "\n",
        )
        logger.info("Persisted %d secrets to .env_overlay", len(secrets_to_persist))

        # Inject into current process environment
        for k, v in secrets_to_persist.items():
            os.environ[k] = v

    # Invalidate cached instance config so next read picks up changes
    import app.instance_config as ic
    ic._instance_config = None

    return {
        "status": "ok",
        "data_source": request.data_source,
        # NOTE(review): only the Keboola branch above performs a live check;
        # BigQuery is reported as "verified" without one. Value kept unchanged
        # for compatibility with existing clients.
        "connection": "verified" if request.data_source != "local" else "local",
    }


def _load_yaml_mapping(path: Path) -> dict:
    """Parse ``path`` as YAML and return it only if it is a mapping, else ``{}``."""
    import yaml

    try:
        loaded = yaml.safe_load(path.read_text())
    except Exception as exc:
        logger.warning("Ignoring unreadable config %s: %s", path, exc)
        return {}
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        logger.warning("Ignoring config %s: top level is not a mapping", path)
        return {}
    return loaded


def _mapping_section(config: dict, key: str) -> dict:
    """Return ``config[key]`` as a dict, replacing a missing or non-dict value with ``{}``."""
    section = config.get(key)
    if not isinstance(section, dict):
        section = {}
        config[key] = section
    return section


def _write_owner_only(path: Path, text: str) -> None:
    """Write ``text`` to ``path``, creating the file with mode 0600 from the start."""
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(text)
    try:
        # Tighten a file that already existed with looser permissions.
        path.chmod(0o600)
    except OSError:
        pass  # chmod is not supported on every platform
def _discover_and_register_tables(conn: duckdb.DuckDBPyConnection, user_email: str) -> dict:
    """Discover tables in the configured source and add the new ones to the registry.

    Shared by the admin API endpoint and the sync job. Only the ``keboola``
    source type is handled; for any other type an all-zero summary is returned.

    Args:
        conn: Connection handed to ``TableRegistryRepository``.
        user_email: Recorded as ``registered_by`` on every new entry.

    Returns:
        Summary dict with ``registered``, ``skipped``, ``errors``, ``tables``
        (ids registered by this call) and ``source``.
    """
    from app.instance_config import get_data_source_type, get_value

    source_type = get_data_source_type()
    if source_type != "keboola":
        return {"registered": 0, "skipped": 0, "errors": 0, "tables": [], "source": source_type}

    from connectors.keboola.client import KeboolaClient

    # Same keys that /api/admin/configure writes under data_source.keboola.
    stack_url = get_value("data_source", "keboola", "stack_url", default="")
    env_name = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
    configured_token = os.environ.get(env_name, "") if env_name else ""
    # Fall back to the conventional variable when the configured one is empty.
    storage_token = configured_token or os.environ.get("KEBOOLA_STORAGE_TOKEN", "")

    keboola = KeboolaClient(token=storage_token, url=stack_url)
    found = keboola.discover_all_tables()

    registry = TableRegistryRepository(conn)
    new_ids = []
    skipped_count = 0
    error_count = 0

    for entry in found:
        raw_id = entry.get("id", "")
        registry_id = raw_id.strip().lower().replace(".", "_").replace(" ", "_")

        if not registry_id:
            error_count += 1
            continue
        if registry.get(registry_id):
            skipped_count += 1
            continue

        try:
            # Source ids look like "in.c-bucket.table_name".
            segments = raw_id.split(".")
            bucket_name = segments[1] if len(segments) > 1 else ""
            table_name = segments[2] if len(segments) > 2 else entry.get("name", "")
            registry.register(
                id=registry_id,
                name=entry.get("name", registry_id),
                source_type="keboola",
                bucket=bucket_name,
                source_table=table_name,
                query_mode="local",
                registered_by=user_email,
                description=f"Auto-discovered from Keboola: {raw_id}",
            )
        except Exception as e:
            logger.warning("Failed to register %s: %s", registry_id, e)
            error_count += 1
        else:
            new_ids.append(registry_id)

    return {
        "registered": len(new_ids),
        "skipped": skipped_count,
        "errors": error_count,
        "tables": new_ids,
        "source": "keboola",
    }
@router.post("/discover-and-register")
async def discover_and_register(
    user: dict = Depends(require_role(Role.ADMIN)),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Discover tables from configured source and auto-register them.

    Combines discover-tables + register-table into one call.
    Skips already-registered tables. Used by /setup wizard and AI agents.

    Returns:
        The summary dict produced by ``_discover_and_register_tables``.

    Raises:
        HTTPException: 500 when discovery or registration raises.
    """
    try:
        return _discover_and_register_tables(conn, user.get("email", "admin"))
    except Exception as e:
        # Record the traceback server-side; the client only receives the message.
        logger.exception("Discovery and registration failed")
        raise HTTPException(status_code=500, detail=f"Discovery and registration failed: {e}") from e

View file

@ -1,11 +1,13 @@
"""Health check endpoint — structured diagnostics for AI agents."""
import os
from datetime import datetime, timezone
from fastapi import APIRouter, Depends
import duckdb
from app.auth.dependencies import _get_db
from src.db import SCHEMA_VERSION
from src.repositories.sync_state import SyncStateRepository
router = APIRouter(tags=["health"])
@ -69,6 +71,9 @@ async def health_check(conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
return {
"status": overall,
"version": os.environ.get("AGNES_VERSION", "dev"),
"channel": os.environ.get("RELEASE_CHANNEL", "dev"),
"schema_version": SCHEMA_VERSION,
"timestamp": datetime.now(timezone.utc).isoformat(),
"services": checks,
}

View file

@ -64,8 +64,29 @@ def _run_sync(tables: Optional[List[str]] = None):
sys_conn.close()
if not table_configs:
logger.warning("No tables to sync for source_type=%s", source_type)
return
# Auto-discover tables on first sync when registry is empty
if source_type == "keboola" and os.environ.get("KEBOOLA_STORAGE_TOKEN"):
logger.info("No tables registered — running auto-discovery from Keboola")
try:
from app.api.admin import _discover_and_register_tables
auto_conn = get_system_db()
try:
result = _discover_and_register_tables(auto_conn, "auto-discovery")
logger.info("Auto-discovered %d tables, skipped %d", result["registered"], result["skipped"])
finally:
auto_conn.close()
# Re-read table configs after auto-registration
sys_conn2 = get_system_db()
try:
table_configs = TableRegistryRepository(sys_conn2).list_local(source_type)
finally:
sys_conn2.close()
except Exception as e:
logger.warning("Auto-discovery failed: %s", e)
if not table_configs:
logger.warning("No tables to sync for source_type=%s", source_type)
return
# Serialize configs — strip non-serializable fields
serializable = []
@ -113,6 +134,29 @@ print(json.dumps(result))
else:
print(f"[SYNC] Extractor OK", file=_sys.stderr, flush=True)
# Run custom connectors (Tier A: local mount)
connectors_dir = Path(os.environ.get("CONNECTORS_DIR", str(Path(__file__).parent.parent.parent / "connectors" / "custom")))
if connectors_dir.exists():
for connector_dir in sorted(connectors_dir.iterdir()):
if not connector_dir.is_dir():
continue
extractor = connector_dir / "extractor.py"
if not extractor.exists():
continue
logger.info("Running custom connector: %s", connector_dir.name)
try:
custom_result = subprocess.run(
[sys.executable, str(extractor)],
env=env, capture_output=True, text=True, timeout=600,
cwd=str(Path(__file__).parent.parent.parent),
)
if custom_result.returncode != 0:
logger.error("Custom connector %s failed: %s", connector_dir.name, custom_result.stderr[-500:])
else:
logger.info("Custom connector %s completed", connector_dir.name)
except subprocess.TimeoutExpired:
logger.error("Custom connector %s timed out", connector_dir.name)
# Rebuild master views (reads extract.duckdb files, no write conflict)
from src.orchestrator import SyncOrchestrator
orch = SyncOrchestrator()

View file

@ -7,22 +7,22 @@ from typing import Optional
import jwt
SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "")
if not SECRET_KEY:
def _get_secret_key() -> str:
"""Load JWT secret - from env, file, or auto-generated."""
if os.environ.get("TESTING", "").lower() in ("1", "true"):
SECRET_KEY = "test-jwt-secret-key-minimum-32-chars!!"
else:
raise RuntimeError(
"JWT_SECRET_KEY environment variable is required. "
"Generate one: python -c \"import secrets; print(secrets.token_hex(32))\""
return os.environ.get("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!")
from app.secrets import get_jwt_secret
key = get_jwt_secret()
if len(key) < 32:
import warnings as _warnings
_warnings.warn(
f"JWT_SECRET_KEY is {len(key)} chars — minimum 32 recommended",
UserWarning, stacklevel=2,
)
elif len(SECRET_KEY) < 32 and os.environ.get("TESTING", "").lower() not in ("1", "true"):
import warnings as _warnings
_warnings.warn(
f"JWT_SECRET_KEY is {len(SECRET_KEY)} chars — minimum 32 recommended",
UserWarning, stacklevel=2,
)
return key
SECRET_KEY = _get_secret_key()
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_HOURS = 24 # 24 hours

View file

@ -11,15 +11,34 @@ _instance_config: Optional[dict] = None
def load_instance_config() -> dict:
"""Load instance.yaml using the existing config loader."""
"""Load instance.yaml — checks API-generated config first, then static config.
Search order:
1. DATA_DIR/state/instance.yaml (written by /api/admin/configure, writable)
2. CONFIG_DIR/instance.yaml (static, read-only in Docker)
3. Empty dict with defaults (if neither exists)
"""
global _instance_config
if _instance_config is not None:
return _instance_config
# First, try API-generated config in writable data volume
import yaml
data_dir = Path(os.environ.get("DATA_DIR", "./data"))
api_config_path = data_dir / "state" / "instance.yaml"
if api_config_path.exists():
try:
_instance_config = yaml.safe_load(api_config_path.read_text()) or {}
logger.info("Loaded instance.yaml from %s", api_config_path)
return _instance_config
except Exception as e:
logger.warning(f"Could not load API-generated instance.yaml: {e}")
# Fall back to static config (may have strict validation)
try:
from config.loader import load_instance_config as _load, get_instance_value
from config.loader import load_instance_config as _load
_instance_config = _load()
logger.info("Loaded instance.yaml")
logger.info("Loaded instance.yaml from config/")
except Exception as e:
logger.warning(f"Could not load instance.yaml: {e}. Using defaults.")
_instance_config = {}

View file

@ -48,8 +48,8 @@ def create_app() -> FastAPI:
)
# Session middleware (required for OAuth state)
import secrets as _secrets
session_secret = os.environ.get("SESSION_SECRET", os.environ.get("JWT_SECRET_KEY", _secrets.token_hex(32)))
from app.secrets import get_session_secret
session_secret = get_session_secret()
app.add_middleware(SessionMiddleware, secret_key=session_secret)
# CORS for CLI and external clients
@ -62,6 +62,14 @@ def create_app() -> FastAPI:
allow_headers=["*"],
)
# Load .env_overlay (persisted by /api/admin/configure)
_overlay = Path(os.environ.get("DATA_DIR", "./data")) / "state" / ".env_overlay"
if _overlay.exists():
for line in _overlay.read_text().splitlines():
if "=" in line and not line.startswith("#"):
k, v = line.split("=", 1)
os.environ.setdefault(k.strip(), v.strip())
# Load instance config on startup
try:
from app.instance_config import load_instance_config
@ -70,6 +78,15 @@ def create_app() -> FastAPI:
except Exception as e:
logger.warning(f"Could not load instance config: {e}")
# Startup banner
from src.db import SCHEMA_VERSION
logger.info(
"Agnes %s | channel: %s | schema v%s",
os.environ.get("AGNES_VERSION", "dev"),
os.environ.get("RELEASE_CHANNEL", "dev"),
SCHEMA_VERSION,
)
# Seed admin user for testing/CI (when SEED_ADMIN_EMAIL is set)
seed_email = os.environ.get("SEED_ADMIN_EMAIL")
if seed_email:

43
app/secrets.py Normal file
View file

@ -0,0 +1,43 @@
"""Auto-generate and persist secrets that survive container restarts."""
import logging
import os
import secrets
from pathlib import Path
logger = logging.getLogger(__name__)
def _load_or_generate(env_var: str, file_name: str) -> str:
"""Load secret from env var, or from file, or generate and persist."""
val = os.environ.get(env_var, "")
if val:
return val
data_dir = Path(os.environ.get("DATA_DIR", "./data"))
secret_path = data_dir / "state" / file_name
if secret_path.exists():
val = secret_path.read_text().strip()
if val:
return val
logger.warning("Secret file %s is empty, regenerating", secret_path)
secret_path.parent.mkdir(parents=True, exist_ok=True)
val = secrets.token_hex(32)
secret_path.write_text(val)
try:
secret_path.chmod(0o600)
except OSError:
pass # chmod not supported on all platforms (e.g., Windows)
logger.info(
"Auto-generated %s -> %s (set %s in .env to use a fixed value)",
file_name, secret_path, env_var,
)
return val
def get_jwt_secret() -> str:
    """Return the JWT signing key: ``JWT_SECRET_KEY`` if set, else the persisted ``.jwt_secret`` file (created on first use)."""
    return _load_or_generate(env_var="JWT_SECRET_KEY", file_name=".jwt_secret")
def get_session_secret() -> str:
    """Return the session secret: ``SESSION_SECRET`` if set, else the persisted ``.session_secret`` file (created on first use)."""
    return _load_or_generate(env_var="SESSION_SECRET", file_name=".session_secret")

View file

@ -120,6 +120,7 @@ _URL_MAP = {
"email_auth.login_email_form": "/login/email",
"email_auth.send_magic_link": "/auth/email/send-link",
"register": "/auth/password/setup",
"setup": "/setup",
}
@ -177,6 +178,18 @@ async def index(request: Request, user: Optional[dict] = Depends(get_optional_us
return RedirectResponse(url="/login", status_code=302)
@router.get("/setup", response_class=HTMLResponse)
async def setup_wizard(request: Request, conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
    """Render the first-time setup wizard.

    When the ``users`` table already holds at least one row, the visitor is
    sent to ``/login`` (HTTP 302) instead. If the count query fails for any
    reason, the wizard is rendered.
    """
    try:
        count_row = conn.execute("SELECT COUNT(*) FROM users").fetchone()
        has_users = count_row[0] > 0
    except Exception:
        # No users table yet — show setup
        has_users = False

    if has_users:
        return RedirectResponse(url="/login", status_code=302)
    return templates.TemplateResponse(request, "setup.html", _build_context(request))
@router.get("/login", response_class=HTMLResponse)
async def login_page(request: Request):
providers = []

View file

@ -0,0 +1,267 @@
{% extends "base_login.html" %}
{% block title %}Setup - Agnes AI Data Analyst{% endblock %}
{% block content %}
<div class="login-page">
<div class="login-card-wrapper" style="max-width: 520px; margin: 40px auto; padding: 0 20px;">
<div class="login-card" style="max-width: 520px;">
<h2 id="wizard-title">Setup Agnes</h2>
<p class="login-description" id="wizard-description">
Create your admin account to get started.
</p>
<!-- Progress -->
<div style="display: flex; gap: 8px; margin-bottom: 24px;">
<div id="step-dot-1" style="flex: 1; height: 4px; border-radius: 2px; background: var(--primary, #2563eb);"></div>
<div id="step-dot-2" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
<div id="step-dot-3" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
<div id="step-dot-4" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
</div>
<!-- Status message -->
<div id="status-msg" style="display: none; padding: 10px 14px; border-radius: 6px; margin-bottom: 16px; font-size: 14px;"></div>
<!-- Step 1: Create Admin -->
<div id="step-1">
<form id="admin-form" onsubmit="return createAdmin(event)">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Email</label>
<input type="email" id="admin-email" required placeholder="admin@company.com"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Password</label>
<input type="password" id="admin-password" required minlength="8" placeholder="Min. 8 characters"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-admin">
Create Admin Account
</button>
</form>
</div>
<!-- Step 2: Data Source -->
<div id="step-2" style="display: none;">
<form id="source-form" onsubmit="return configureSource(event)">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Data Source</label>
<select id="data-source" onchange="toggleSourceFields()"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<option value="keboola">Keboola</option>
<option value="bigquery">BigQuery</option>
<option value="local">Local / CSV</option>
</select>
<div id="keboola-fields">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Keboola URL</label>
<input type="url" id="keboola-url" placeholder="https://connection.keboola.com"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Storage API Token</label>
<input type="password" id="keboola-token" placeholder="Your Keboola storage token"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
</div>
<div id="bigquery-fields" style="display: none;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">GCP Project</label>
<input type="text" id="bq-project" placeholder="my-gcp-project"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Location</label>
<input type="text" id="bq-location" value="us" placeholder="us"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
</div>
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-source">
Configure Data Source
</button>
<button type="button" onclick="skipToStep(4)" class="btn btn-secondary" style="width: 100%; margin-top: 8px;" id="btn-skip-source">
Skip (configure later)
</button>
</form>
</div>
<!-- Step 3: Discover Tables -->
<div id="step-3" style="display: none;">
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
Discover and register tables from your data source.
</p>
<button onclick="discoverTables()" class="btn btn-primary" style="width: 100%;" id="btn-discover">
Discover Tables
</button>
<div id="discover-result" style="display: none; margin-top: 12px; padding: 12px; background: #f0fdf4; border-radius: 6px; font-size: 14px;"></div>
<button onclick="goToStep(4)" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none;" id="btn-next-sync">
Continue
</button>
</div>
<!-- Step 4: First Sync & Done -->
<div id="step-4" style="display: none;">
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
Start the first data sync and go to your dashboard.
</p>
<button onclick="triggerSync()" class="btn btn-primary" style="width: 100%;" id="btn-sync">
Start First Sync
</button>
<a href="/dashboard" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none; text-align: center; text-decoration: none;" id="btn-dashboard">
Go to Dashboard
</a>
</div>
</div>
</div>
</div>
<script>
let token = '';
const steps = {
1: { title: 'Setup Agnes', desc: 'Create your admin account to get started.' },
2: { title: 'Data Source', desc: 'Connect to your data source.' },
3: { title: 'Discover Tables', desc: 'Find and register tables from your data source.' },
4: { title: 'Almost Done', desc: 'Start syncing data and open your dashboard.' },
};
// Show `msg` in the status banner: red styling when type is 'error', green otherwise.
function showStatus(msg, type) {
    const isError = type === 'error';
    const banner = document.getElementById('status-msg');
    banner.textContent = msg;
    banner.style.display = 'block';
    if (isError) {
        banner.style.background = '#fef2f2';
        banner.style.color = '#dc2626';
    } else {
        banner.style.background = '#f0fdf4';
        banner.style.color = '#16a34a';
    }
}
// Hide the status banner.
function hideStatus() {
    const banner = document.getElementById('status-msg');
    banner.style.display = 'none';
}
// Switch the wizard to step `n` (1-4): show only that panel, fill the progress
// bar up to and including `n`, and update the heading and description.
// Any visible status banner is hidden first.
function goToStep(n) {
    hideStatus();
    [1, 2, 3, 4].forEach((stepNo) => {
        const panel = document.getElementById('step-' + stepNo);
        panel.style.display = stepNo === n ? 'block' : 'none';
        const dot = document.getElementById('step-dot-' + stepNo);
        dot.style.background = stepNo <= n ? 'var(--primary, #2563eb)' : '#e5e7eb';
    });
    const current = steps[n];
    document.getElementById('wizard-title').textContent = current.title;
    document.getElementById('wizard-description').textContent = current.desc;
}
// Used by the "Skip" button; currently identical to goToStep(n).
function skipToStep(n) {
    const target = n;
    goToStep(target);
}
// Show the credential fields matching the selected data source and hide the rest.
function toggleSourceFields() {
    const selected = document.getElementById('data-source').value;
    const fieldGroups = [
        ['keboola-fields', 'keboola'],
        ['bigquery-fields', 'bigquery'],
    ];
    for (const [groupId, sourceName] of fieldGroups) {
        document.getElementById(groupId).style.display = selected === sourceName ? 'block' : 'none';
    }
}
/**
 * POST a JSON payload to the backend and return the parsed JSON response.
 *
 * The bearer token is attached when one is held. A 401 clears the token
 * (in memory and in sessionStorage), shows a "session expired" banner and
 * throws. Any other non-OK response throws an Error whose message is built
 * from the response's `detail` field when one is present.
 *
 * @param {string} url    - Endpoint to POST to.
 * @param {object} [body] - Payload, serialised with JSON.stringify.
 * @returns {Promise<*>} Parsed JSON body of a successful response.
 * @throws {Error} On 401, on any non-OK status, or when a successful
 *                 response does not contain valid JSON.
 */
async function apiCall(url, body) {
    const headers = { 'Content-Type': 'application/json' };
    if (token) headers['Authorization'] = 'Bearer ' + token;
    const resp = await fetch(url, { method: 'POST', headers, body: JSON.stringify(body) });
    if (resp.status === 401) {
        token = '';
        sessionStorage.removeItem('setup_token');
        showStatus('Session expired. Please refresh the page and start over.', 'error');
        throw new Error('Session expired');
    }

    // The body is not guaranteed to be JSON (e.g. an HTML error page from a
    // reverse proxy, or an empty body), so a parse failure must not surface
    // as a raw SyntaxError message.
    let data;
    let parsed = true;
    try {
        data = await resp.json();
    } catch (parseErr) {
        parsed = false;
    }

    if (!resp.ok) {
        let detail = parsed && data ? data.detail : '';
        // `detail` may be a list of error objects (assumed to carry a `msg`
        // field, as in FastAPI validation errors — TODO confirm) or another
        // non-string value; flatten it so the user never sees "[object Object]".
        if (Array.isArray(detail)) {
            detail = detail.map((item) => (item && item.msg) || String(item)).join('; ');
        } else if (detail && typeof detail !== 'string') {
            detail = JSON.stringify(detail);
        }
        throw new Error(detail || 'Request failed (HTTP ' + resp.status + ')');
    }
    if (!parsed) throw new Error('Server returned an invalid response');
    return data;
}
/**
 * Submit handler for the admin-account form (step 1).
 *
 * POSTs the entered email and password to /auth/bootstrap, keeps the
 * returned access token in memory and in sessionStorage, then advances to
 * step 2. Failures are reported in the status banner. The submit button is
 * disabled while the request is in flight and restored afterwards.
 *
 * @param {Event} e - Submit event; its default action is suppressed.
 * @returns {Promise<boolean>} Always resolves to false.
 */
async function createAdmin(e) {
    e.preventDefault();
    const submitBtn = document.getElementById('btn-admin');
    submitBtn.disabled = true;
    submitBtn.textContent = 'Creating...';
    try {
        const credentials = {
            email: document.getElementById('admin-email').value,
            password: document.getElementById('admin-password').value,
        };
        const result = await apiCall('/auth/bootstrap', credentials);
        token = result.access_token;
        sessionStorage.setItem('setup_token', token);
        goToStep(2);
    } catch (failure) {
        showStatus(failure.message, 'error');
    } finally {
        submitBtn.disabled = false;
        submitBtn.textContent = 'Create Admin Account';
    }
    return false;
}
/**
 * Submit handler for the data-source form (step 2).
 *
 * Builds the payload for the selected source (Keboola URL + token, or
 * BigQuery project + location; other sources send only `data_source`),
 * POSTs it to /api/admin/configure and moves on: a 'local' source skips
 * table discovery and goes to step 4, everything else goes to step 3.
 *
 * @param {Event} e - Submit event; its default action is suppressed.
 * @returns {Promise<boolean>} Always resolves to false.
 */
async function configureSource(e) {
    e.preventDefault();
    const btn = document.getElementById('btn-source');
    btn.disabled = true;
    btn.textContent = 'Verifying...';
    try {
        const src = document.getElementById('data-source').value;
        const body = { data_source: src };
        if (src === 'keboola') {
            body.keboola_url = document.getElementById('keboola-url').value;
            body.keboola_token = document.getElementById('keboola-token').value;
        } else if (src === 'bigquery') {
            body.bigquery_project = document.getElementById('bq-project').value;
            body.bigquery_location = document.getElementById('bq-location').value;
        }
        await apiCall('/api/admin/configure', body);
        // Navigate first: goToStep() hides the status banner, so showing the
        // success message before it would make the message invisible.
        goToStep(src === 'local' ? 4 : 3);
        showStatus('Connection verified!', 'success');
    } catch (err) {
        showStatus(err.message, 'error');
    } finally {
        btn.disabled = false;
        btn.textContent = 'Configure Data Source';
    }
    return false;
}
/**
 * Click handler for "Discover Tables" (step 3).
 *
 * POSTs to /api/admin/discover-and-register through apiCall(), so an
 * expired session (401) is handled the same way as in the other steps.
 * On success it prints the registered/skipped counts, reveals the
 * "Continue" button and hides the discover button. Failures are reported
 * in the status banner.
 *
 * @returns {Promise<void>}
 */
async function discoverTables() {
    const btn = document.getElementById('btn-discover');
    btn.disabled = true;
    btn.textContent = 'Discovering...';
    try {
        // No payload: JSON.stringify(undefined) is undefined, so the request
        // is sent without a body, as before.
        const data = await apiCall('/api/admin/discover-and-register');
        const el = document.getElementById('discover-result');
        el.style.display = 'block';
        el.textContent = `Registered ${data.registered} tables, skipped ${data.skipped}.`;
        document.getElementById('btn-next-sync').style.display = 'block';
        btn.style.display = 'none';
    } catch (err) {
        showStatus(err.message, 'error');
    } finally {
        btn.disabled = false;
        btn.textContent = 'Discover Tables';
    }
}
/**
 * Click handler for "Start First Sync" (step 4).
 *
 * POSTs to /api/sync/trigger with the bearer token. Only a successful
 * (2xx) response swaps the sync button for the dashboard link; any HTTP
 * error or network failure is reported in the status banner and the
 * button is re-enabled so the user can retry.
 *
 * @returns {Promise<void>}
 */
async function triggerSync() {
    const btn = document.getElementById('btn-sync');
    btn.disabled = true;
    btn.textContent = 'Starting sync...';
    try {
        const headers = {};
        if (token) headers['Authorization'] = 'Bearer ' + token;
        const resp = await fetch('/api/sync/trigger', { method: 'POST', headers });
        // fetch() resolves on HTTP errors too, so the status must be checked
        // explicitly; otherwise a 401/500 would be reported as success.
        if (!resp.ok) {
            let detail = '';
            try {
                const payload = await resp.json();
                if (payload && typeof payload.detail === 'string') detail = payload.detail;
            } catch (parseErr) {
                // Error body was not JSON; fall back to the generic message.
            }
            throw new Error(detail || 'Sync request failed (HTTP ' + resp.status + ')');
        }
        btn.style.display = 'none';
        document.getElementById('btn-dashboard').style.display = 'block';
        showStatus('Sync started! You can now go to your dashboard.', 'success');
    } catch (err) {
        showStatus(err.message, 'error');
        btn.disabled = false;
        btn.textContent = 'Start First Sync';
    }
}
// A page reload resets `token`; pick up the copy that createAdmin() stored
// in sessionStorage, if there is one.
const savedToken = sessionStorage.getItem('setup_token');
if (savedToken) {
    token = savedToken;
}
</script>
{% endblock %}

11
docker-compose.ci.yml Normal file
View file

@ -0,0 +1,11 @@
# Overlay for the CI smoke test: fixed throwaway secrets and a published port
# so the app can be started and probed inside GitHub Actions.
# Usage: docker compose -f docker-compose.yml -f docker-compose.ci.yml up -d
services:
  app:
    environment:
      JWT_SECRET_KEY: smoke-test-ci-key-minimum-32-chars-xx
      SESSION_SECRET: smoke-test-session-key-32-chars-min-x
      DATA_DIR: /data
      # Quoted so the value stays the string "0" rather than an integer.
      TESTING: "0"
    ports:
      - "8000:8000"

View file

@ -1,17 +1,18 @@
# Production override — uses pre-built GHCR image instead of local build.
# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
# Override tag: AGNES_TAG=stable-2026.04.3 docker compose -f ... up -d
# Every service runs the same image. The tag comes from AGNES_TAG and falls
# back to the floating `stable` tag when the variable is unset or empty.
# Each service declares exactly one `image` key (duplicate keys are invalid
# YAML and most parsers silently keep only the last one).
services:
  app:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  scheduler:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  extract:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  telegram-bot:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  ws-gateway:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  corporate-memory:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
  session-collector:
    image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}

View file

@ -7,6 +7,7 @@ services:
volumes:
- data:/data
- ./config:/app/config:ro
# - ./custom-connectors:/app/connectors/custom:ro # Tier A: AI-generated connectors
env_file: .env
environment:
- DATA_DIR=/data

37
docs/RELEASE_TEMPLATE.md Normal file
View file

@ -0,0 +1,37 @@
# Release Notes Template
Use this template when adding a new entry to `CHANGELOG.md`.
---
## stable-YYYY.MM.N
**Image:** `ghcr.io/keboola/agnes-the-ai-analyst:stable-YYYY.MM.N`
**Digest:** `sha256:...` (from `docker inspect --format='{{index .RepoDigests 0}}'`)
**Date:** YYYY-MM-DD
### Added
- Feature description
### Changed
- Change description
### Fixed
- Bug fix description
### Breaking Changes
- Description of breaking change
- **Migration guide:** Steps to upgrade from previous version
### Deprecated
- Description of deprecated feature (will be removed in YYYY.MM.N)
---
## Guidelines
- Every merge to `main` creates a new `stable-YYYY.MM.N` release
- Include the image digest for verification with `cosign verify`
- Breaking changes require `BREAKING:` prefix in commit message
- Migration guides must include exact commands or config changes
- If a release deprecates the previous stable, note it explicitly

View file

@ -0,0 +1,527 @@
# Multi-Instance Deployment & Versioning — Design Spec
## Goal
Make Agnes deployable to 20+ independent customer instances via self-service, with safe versioning that prevents one customer's PR from breaking another's deployment.
## Context
Agnes is an open-source AI Data Analyst platform. Customers (or their AI agents) deploy it as a Docker image on their own infrastructure. Each instance connects to different data sources (Keboola, BigQuery, Jira, custom).
**Key constraints:**
- Customers range from semi-technical to non-technical, assisted by AI agents
- Cloud-agnostic (GCP, AWS, Azure, on-prem, VPS)
- One repo, one Docker image, many instances
- Community PRs must not break existing customers
- AI agent is the primary "installer" and "developer"
---
## 1. Versioning & Release Channels
### CalVer: `YYYY.MM.N`
Format: year.month.sequential-number. Example: `2026.04.1`, `2026.04.2`, `2026.05.1`.
No manual release decisions. Every merge to main is a release.
### Three channels
| Channel | Floating tag | Versioned tag | Source | Who uses it |
|---------|-------------|---------------|--------|-------------|
| **dev** | `:dev` | `:dev-2026.04.N` | Every CI-passing push on any feature branch | Developers, PR testing |
| **stable** | `:stable` | `:stable-2026.04.N` | Every merge to main + CI pass | All production customers |
| **deprecated** | — | `:deprecated-2026.04.N` | Previous stable after breaking change or failed smoke test | Grace period (30 days) |
Every image also gets a `:sha-abc1234` tag for exact commit traceability.
### Tag lifecycle
```
feature branch push → CI ✅ → :dev + :dev-2026.04.N + :sha-abc1234
❌ → nothing pushed
merge to main → CI ✅ → :stable + :stable-2026.04.N + :sha-abc1234
❌ → merge blocked (CI required)
smoke test on canary VM
✅ → :stable confirmed
❌ → alert, rollback canary to previous :stable
broken build tagged :deprecated-2026.04.N
```
### Version numbering
CalVer `YYYY.MM.N` where N is a global auto-incrementing counter per month across both channels.
Example timeline:
```
Apr 8 feature/foo push → :dev-2026.04.1
Apr 8 feature/bar push → :dev-2026.04.2
Apr 8 merge foo to main → :stable-2026.04.3
Apr 9 feature/baz push → :dev-2026.04.4
Apr 9 merge bar to main → :stable-2026.04.5
```
This avoids confusion — version `2026.04.3` exists only once, in one channel.
### Customer pins version
```yaml
# docker-compose.prod.yml
# Auto-update (recommended): always latest stable
image: ghcr.io/keboola/agnes-the-ai-analyst:stable
# Pinned: specific stable release, manual update
image: ghcr.io/keboola/agnes-the-ai-analyst:stable-2026.04.3
# Testing: latest dev
image: ghcr.io/keboola/agnes-the-ai-analyst:dev
# Testing: specific dev build
image: ghcr.io/keboola/agnes-the-ai-analyst:dev-2026.04.2
```
### Main = stable
- `main` branch is always releasable
- Every merge to main triggers a new stable release
- Feature branches are the dev channel
- No promotion pipeline, no manual approval for releases
- Smoke test is a post-deploy safety net, not a gate
---
## 2. Breaking Change Detection
### What is a breaking change
- `_meta` table schema change (add/remove column)
- `_remote_attach` table schema change
- API endpoint removed or response field removed
- DuckDB system schema migration that drops data
- CLI command removed or argument renamed
- `instance.yaml` required key added
### Automated detection in CI
Every PR runs:
1. **Contract tests**: `_meta` and `_remote_attach` schema validation against frozen spec
2. **OpenAPI diff**: Compare PR's `openapi.json` against main's. Flag removed endpoints/fields.
3. **DuckDB schema diff**: Compare table definitions in system.duckdb
4. **Config diff**: Compare `instance.yaml.example` required keys
5. **Full connector matrix**: ALL connectors tested, not just changed ones
If breaking change detected:
- PR gets `BREAKING` label automatically
- Requires 2 reviewers (elevated review)
- Commit message must have `BREAKING:` prefix
- CHANGELOG.md entry with migration guide required
- On merge: previous stable tagged as `:deprecated-YYYY.MM.N`
### Deprecated channel
When a breaking change merges:
1. Previous stable image retagged to `:deprecated-2026.04.N`
2. New build becomes `:stable` + `:stable-2026.04.(N+1)`
3. Health endpoint on deprecated version shows warning:
```json
{"warnings": ["Running deprecated version 2026.04.3. Update to stable."]}
```
4. Deprecated images removed from GHCR after 30 days
---
## 3. Smoke Test (Post-Deploy Safety Net)
### What it tests
Automated sequence run on canary VM after every `:stable` deploy:
```
1. GET /api/health → status != "unhealthy"
2. POST /auth/token → 200 (valid credentials)
3. GET /api/catalog/tables → count > 0
4. POST /api/query {sql: "SELECT 1"} → 200 + rows
5. POST /api/sync/trigger → 200
6. (wait 30s)
7. GET /api/health → check no new errors
```
### On failure
1. Alert (GitHub issue + optional webhook)
2. Canary VM rolled back to previous stable: `docker compose pull && docker compose up -d` with previous tag
3. Failed build tagged `:deprecated-YYYY.MM.N`
4. `:stable` tag reverted to previous good build
### Implementation
GitHub Actions workflow triggered after the build-and-push workflow completes:
```yaml
smoke-test:
needs: build-and-push
runs-on: ubuntu-latest
steps:
- name: Deploy to canary
run: |
gcloud compute ssh canary-vm --command="
cd /opt/agnes &&
docker compose pull &&
docker compose up -d"
- name: Wait for healthy
run: |
for i in $(seq 1 30); do
STATUS=$(curl -sf canary:8000/api/health | jq -r .status)
[ "$STATUS" != "unhealthy" ] && break
sleep 10
done
- name: Run smoke tests
run: |
# auth, catalog, query, sync checks
./scripts/smoke-test.sh canary:8000
- name: Rollback on failure
if: failure()
run: |
# retag and rollback
```
---
## 4. Self-Service Deployment
### Target experience
Customer (or their AI agent) goes from zero to running instance:
```bash
# 1. Get the code
git clone https://github.com/keboola/agnes-the-ai-analyst.git
cd agnes-the-ai-analyst
# 2. Start it
docker compose up -d
# 3. Open browser or use API
# First visit: /setup wizard (no users exist)
# Or headless: curl -X POST localhost:8000/auth/bootstrap ...
```
### Two setup modes
**A) Interactive (browser):**
- First visit when no users exist → redirected to `/setup`
- Step 1: Create admin account (email + password)
- Step 2: Choose data source (Keboola / BigQuery / CSV / Custom)
- Step 3: Enter credentials (token, URL)
- Step 4: Auto-discover and register tables
- Step 5: Trigger first sync
- Done → redirect to dashboard
**B) Headless (AI agent / CLI):**
```bash
# Bootstrap admin
curl -X POST http://localhost:8000/auth/bootstrap \
-H "Content-Type: application/json" \
-d '{"email":"admin@company.com","password":"SecurePass123!"}'
# Configure data source
curl -X POST http://localhost:8000/api/admin/configure \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"data_source":"keboola","keboola_token":"...","keboola_url":"..."}'
# Discover and register tables
curl -X POST http://localhost:8000/api/admin/discover-and-register \
-H "Authorization: Bearer $TOKEN"
# Trigger first sync
curl -X POST http://localhost:8000/api/sync/trigger \
-H "Authorization: Bearer $TOKEN"
```
Both modes lead to the same result. An AI agent uses the headless mode.
### Auto-configuration
On first `docker compose up` with no `.env`:
- `JWT_SECRET_KEY` auto-generated and persisted to `/data/state/.jwt_secret`
- `SESSION_SECRET` auto-generated similarly
- App starts in "setup mode" — only `/setup`, `/auth/bootstrap`, and `/api/health` accessible
On first `docker compose up` with `.env` containing `KEBOOLA_STORAGE_TOKEN`:
- Auto-discovers tables from Keboola on first sync
- Skips manual table registration step
### What customer must provide
| Required | Optional |
|----------|----------|
| Server with Docker | Custom domain + TLS |
| Admin email + password | Google OAuth credentials |
| Data source credentials (Keboola token OR BigQuery creds OR CSV files) | Telegram bot token |
| | Jira webhook secret |
### What customer must NOT do
- Edit YAML manually (setup wizard generates `instance.yaml`)
- Generate JWT secret (auto-generated)
- Register tables manually (auto-discovery)
- Understand DuckDB internals
---
## 5. Custom Connectors (Three Tiers)
All tiers produce the same output: `extract.duckdb` with `_meta` table + `data/*.parquet`. Orchestrator treats them identically.
### Tier A: Local mount (fastest, AI-generated)
Customer's AI agent generates a connector. Lives outside Docker image, survives updates.
```
/opt/agnes/
├── docker-compose.yml ← official image
├── docker-compose.override.yml ← customer additions
└── custom-connectors/
└── snowflake/
├── extractor.py
└── requirements.txt
```
```yaml
# docker-compose.override.yml
services:
app:
volumes:
- ./custom-connectors:/app/connectors/custom:ro
```
Orchestrator scans `connectors/custom/*/` in addition to built-in connectors.
**How the AI agent creates one:**
1. Reads CLAUDE.md → understands extract.duckdb contract
2. Reads existing connector as reference (e.g., `connectors/keboola/extractor.py`)
3. Generates `custom-connectors/snowflake/extractor.py`
4. Runs contract test to validate output
5. Done — orchestrator picks it up on next rebuild
**Requirements for this to work:**
- CLAUDE.md must perfectly describe the contract
- Contract test must be runnable standalone
- Existing connectors must be readable as examples
- Clear error messages when contract doesn't match
### Tier B: Standalone container (complex dependencies)
For connectors needing their own runtime (Java, .NET, heavy Python packages).
```yaml
# docker-compose.override.yml
services:
connector-sap:
build: ./custom-connectors/sap
volumes:
- data:/data
environment:
- DATA_DIR=/data
- SAP_HOST=...
profiles:
- extract
```
Connector is its own Docker image. Writes to `/data/extracts/sap/extract.duckdb`. Orchestrator finds it automatically.
### Tier C: Community PR (shared with all)
Connector contributed to main repo via PR. After merge, available in official image for all customers.
```
connectors/
├── keboola/ ← built-in
├── bigquery/ ← built-in
├── jira/ ← built-in
└── snowflake/ ← community contributed
```
**PR requirements:**
- Must pass contract tests
- Must include tests
- Must not modify shared code (orchestrator, API, auth)
- CI runs full connector matrix
---
## 6. CI/CD Pipeline
### On feature branch push
```yaml
ci.yml:
- tests (all 654+)
- contract tests (all connectors)
- docker build
- push :dev + :dev-YYYY.MM.N + :sha-xxx to GHCR
```
### On merge to main
```yaml
release.yml:
- tests (all)
- contract tests (all connectors)
- breaking change detection (OpenAPI diff, schema diff)
- docker build
- push :stable + :stable-YYYY.MM.N + :sha-xxx to GHCR
- trigger smoke test on canary
smoke-test.yml (triggered):
- deploy to canary VM
- run smoke test sequence
- on failure: rollback canary, tag build as deprecated, create alert
```
### On PR
```yaml
pr-check.yml:
- tests
- contract tests
- breaking change detection
- label PR: "BREAKING" if detected
- require 2 reviewers if breaking
```
---
## 7. Infrastructure (Cloud-Agnostic)
### Primary: Docker Compose
Works everywhere Docker runs. This is the default and only required deployment method.
```bash
git clone https://github.com/keboola/agnes-the-ai-analyst.git
cd agnes-the-ai-analyst
docker compose up -d
```
### Optional: Terraform (GCP)
For automated provisioning. Lives in `infra/` with GCS remote state backend.
```bash
cd infra
terraform workspace new customer-name
terraform apply -var-file=instances/customer-name.tfvars
```
Creates VM, installs Docker, clones repo, generates `.env` and `instance.yaml`, starts Docker Compose.
### Optional: Caddy TLS
Production profile adds Caddy reverse proxy with automatic Let's Encrypt:
```bash
DOMAIN=data.customer.com docker compose --profile production up -d
```
### Directory layout on customer server
```
/opt/agnes/ ← git clone
├── docker-compose.yml ← official
├── docker-compose.prod.yml ← GHCR images
├── docker-compose.override.yml ← customer customizations
├── .env ← secrets (gitignored)
├── config/
│ └── instance.yaml ← generated by setup wizard
├── custom-connectors/ ← Tier A connectors
│ └── snowflake/
└── Caddyfile ← TLS config
/data/ ← Docker volume (persistent)
├── state/system.duckdb ← users, registry, sync state
├── analytics/server.duckdb ← views into extracts
└── extracts/ ← per-source data
├── keboola/extract.duckdb
├── bigquery/extract.duckdb
└── snowflake/extract.duckdb ← from custom connector
```
---
## 8. AI Agent as Primary Installer
CLAUDE.md and documentation must be optimized for AI agent consumption:
### CLAUDE.md requirements
- Complete extract.duckdb contract with exact SQL for `_meta` and `_remote_attach`
- Step-by-step setup instructions with exact curl commands
- Existing connectors as reference for AI-generated new ones
- Clear error messages explaining what went wrong and how to fix
### API requirements
- All setup operations available as API calls (not just UI)
- Self-describing error messages: `"Missing KEBOOLA_STORAGE_TOKEN. Set it in .env or pass via /api/admin/configure"`
- `/api/health` returns structured diagnostics AI agent can parse
- `/api/admin/configure` accepts data source config without file editing
### Documentation requirements
- Machine-readable (no screenshots, no "click here")
- Every manual step has an equivalent API/CLI command
- QUICKSTART.md optimized for copy-paste by AI agent
---
## 9. What Needs to Be Built
### Must have (blocks multi-instance)
| # | What | Effort |
|---|------|--------|
| 1 | CalVer auto-tagging in CI (release.yml) | 1 day |
| 2 | Smoke test script + CI workflow | 1 day |
| 3 | Breaking change detection in CI (OpenAPI diff, contract diff) | 2 days |
| 4 | `/setup` wizard (web) + `/api/admin/configure` (headless) | 3 days |
| 5 | Auto-generate JWT_SECRET_KEY on first start | 0.5 day |
| 6 | Auto-discovery for Keboola tables on first sync | 1 day |
| 7 | Custom connector mount support in orchestrator | 1 day |
| 8 | `CHANGELOG.md` + release notes template | 0.5 day |
| 9 | Health endpoint version + channel info | 0.5 day |
### Should have (improves experience)
| # | What | Effort |
|---|------|--------|
| 10 | Deprecated version warning in health endpoint | 0.5 day |
| 11 | `/api/admin/discover-and-register` auto-discovery endpoint | 1 day |
| 12 | Standalone container connector example (Tier B) | 0.5 day |
| 13 | CLAUDE.md optimization for AI agent setup | 1 day |
| 14 | Terraform module refactor for multi-workspace | 1 day |
### Nice to have (future)
| # | What |
|---|------|
| 15 | Community connector contribution guide |
| 16 | Instance health dashboard (central monitoring) |
| 17 | Automated backup (GCP disk snapshots) |
| 18 | Usage analytics (opt-in telemetry) |
---
## Non-Goals
- Multi-tenancy in single process (each customer = separate instance)
- Kubernetes/Helm (Docker Compose is sufficient for target scale)
- Paid tier / license keys (open-source, monetization TBD)
- GUI for connector development (AI agent + CLAUDE.md is sufficient)

View file

@ -0,0 +1,16 @@
"""Generate OpenAPI snapshot from the current FastAPI app."""
import json
import os
import sys
# Put the parent of this script's directory on the import path so that the
# `app` package imported below can be found.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Defaults are set before the app import below; values already present in the
# environment are kept (setdefault).
# NOTE(review): presumably the app reads these at import/creation time — confirm.
os.environ.setdefault("TESTING", "1")
os.environ.setdefault("JWT_SECRET_KEY", "snapshot-generation-key-32-chars-min!!")
# Late import on purpose (hence E402): it must come after the path and
# environment setup above.
from app.main import create_app # noqa: E402
app = create_app()
schema = app.openapi()
# Sorted keys and fixed indentation keep the output stable between runs,
# which makes the snapshot diffable.
json.dump(schema, sys.stdout, indent=2, sort_keys=True)
# json.dump does not end the output with a newline; add one.
sys.stdout.write("\n")

97
scripts/smoke-test.sh Executable file
View file

@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Agnes smoke test — verifies a running instance is functional.
# Usage: ./scripts/smoke-test.sh [base-url]
# Default: http://localhost:8000
#
# Environment:
#   SMOKE_TOKEN  Bearer token used for the authenticated checks when the
#                instance already has users (bootstrap answers 403). Without
#                it those checks are skipped.
#
# Exit status: 0 when every executed check passed, 1 otherwise (including an
# unreachable or unhealthy instance at the first health check).
set -euo pipefail

HOST="${1:-http://localhost:8000}"
PASS=0
FAIL=0
TOKEN=""

# The bootstrap response contains an access token. Keep it in a private temp
# file that is removed on exit rather than at a predictable path in /tmp.
BOOT_BODY="$(mktemp)"
trap 'rm -f "$BOOT_BODY"' EXIT

# check NAME OK — record one result; OK must be the literal string "true" to pass.
check() {
    local name="$1" ok="$2"
    if [ "$ok" = "true" ]; then
        echo " PASS $name"
        PASS=$((PASS + 1))
    else
        echo " FAIL $name"
        FAIL=$((FAIL + 1))
    fi
}

echo "Smoke test: $HOST"
echo "---"

# 1. Health check — abort immediately if the instance is down or unhealthy.
HEALTH=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
if [ "$HEALTH" = "unhealthy" ] || [ "$HEALTH" = "unreachable" ]; then
    echo " FATAL: health=$HEALTH"
    exit 1
fi
check "health ($HEALTH)" "true"

# 2. Health has version fields
HAS_VERSION=$(curl -sf "$HOST/api/health" | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if 'version' in d and 'channel' in d and 'schema_version' in d else 'false')
" 2>/dev/null || echo "false")
check "health version fields" "$HAS_VERSION"

# 3. Bootstrap (only works on fresh DB; 403 means users exist)
# curl itself prints 000 for %{http_code} when the request fails, so a failed
# curl must not append a second "000"; fall back only if nothing was printed.
BOOT_HTTP=$(curl -s -o "$BOOT_BODY" -w "%{http_code}" -X POST "$HOST/auth/bootstrap" \
    -H "Content-Type: application/json" \
    -d '{"email":"smoke@test.local","name":"Smoke Test","password":"SmokeTest123!"}' 2>/dev/null || true)
BOOT_HTTP="${BOOT_HTTP:-000}"
if [ "$BOOT_HTTP" = "200" ]; then
    TOKEN=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1]))['access_token'])" "$BOOT_BODY" 2>/dev/null || echo "")
    check "bootstrap (new admin)" "true"
elif [ "$BOOT_HTTP" = "403" ]; then
    TOKEN="${SMOKE_TOKEN:-}"
    echo " SKIP bootstrap (users exist)"
else
    check "bootstrap (HTTP $BOOT_HTTP)" "false"
fi

# 4. Query SELECT 1 (requires auth)
if [ -n "$TOKEN" ]; then
    QUERY_OK=$(curl -sf -X POST "$HOST/api/query" \
        -H "Authorization: Bearer $TOKEN" \
        -H "Content-Type: application/json" \
        -d '{"sql":"SELECT 1 as test"}' | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if len(d.get('rows',[])) > 0 else 'false')
" 2>/dev/null || echo "false")
    check "query SELECT 1" "$QUERY_OK"
else
    echo " SKIP query (no token)"
fi

# 5. Sync trigger
if [ -n "$TOKEN" ]; then
    SYNC_HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$HOST/api/sync/trigger" \
        -H "Authorization: Bearer $TOKEN" 2>/dev/null || true)
    SYNC_HTTP="${SYNC_HTTP:-000}"
    if [[ "$SYNC_HTTP" =~ ^(200|202)$ ]]; then
        check "sync trigger" "true"
    else
        check "sync trigger (HTTP $SYNC_HTTP)" "false"
    fi
else
    echo " SKIP sync (no token)"
fi

# 6. Post-sync health (wait briefly)
sleep 5
HEALTH2=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
if [ "$HEALTH2" = "unhealthy" ] || [ "$HEALTH2" = "unreachable" ]; then
    check "post-sync health ($HEALTH2)" "false"
else
    check "post-sync health ($HEALTH2)" "true"
fi

# Results
echo ""
echo "Results: $PASS passed, $FAIL failed"
[ "$FAIL" -eq 0 ] || exit 1

View file

@ -4,12 +4,16 @@ Provides get_system_db() for the system state database
and get_analytics_db() for the analytics database with parquet views.
"""
import logging
import os
import re
import shutil
from pathlib import Path
import duckdb
logger = logging.getLogger(__name__)
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
SCHEMA_VERSION = 3
@ -260,6 +264,25 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
"""Create tables if they don't exist. Apply migrations if schema version changed."""
current = get_schema_version(conn)
if current < SCHEMA_VERSION:
# Snapshot before migration for rollback support
if current > 0:
try:
db_path = Path(os.environ.get("DATA_DIR", "./data")) / "state" / "system.duckdb"
if db_path.exists():
# Flush WAL to main DB file before copying
try:
conn.execute("CHECKPOINT")
except Exception:
pass # CHECKPOINT may fail on read-only or in-memory DBs
snapshot = db_path.parent / "system.duckdb.pre-migrate"
shutil.copy2(str(db_path), str(snapshot))
# Also copy WAL if it still exists (belt and suspenders)
wal_path = Path(str(db_path) + ".wal")
if wal_path.exists():
shutil.copy2(str(wal_path), str(snapshot) + ".wal")
logger.info("Pre-migration snapshot saved: %s", snapshot)
except Exception as e:
logger.warning("Could not create pre-migration snapshot: %s", e)
conn.execute(_SYSTEM_SCHEMA)
if current == 0:
conn.execute(

5151
tests/snapshots/openapi.json Normal file

File diff suppressed because it is too large Load diff

View file

@ -144,6 +144,205 @@ class TestGetAnalyticsDb:
conn.close()
class TestMigrationSafety:
"""Tests for schema migration correctness, idempotency, and safety snapshots."""
# Minimal v2 table_registry (no is_public column — that comes in v3)
_V2_TABLE_REGISTRY = """
CREATE TABLE table_registry (
id VARCHAR PRIMARY KEY,
name VARCHAR NOT NULL,
source_type VARCHAR,
bucket VARCHAR,
source_table VARCHAR,
sync_strategy VARCHAR DEFAULT 'full_refresh',
query_mode VARCHAR DEFAULT 'local',
sync_schedule VARCHAR,
profile_after_sync BOOLEAN DEFAULT true,
primary_key VARCHAR,
folder VARCHAR,
description TEXT,
registered_by VARCHAR,
registered_at TIMESTAMP DEFAULT current_timestamp
);
"""
def _create_v2_db(self, db_path):
"""Create a minimal v2-schema DuckDB file at db_path."""
import duckdb as _duckdb
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = _duckdb.connect(str(db_path))
try:
conn.execute(
"CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
"INSERT INTO schema_version (version) VALUES (2);"
)
conn.execute(self._V2_TABLE_REGISTRY)
# Stub out remaining tables so _ensure_schema doesn't fail
for ddl in [
"CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
"CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
"CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
"CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
"CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
"CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
"CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
"CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
"CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
"CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
"CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
"CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
]:
conn.execute(ddl)
finally:
conn.close()
def test_v2_to_v3_migration(self, tmp_path, monkeypatch):
"""v2 DB migrated to v3: schema_version=3 and is_public column added."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
import duckdb as _duckdb
from src.db import _ensure_schema, get_schema_version
db_path = tmp_path / "state" / "system.duckdb"
self._create_v2_db(db_path)
conn = _duckdb.connect(str(db_path))
try:
_ensure_schema(conn)
assert get_schema_version(conn) == 3
cols = {
r[0]
for r in conn.execute(
"SELECT column_name FROM information_schema.columns WHERE table_name='table_registry'"
).fetchall()
}
assert "is_public" in cols
finally:
conn.close()
def test_migration_idempotency(self, tmp_path, monkeypatch):
"""Calling _ensure_schema twice on a fresh DB raises no error and leaves version at 3."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
import duckdb as _duckdb
from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION
db_path = tmp_path / "state" / "system.duckdb"
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = _duckdb.connect(str(db_path))
try:
_ensure_schema(conn)
_ensure_schema(conn)
assert get_schema_version(conn) == SCHEMA_VERSION
finally:
conn.close()
def test_migration_preserves_data(self, tmp_path, monkeypatch):
"""Data inserted before migration is preserved after migration runs."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
import duckdb as _duckdb
from src.db import _ensure_schema, get_schema_version, _SYSTEM_SCHEMA
db_path = tmp_path / "state" / "system.duckdb"
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = _duckdb.connect(str(db_path))
try:
# Build a v1 schema manually
conn.execute(
"CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
"INSERT INTO schema_version (version) VALUES (1);"
)
conn.execute("""
CREATE TABLE table_registry (
id VARCHAR PRIMARY KEY,
name VARCHAR NOT NULL,
folder VARCHAR,
sync_strategy VARCHAR,
primary_key VARCHAR,
description TEXT,
registered_by VARCHAR,
registered_at TIMESTAMP DEFAULT current_timestamp
);
""")
conn.execute(
"INSERT INTO table_registry (id, name, description) VALUES ('row1', 'MyTable', 'kept')"
)
# Stub remaining tables
for ddl in [
"CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
"CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
"CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
"CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
"CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
"CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
"CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
"CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
"CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
"CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
"CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
"CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
]:
conn.execute(ddl)
_ensure_schema(conn)
assert get_schema_version(conn) == 3
row = conn.execute(
"SELECT name, description FROM table_registry WHERE id='row1'"
).fetchone()
assert row is not None, "Pre-migration row was lost"
assert row[0] == "MyTable"
assert row[1] == "kept"
finally:
conn.close()
def test_pre_migration_snapshot_created(self, tmp_path, monkeypatch):
    """Opening an already-existing system DB must leave a pre-migrate snapshot on disk.

    The DB file is seeded through ``self._create_v2_db`` (helper defined
    elsewhere in the class; presumably writes a version-2 schema) before
    ``get_system_db`` is called, so the call sees a non-fresh database.
    """
    # DATA_DIR is pointed at the temp dir before src.db is imported.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    from src.db import get_system_db

    state_dir = tmp_path / "state"
    expected_snapshot = state_dir / "system.duckdb.pre-migrate"

    # Seed the database at the location get_system_db is expected to open.
    self._create_v2_db(state_dir / "system.duckdb")

    db = get_system_db()
    try:
        snapshot_found = expected_snapshot.exists()
        assert snapshot_found, "Pre-migration snapshot was not created"
    finally:
        # Always release the connection, even when the assertion fails.
        db.close()
def test_no_snapshot_on_fresh_db(self, tmp_path, monkeypatch):
    """Initialising a brand-new system DB must not produce a pre-migrate snapshot."""
    # DATA_DIR is pointed at the (empty) temp dir before src.db is imported.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    from src.db import get_system_db

    unexpected_snapshot = tmp_path / "state" / "system.duckdb.pre-migrate"

    db = get_system_db()
    try:
        snapshot_found = unexpected_snapshot.exists()
        assert not snapshot_found, "Snapshot should not exist for a fresh DB"
    finally:
        # Always release the connection, even when the assertion fails.
        db.close()
def test_future_version_is_noop(self, tmp_path, monkeypatch):
    """_ensure_schema must not touch a DB whose schema_version exceeds SCHEMA_VERSION.

    A ``schema_version`` table holding the value 99 is created by hand;
    after ``_ensure_schema`` runs, the recorded version must still be 99.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import duckdb as _duckdb
    from src.db import _ensure_schema, get_schema_version

    state_dir = tmp_path / "state"
    state_dir.mkdir(parents=True, exist_ok=True)

    db = _duckdb.connect(str(state_dir / "system.duckdb"))
    try:
        # Both statements are sent in a single execute() call, as one string.
        seed_sql = (
            "CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
            "INSERT INTO schema_version (version) VALUES (99);"
        )
        db.execute(seed_sql)

        _ensure_schema(db)

        recorded_version = get_schema_version(db)
        assert recorded_version == 99
    finally:
        # Always release the connection, even when the assertion fails.
        db.close()
class TestGetAnalyticsDbReadonly:
def test_analytics_readonly_rejects_malicious_dir_name(self, tmp_path, monkeypatch):
"""Directories with SQL-injection chars in their name are skipped."""

View file

@ -0,0 +1,73 @@
"""OpenAPI snapshot test — detect breaking API changes.
Compares the current app's OpenAPI schema against a committed snapshot.
Fails if any path or HTTP method has been removed (breaking change).
To update the snapshot after an intentional change:
make update-openapi-snapshot
"""
import json
import os
from pathlib import Path
import pytest
# Committed OpenAPI snapshot, located in the "snapshots" directory next to this test module.
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "openapi.json"
@pytest.fixture(scope="module")
def current_schema():
    """Build the application once per test module and return its OpenAPI document.

    Returns:
        Whatever ``create_app().openapi()`` produces; the tests in this
        module treat it as a dict with an optional ``"paths"`` key.
    """
    # Only set TESTING when the caller has not chosen a value already.
    if "TESTING" not in os.environ:
        os.environ["TESTING"] = "1"

    # Imported after TESTING is in place, so the flag is set before app.main loads.
    from app.main import create_app

    return create_app().openapi()
def test_snapshot_exists():
    """The OpenAPI snapshot file must be present at SNAPSHOT_PATH."""
    snapshot_present = SNAPSHOT_PATH.exists()
    missing_hint = "No OpenAPI snapshot found. Generate one with: make update-openapi-snapshot"
    assert snapshot_present, missing_hint
def test_no_removed_paths(current_schema):
    """No API paths should be removed compared to the snapshot.

    Paths that exist only in the current schema (additions) are allowed;
    only paths present in the snapshot but absent now cause a failure.

    Args:
        current_schema: OpenAPI document of the current app, supplied by
            the ``current_schema`` fixture.
    """
    # A missing snapshot is skipped here; test_snapshot_exists reports it.
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")

    # Decode explicitly as UTF-8 so the comparison does not depend on the
    # platform's locale default encoding.
    snapshot = json.loads(SNAPSHOT_PATH.read_text(encoding="utf-8"))

    current_paths = set(current_schema.get("paths", {}))
    snapshot_paths = set(snapshot.get("paths", {}))
    removed = snapshot_paths - current_paths
    assert not removed, (
        f"BREAKING: {len(removed)} API path(s) removed: {sorted(removed)}\n"
        "If intentional, run: make update-openapi-snapshot"
    )
def test_no_removed_methods(current_schema):
    """No HTTP methods should be removed from existing paths.

    For every path present in both the committed snapshot and the current
    schema, the operation keys are compared. Any HTTP method found in the
    snapshot but missing from the current schema is reported as breaking.
    Paths that were removed entirely are not examined here.

    Args:
        current_schema: OpenAPI document of the current app, supplied by
            the ``current_schema`` fixture.
    """
    # A missing snapshot is skipped here; test_snapshot_exists reports it.
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")

    # Decode explicitly as UTF-8 so the comparison does not depend on the
    # platform's locale default encoding.
    snapshot = json.loads(SNAPSHOT_PATH.read_text(encoding="utf-8"))
    current_paths = current_schema.get("paths", {})
    snapshot_paths = snapshot.get("paths", {})

    # Operation keys of an OpenAPI Path Item Object. Non-operation keys such
    # as 'parameters' are ignored. 'trace' is a valid operation key, so it is
    # included; otherwise a removed TRACE operation would go unnoticed.
    http_methods = {"get", "put", "post", "delete", "options", "head", "patch", "trace"}

    breaking = []
    # Sorted so the failure message is stable from run to run.
    for path in sorted(set(snapshot_paths) & set(current_paths)):
        removed_methods = set(snapshot_paths[path]) - set(current_paths[path])
        removed_http = removed_methods & http_methods
        if removed_http:
            breaking.append(f" {path}: {sorted(removed_http)}")

    assert not breaking, (
        f"BREAKING: HTTP methods removed from {len(breaking)} path(s):\n"
        + "\n".join(breaking)
        + "\nIf intentional, run: make update-openapi-snapshot"
    )

View file

@ -304,26 +304,37 @@ class TestJwtClaims:
# ---- JWT Secret Hardening ----
class TestJwtSecretHardening:
def test_raises_without_jwt_secret_in_non_test_env(self):
"""Module-level code must raise RuntimeError when JWT_SECRET_KEY is absent
and TESTING is not set, preventing accidental production deploys with no secret."""
def test_auto_generates_jwt_secret_when_absent(self, tmp_path):
"""When JWT_SECRET_KEY is absent and TESTING is not set,
the secret is auto-generated and persisted to a file."""
saved_key = os.environ.pop("JWT_SECRET_KEY", None)
saved_testing = os.environ.pop("TESTING", None)
# Eject any cached module so the re-import re-executes module-level code
saved_data_dir = os.environ.get("DATA_DIR")
os.environ["DATA_DIR"] = str(tmp_path)
# Eject cached modules so the re-import re-executes module-level code
sys.modules.pop("app.auth.jwt", None)
sys.modules.pop("app.secrets", None)
try:
with pytest.raises(RuntimeError, match="JWT_SECRET_KEY environment variable is required"):
importlib.import_module("app.auth.jwt")
importlib.import_module("app.auth.jwt")
secret_file = tmp_path / "state" / ".jwt_secret"
assert secret_file.exists(), "JWT secret file should be auto-generated"
secret = secret_file.read_text().strip()
assert len(secret) == 64, "Auto-generated secret should be 64 hex chars (32 bytes)"
finally:
# Restore environment before re-importing so the module loads cleanly
if saved_key is not None:
os.environ["JWT_SECRET_KEY"] = saved_key
if saved_testing is not None:
os.environ["TESTING"] = saved_testing
if saved_data_dir is not None:
os.environ["DATA_DIR"] = saved_data_dir
else:
os.environ.pop("DATA_DIR", None)
# If neither was set (bare test run), use TESTING flag so reload works
if saved_key is None and saved_testing is None:
os.environ["TESTING"] = "1"
sys.modules.pop("app.auth.jwt", None)
sys.modules.pop("app.secrets", None)
importlib.import_module("app.auth.jwt")
# Clean up the temporary TESTING flag if we added it
if saved_key is None and saved_testing is None: