Merge pull request #1 from keboola/feature/v2-fastapi-duckdb-docker-cli

feat: multi-instance deployment (14 must-have items)
This commit is contained in:
ZdenekSrotyr 2026-04-10 18:08:03 +02:00 committed by GitHub
commit dbc57d1de3
27 changed files with 7008 additions and 67 deletions

View file

@ -1,8 +1,9 @@
name: Build & Push # SUPERSEDED by release.yml — CalVer tagging with stable/dev channels.
# Kept for manual trigger only. Automated builds use release.yml.
name: Build & Push (legacy)
on: on:
push: workflow_dispatch: {}
branches: [main]
jobs: jobs:
test: test:
@ -24,27 +25,3 @@ jobs:
run: pytest tests/ -v --tb=short run: pytest tests/ -v --tb=short
env: env:
TESTING: "1" TESTING: "1"
build-and-push:
needs: test
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
- uses: actions/checkout@v5
- name: Log in to GHCR
uses: docker/login-action@v4
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v7
with:
push: true
tags: |
ghcr.io/${{ github.repository }}:latest
ghcr.io/${{ github.repository }}:${{ github.sha }}

148
.github/workflows/release.yml vendored Normal file
View file

@ -0,0 +1,148 @@
# Release pipeline: run tests, claim a CalVer tag, build and push the image,
# then smoke-test the pushed image (main branch only).
name: Release

on:
  push:
    branches: [main, "feature/**"]

# Read-only by default; write scopes are granted only to the job that needs them.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Install dependencies
        run: uv pip install --system ".[dev]"
      - name: Run tests
        run: pytest tests/ -v --tb=short
        env:
          TESTING: "1"

  build-and-push:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: write  # push the claimed version tag
      packages: write  # push images to GHCR
    outputs:
      image_tag: ${{ steps.meta.outputs.versioned_tag }}
      version: ${{ steps.meta.outputs.version }}
      channel: ${{ steps.meta.outputs.channel }}
    steps:
      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
          fetch-tags: true
      # The script reads GITHUB_REF / GITHUB_SHA from the runner environment
      # instead of expanding workflow expressions inside the shell text, so a
      # crafted branch name cannot inject shell commands.
      - name: Claim version tag (with retry to avoid race conditions)
        id: meta
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          YEAR_MONTH=$(date -u +%Y.%m)
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            CHANNEL="stable"
          else
            CHANNEL="dev"
          fi
          SHORT_SHA="${GITHUB_SHA:0:7}"
          # Claim a unique version by pushing a git tag BEFORE building.
          # Retry up to 5 times if another CI run took our N.
          TAG_CLAIMED=false
          for ATTEMPT in 1 2 3 4 5; do
            git fetch --tags --force
            # Use max(N) not count — safe even if tags are deleted
            MAX_N=$(git tag -l "*-${YEAR_MONTH}.*" | sed 's/.*\.//' | sort -n | tail -1)
            N=$(( ${MAX_N:-0} + 1 ))
            VERSION="${YEAR_MONTH}.${N}"
            TAG="${CHANNEL}-${VERSION}"
            git tag -a "$TAG" -m "Release $TAG"
            if PUSH_OUTPUT=$(git push origin "$TAG" 2>&1); then
              echo "Claimed tag $TAG (attempt $ATTEMPT)"
              TAG_CLAIMED=true
              break
            else
              # Show git's own message: the push can fail for reasons other
              # than a tag collision (permissions, network).
              echo "Could not push tag $TAG (likely already claimed), retrying... (attempt $ATTEMPT)"
              echo "git push output: ${PUSH_OUTPUT}"
              git tag -d "$TAG"
              sleep 2
            fi
          done
          if [ "$TAG_CLAIMED" != "true" ]; then
            echo "::error::Failed to claim a unique version tag after 5 attempts"
            exit 1
          fi
          echo "channel=${CHANNEL}" >> "$GITHUB_OUTPUT"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "versioned_tag=${TAG}" >> "$GITHUB_OUTPUT"
          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
          echo "Channel: ${CHANNEL}"
          echo "Version: ${VERSION}"
          echo "Versioned tag: ${TAG}"
      - name: Log in to GHCR
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v7
        with:
          push: true
          build-args: |
            AGNES_VERSION=${{ steps.meta.outputs.version }}
            RELEASE_CHANNEL=${{ steps.meta.outputs.channel }}
          tags: |
            ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.channel }}
            ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.versioned_tag }}
            ghcr.io/${{ github.repository }}:sha-${{ steps.meta.outputs.short_sha }}

  smoke-test:
    needs: build-and-push
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Start Agnes from built image
        env:
          # Image tag produced by build-and-push, exported to the step
          # environment for docker compose to read.
          AGNES_TAG: ${{ needs.build-and-push.outputs.image_tag }}
        run: |
          # Create empty .env (docker-compose.yml requires env_file: .env, gitignored)
          touch .env
          # Use prod compose (GHCR images) + CI overlay (test secrets)
          docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml up -d app
          # Wait for healthy (max 60s)
          timeout 60 bash -c 'until curl -sf http://localhost:8000/api/health | python3 -c "import sys,json; d=json.load(sys.stdin); sys.exit(0 if d[\"status\"]!=\"unhealthy\" else 1)"; do sleep 3; done'
      - name: Run smoke tests
        run: bash scripts/smoke-test.sh http://localhost:8000
      - name: Collect logs on failure
        if: failure()
        run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml logs > smoke-test-logs.txt
      - name: Upload logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: smoke-test-logs
          path: smoke-test-logs.txt
      - name: Teardown
        if: always()
        run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml down -v

33
CHANGELOG.md Normal file
View file

@ -0,0 +1,33 @@
# Changelog
All notable changes to Agnes AI Data Analyst are documented in this file.
Format: [CalVer](https://calver.org/) `YYYY.MM.N` with channels `stable` and `dev`.
---
## stable-2026.04.1 (unreleased)
Multi-instance deployment and self-service setup.
### Added
- CalVer versioning with `stable` and `dev` release channels
- `/api/health` now returns `version`, `channel`, and `schema_version`
- Auto-generated JWT and session secrets with file persistence (`/data/state/.jwt_secret`, `/data/state/.session_secret`)
- Pre-migration snapshot of `system.duckdb` before schema upgrades
- `POST /api/admin/configure` for headless data source configuration
- `POST /api/admin/discover-and-register` combined table discovery and registration
- `/setup` web wizard for first-time instance setup
- `scripts/smoke-test.sh` for post-deploy verification
- Smoke test job in CI (Docker-in-CI after every stable release from `main`)
- OpenAPI snapshot test for breaking change detection
- Custom connector mount support (`connectors/custom/`)
- Startup banner logging version, channel, and schema version
- Schema migration safety tests (idempotency, data preservation, snapshot)
- `CHANGELOG.md` and release notes template
### Breaking Changes
None.
### Migration Guide
No action required. Existing instances upgrade seamlessly.

View file

@ -154,7 +154,7 @@ Auth providers in `app/auth/` (FastAPI-based):
## Key Implementation Details ## Key Implementation Details
### DuckDB Schema (src/db.py) ### DuckDB Schema (src/db.py)
- Schema v2 with auto-migration from v1 - Schema v3 with auto-migration from v1→v2→v3
- `table_registry`: id, name, source_type, bucket, source_table, query_mode, sync_schedule, etc. - `table_registry`: id, name, source_type, bucket, source_table, query_mode, sync_schedule, etc.
- `sync_state`, `sync_history`: track extraction progress - `sync_state`, `sync_history`: track extraction progress
- `users`, `dataset_permissions`, `audit_log`: auth + RBAC - `users`, `dataset_permissions`, `audit_log`: auth + RBAC

View file

@ -6,6 +6,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf
# Install uv for fast dependency management # Install uv for fast dependency management
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
ARG AGNES_VERSION=dev
ARG RELEASE_CHANNEL=dev
ENV AGNES_VERSION=${AGNES_VERSION}
ENV RELEASE_CHANNEL=${RELEASE_CHANNEL}
WORKDIR /app WORKDIR /app
# Copy application code # Copy application code

View file

@ -1,6 +1,6 @@
# Agnes AI Data Analyst — Development Makefile # Agnes AI Data Analyst — Development Makefile
.PHONY: help test lint dev docker .PHONY: help test lint dev docker update-openapi-snapshot
help: help:
@echo "Available targets:" @echo "Available targets:"
@ -20,3 +20,7 @@ docker:
lint: lint:
@ruff check . 2>/dev/null || echo "ruff not installed: pip install ruff" @ruff check . 2>/dev/null || echo "ruff not installed: pip install ruff"
update-openapi-snapshot:
TESTING=1 python scripts/generate_openapi.py > tests/snapshots/openapi.json
@echo "Snapshot updated. Review diff and commit."

View file

@ -1,7 +1,9 @@
"""Admin endpoints — table discovery, registry management.""" """Admin endpoints — table discovery, registry management, instance configuration."""
import logging import logging
import os
import uuid import uuid
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
@ -42,6 +44,16 @@ class UpdateTableRequest(BaseModel):
profile_after_sync: Optional[bool] = None profile_after_sync: Optional[bool] = None
class ConfigureRequest(BaseModel):
    # Request body accepted by POST /configure.
    # NOTE(review): documented with comments rather than a class docstring, since
    # a model docstring may surface in the generated API schema — confirm.

    # Which backend to configure; the handler rejects any other value with HTTP 400.
    data_source: str  # "keboola" | "bigquery" | "local"
    # Keboola credentials; the handler requires both when data_source is "keboola".
    keboola_token: Optional[str] = None
    keboola_url: Optional[str] = None
    # BigQuery settings; the handler requires the project and falls back to
    # location "us" when none is given.
    bigquery_project: Optional[str] = None
    bigquery_location: Optional[str] = None
    # Optional instance settings, written to instance.name and auth.allowed_domain.
    instance_name: Optional[str] = None
    allowed_domain: Optional[str] = None
@router.get("/discover-tables") @router.get("/discover-tables")
async def discover_tables( async def discover_tables(
user: dict = Depends(require_role(Role.ADMIN)), user: dict = Depends(require_role(Role.ADMIN)),
@ -53,10 +65,12 @@ async def discover_tables(
if source_type == "keboola": if source_type == "keboola":
from connectors.keboola.client import KeboolaClient from connectors.keboola.client import KeboolaClient
import os
from app.instance_config import get_value from app.instance_config import get_value
url = get_value("keboola", "url", default="") url = get_value("data_source", "keboola", "stack_url", default="")
token = os.environ.get(get_value("keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN"), "") token_env = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
token = os.environ.get(token_env, "") if token_env else ""
if not token:
token = os.environ.get("KEBOOLA_STORAGE_TOKEN", "")
client = KeboolaClient(token=token, url=url) client = KeboolaClient(token=token, url=url)
tables = client.discover_all_tables() tables = client.discover_all_tables()
return {"tables": tables, "count": len(tables), "source": "keboola"} return {"tables": tables, "count": len(tables), "source": "keboola"}
@ -144,3 +158,208 @@ async def unregister_table(
if not repo.get(table_id): if not repo.get(table_id):
raise HTTPException(status_code=404, detail="Table not found") raise HTTPException(status_code=404, detail="Table not found")
repo.unregister(table_id) repo.unregister(table_id)
@router.post("/configure")
async def configure_instance(
    request: ConfigureRequest,
    user: dict = Depends(require_role(Role.ADMIN)),
):
    """Configure data source and instance settings via API.

    Writes config to instance.yaml and persists secrets to .env_overlay.
    AI agents and the /setup wizard use this instead of manual file editing.
    """
    import yaml

    if request.data_source not in ("keboola", "bigquery", "local"):
        raise HTTPException(status_code=400, detail="data_source must be 'keboola', 'bigquery', or 'local'")

    # Values below are stored as KEY=VALUE lines; a line break inside one would
    # let a single field smuggle additional variables into .env_overlay.
    for field_name in ("keboola_token", "keboola_url"):
        field_value = getattr(request, field_name)
        if field_value and ("\n" in field_value or "\r" in field_value):
            raise HTTPException(status_code=400, detail=f"{field_name} must not contain line breaks")

    # Validate credentials if provided
    if request.data_source == "keboola":
        if not request.keboola_token or not request.keboola_url:
            raise HTTPException(status_code=400, detail="keboola_token and keboola_url are required for Keboola data source")
        try:
            from connectors.keboola.client import KeboolaClient
            client = KeboolaClient(token=request.keboola_token, url=request.keboola_url)
            client.test_connection()
        except Exception as e:
            # Log the cause server-side; the client only gets a generic message.
            logger.error("Keboola connection validation failed: %s", e)
            raise HTTPException(status_code=400, detail="Keboola connection failed. Check your token and URL.")
    elif request.data_source == "bigquery":
        if not request.bigquery_project:
            raise HTTPException(status_code=400, detail="bigquery_project is required for BigQuery data source")

    # Write instance.yaml to DATA_DIR/state/ (writable Docker volume),
    # NOT to CONFIG_DIR which is mounted read-only in Docker.
    state_dir = Path(os.environ.get("DATA_DIR", "./data")) / "state"
    config_path = state_dir / "instance.yaml"

    # Load existing API-generated config, or fall back to read-only CONFIG_DIR config
    if config_path.exists():
        existing = _read_yaml_mapping(config_path)
    else:
        ro_path = Path(os.environ.get("CONFIG_DIR", "./config")) / "instance.yaml"
        existing = _read_yaml_mapping(ro_path) if ro_path.exists() else {}

    # Merge instance settings
    if request.instance_name:
        existing.setdefault("instance", {})["name"] = request.instance_name
    if request.allowed_domain:
        existing.setdefault("auth", {})["allowed_domain"] = request.allowed_domain

    # Merge data source config (secrets as env var references)
    existing["data_source"] = {"type": request.data_source}
    if request.data_source == "keboola":
        existing["data_source"]["keboola"] = {
            "stack_url": request.keboola_url,
            "token_env": "KEBOOLA_STORAGE_TOKEN",
        }
    elif request.data_source == "bigquery":
        existing["data_source"]["bigquery"] = {
            "project": request.bigquery_project,
            "location": request.bigquery_location or "us",
        }

    # Write to writable data volume
    state_dir.mkdir(parents=True, exist_ok=True)
    config_path.write_text(yaml.dump(existing, default_flow_style=False, sort_keys=False))
    logger.info("Wrote instance config to %s", config_path)

    # Persist secrets to .env_overlay (in data volume, never in git)
    secrets_to_persist = {}
    if request.keboola_token:
        secrets_to_persist["KEBOOLA_STORAGE_TOKEN"] = request.keboola_token
    if request.keboola_url:
        secrets_to_persist["KEBOOLA_STACK_URL"] = request.keboola_url

    if secrets_to_persist:
        _write_env_overlay(state_dir / ".env_overlay", secrets_to_persist)
        logger.info("Persisted %d secrets to .env_overlay", len(secrets_to_persist))
        # Inject into current process environment
        for k, v in secrets_to_persist.items():
            os.environ[k] = v

    # Invalidate cached instance config so next read picks up changes
    import app.instance_config as ic
    ic._instance_config = None

    return {
        "status": "ok",
        "data_source": request.data_source,
        # NOTE(review): only the Keboola branch above performs a live connection
        # test; "verified" is also reported for bigquery. Value left unchanged
        # for client compatibility.
        "connection": "verified" if request.data_source != "local" else "local",
    }


def _read_yaml_mapping(path: Path) -> dict:
    """Return the YAML mapping stored at ``path``; {} if unreadable or not a mapping."""
    import yaml

    try:
        loaded = yaml.safe_load(path.read_text())
    except Exception:
        return {}
    # A list or scalar at the top level cannot be merged into; treat as empty.
    return loaded if isinstance(loaded, dict) else {}


def _write_env_overlay(overlay_path: Path, updates: dict) -> None:
    """Merge ``updates`` into the KEY=VALUE file at ``overlay_path``, owner-only from creation."""
    overlay_path.parent.mkdir(parents=True, exist_ok=True)

    # Merge with existing overlay
    merged = {}
    if overlay_path.exists():
        for line in overlay_path.read_text().splitlines():
            if "=" in line and not line.startswith("#"):
                k, v = line.split("=", 1)
                merged[k.strip()] = v.strip()
    merged.update(updates)
    payload = "\n".join(f"{k}={v}" for k, v in merged.items()) + "\n"

    # Create the file with mode 0600 up front so the secrets are never written
    # to a file created with the default, wider permissions.
    fd = os.open(overlay_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as fh:
        fh.write(payload)
    try:
        # Also tighten a pre-existing file created with wider permissions.
        overlay_path.chmod(0o600)
    except OSError:
        pass  # chmod may be unsupported on this platform
def _discover_and_register_tables(conn: duckdb.DuckDBPyConnection, user_email: str) -> dict:
    """Discover tables from configured source and register them. Shared logic for API and sync.

    Args:
        conn: DuckDB connection handed to ``TableRegistryRepository``.
        user_email: Recorded as ``registered_by`` on every new registry entry.

    Returns:
        Dict with ``registered``, ``skipped`` and ``errors`` counters, the list of
        newly registered table ids under ``tables``, and the ``source`` type. For
        any source other than "keboola" all counters are zero.
    """
    from app.instance_config import get_data_source_type, get_value

    source_type = get_data_source_type()
    if source_type != "keboola":
        return {"registered": 0, "skipped": 0, "errors": 0, "tables": [], "source": source_type}

    from connectors.keboola.client import KeboolaClient

    # Read from data_source.keboola (matches what /api/admin/configure writes)
    url = get_value("data_source", "keboola", "stack_url", default="")
    token_env = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
    token = os.environ.get(token_env, "") if token_env else ""
    if not token:
        # Fall back to the conventional variable when the configured one is unset.
        token = os.environ.get("KEBOOLA_STORAGE_TOKEN", "")

    client = KeboolaClient(token=token, url=url)
    discovered = client.discover_all_tables()

    repo = TableRegistryRepository(conn)
    registered = 0
    skipped = 0
    errors = 0
    table_names = []

    for table in discovered:
        # Coerce a missing, null or non-string id to text so one malformed
        # entry is counted as an error instead of aborting the whole batch.
        raw_id = str(table.get("id") or "")
        table_id = raw_id.strip().lower().replace(".", "_").replace(" ", "_")
        if not table_id:
            errors += 1
            continue
        if repo.get(table_id):
            skipped += 1
            continue
        try:
            # Parse bucket from table ID (format: in.c-bucket.table_name)
            parts = raw_id.split(".")
            bucket = parts[1] if len(parts) > 1 else ""
            source_table = parts[2] if len(parts) > 2 else (table.get("name") or "")
            repo.register(
                id=table_id,
                # A null name falls back to the id, same as a missing one.
                name=table.get("name") or table_id,
                source_type="keboola",
                bucket=bucket,
                source_table=source_table,
                query_mode="local",
                registered_by=user_email,
                description=f"Auto-discovered from Keboola: {raw_id}",
            )
            registered += 1
            table_names.append(table_id)
        except Exception as e:
            logger.warning("Failed to register %s: %s", table_id, e)
            errors += 1

    return {
        "registered": registered,
        "skipped": skipped,
        "errors": errors,
        "tables": table_names,
        "source": "keboola",
    }
@router.post("/discover-and-register")
async def discover_and_register(
    user: dict = Depends(require_role(Role.ADMIN)),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Discover tables from configured source and auto-register them.

    Combines discover-tables + register-table into one call.
    Skips already-registered tables. Used by /setup wizard and AI agents.
    """
    try:
        return _discover_and_register_tables(conn, user.get("email", "admin"))
    except Exception as e:
        # Keep the traceback in the server log; the response carries only the message.
        logger.exception("Discovery and registration failed")
        raise HTTPException(status_code=500, detail=f"Discovery and registration failed: {e}") from e

View file

@ -1,11 +1,13 @@
"""Health check endpoint — structured diagnostics for AI agents.""" """Health check endpoint — structured diagnostics for AI agents."""
import os
from datetime import datetime, timezone from datetime import datetime, timezone
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
import duckdb import duckdb
from app.auth.dependencies import _get_db from app.auth.dependencies import _get_db
from src.db import SCHEMA_VERSION
from src.repositories.sync_state import SyncStateRepository from src.repositories.sync_state import SyncStateRepository
router = APIRouter(tags=["health"]) router = APIRouter(tags=["health"])
@ -69,6 +71,9 @@ async def health_check(conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
return { return {
"status": overall, "status": overall,
"version": os.environ.get("AGNES_VERSION", "dev"),
"channel": os.environ.get("RELEASE_CHANNEL", "dev"),
"schema_version": SCHEMA_VERSION,
"timestamp": datetime.now(timezone.utc).isoformat(), "timestamp": datetime.now(timezone.utc).isoformat(),
"services": checks, "services": checks,
} }

View file

@ -63,6 +63,27 @@ def _run_sync(tables: Optional[List[str]] = None):
finally: finally:
sys_conn.close() sys_conn.close()
if not table_configs:
# Auto-discover tables on first sync when registry is empty
if source_type == "keboola" and os.environ.get("KEBOOLA_STORAGE_TOKEN"):
logger.info("No tables registered — running auto-discovery from Keboola")
try:
from app.api.admin import _discover_and_register_tables
auto_conn = get_system_db()
try:
result = _discover_and_register_tables(auto_conn, "auto-discovery")
logger.info("Auto-discovered %d tables, skipped %d", result["registered"], result["skipped"])
finally:
auto_conn.close()
# Re-read table configs after auto-registration
sys_conn2 = get_system_db()
try:
table_configs = TableRegistryRepository(sys_conn2).list_local(source_type)
finally:
sys_conn2.close()
except Exception as e:
logger.warning("Auto-discovery failed: %s", e)
if not table_configs: if not table_configs:
logger.warning("No tables to sync for source_type=%s", source_type) logger.warning("No tables to sync for source_type=%s", source_type)
return return
@ -113,6 +134,29 @@ print(json.dumps(result))
else: else:
print(f"[SYNC] Extractor OK", file=_sys.stderr, flush=True) print(f"[SYNC] Extractor OK", file=_sys.stderr, flush=True)
# Run custom connectors (Tier A: local mount)
connectors_dir = Path(os.environ.get("CONNECTORS_DIR", str(Path(__file__).parent.parent.parent / "connectors" / "custom")))
if connectors_dir.exists():
for connector_dir in sorted(connectors_dir.iterdir()):
if not connector_dir.is_dir():
continue
extractor = connector_dir / "extractor.py"
if not extractor.exists():
continue
logger.info("Running custom connector: %s", connector_dir.name)
try:
custom_result = subprocess.run(
[sys.executable, str(extractor)],
env=env, capture_output=True, text=True, timeout=600,
cwd=str(Path(__file__).parent.parent.parent),
)
if custom_result.returncode != 0:
logger.error("Custom connector %s failed: %s", connector_dir.name, custom_result.stderr[-500:])
else:
logger.info("Custom connector %s completed", connector_dir.name)
except subprocess.TimeoutExpired:
logger.error("Custom connector %s timed out", connector_dir.name)
# Rebuild master views (reads extract.duckdb files, no write conflict) # Rebuild master views (reads extract.duckdb files, no write conflict)
from src.orchestrator import SyncOrchestrator from src.orchestrator import SyncOrchestrator
orch = SyncOrchestrator() orch = SyncOrchestrator()

View file

@ -7,22 +7,22 @@ from typing import Optional
import jwt import jwt
SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "") def _get_secret_key() -> str:
"""Load JWT secret - from env, file, or auto-generated."""
if not SECRET_KEY:
if os.environ.get("TESTING", "").lower() in ("1", "true"): if os.environ.get("TESTING", "").lower() in ("1", "true"):
SECRET_KEY = "test-jwt-secret-key-minimum-32-chars!!" return os.environ.get("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!")
else: from app.secrets import get_jwt_secret
raise RuntimeError( key = get_jwt_secret()
"JWT_SECRET_KEY environment variable is required. " if len(key) < 32:
"Generate one: python -c \"import secrets; print(secrets.token_hex(32))\""
)
elif len(SECRET_KEY) < 32 and os.environ.get("TESTING", "").lower() not in ("1", "true"):
import warnings as _warnings import warnings as _warnings
_warnings.warn( _warnings.warn(
f"JWT_SECRET_KEY is {len(SECRET_KEY)} chars — minimum 32 recommended", f"JWT_SECRET_KEY is {len(key)} chars — minimum 32 recommended",
UserWarning, stacklevel=2, UserWarning, stacklevel=2,
) )
return key
SECRET_KEY = _get_secret_key()
ALGORITHM = "HS256" ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_HOURS = 24 # 24 hours ACCESS_TOKEN_EXPIRE_HOURS = 24 # 24 hours

View file

@ -11,15 +11,34 @@ _instance_config: Optional[dict] = None
def load_instance_config() -> dict: def load_instance_config() -> dict:
"""Load instance.yaml using the existing config loader.""" """Load instance.yaml — checks API-generated config first, then static config.
Search order:
1. DATA_DIR/state/instance.yaml (written by /api/admin/configure, writable)
2. CONFIG_DIR/instance.yaml (static, read-only in Docker)
3. Empty dict with defaults (if neither exists)
"""
global _instance_config global _instance_config
if _instance_config is not None: if _instance_config is not None:
return _instance_config return _instance_config
# First, try API-generated config in writable data volume
import yaml
data_dir = Path(os.environ.get("DATA_DIR", "./data"))
api_config_path = data_dir / "state" / "instance.yaml"
if api_config_path.exists():
try: try:
from config.loader import load_instance_config as _load, get_instance_value _instance_config = yaml.safe_load(api_config_path.read_text()) or {}
logger.info("Loaded instance.yaml from %s", api_config_path)
return _instance_config
except Exception as e:
logger.warning(f"Could not load API-generated instance.yaml: {e}")
# Fall back to static config (may have strict validation)
try:
from config.loader import load_instance_config as _load
_instance_config = _load() _instance_config = _load()
logger.info("Loaded instance.yaml") logger.info("Loaded instance.yaml from config/")
except Exception as e: except Exception as e:
logger.warning(f"Could not load instance.yaml: {e}. Using defaults.") logger.warning(f"Could not load instance.yaml: {e}. Using defaults.")
_instance_config = {} _instance_config = {}

View file

@ -48,8 +48,8 @@ def create_app() -> FastAPI:
) )
# Session middleware (required for OAuth state) # Session middleware (required for OAuth state)
import secrets as _secrets from app.secrets import get_session_secret
session_secret = os.environ.get("SESSION_SECRET", os.environ.get("JWT_SECRET_KEY", _secrets.token_hex(32))) session_secret = get_session_secret()
app.add_middleware(SessionMiddleware, secret_key=session_secret) app.add_middleware(SessionMiddleware, secret_key=session_secret)
# CORS for CLI and external clients # CORS for CLI and external clients
@ -62,6 +62,14 @@ def create_app() -> FastAPI:
allow_headers=["*"], allow_headers=["*"],
) )
# Load .env_overlay (persisted by /api/admin/configure)
_overlay = Path(os.environ.get("DATA_DIR", "./data")) / "state" / ".env_overlay"
if _overlay.exists():
for line in _overlay.read_text().splitlines():
if "=" in line and not line.startswith("#"):
k, v = line.split("=", 1)
os.environ.setdefault(k.strip(), v.strip())
# Load instance config on startup # Load instance config on startup
try: try:
from app.instance_config import load_instance_config from app.instance_config import load_instance_config
@ -70,6 +78,15 @@ def create_app() -> FastAPI:
except Exception as e: except Exception as e:
logger.warning(f"Could not load instance config: {e}") logger.warning(f"Could not load instance config: {e}")
# Startup banner
from src.db import SCHEMA_VERSION
logger.info(
"Agnes %s | channel: %s | schema v%s",
os.environ.get("AGNES_VERSION", "dev"),
os.environ.get("RELEASE_CHANNEL", "dev"),
SCHEMA_VERSION,
)
# Seed admin user for testing/CI (when SEED_ADMIN_EMAIL is set) # Seed admin user for testing/CI (when SEED_ADMIN_EMAIL is set)
seed_email = os.environ.get("SEED_ADMIN_EMAIL") seed_email = os.environ.get("SEED_ADMIN_EMAIL")
if seed_email: if seed_email:

43
app/secrets.py Normal file
View file

@ -0,0 +1,43 @@
"""Auto-generate and persist secrets that survive container restarts."""
import logging
import os
import secrets
from pathlib import Path
logger = logging.getLogger(__name__)
def _load_or_generate(env_var: str, file_name: str) -> str:
"""Load secret from env var, or from file, or generate and persist."""
val = os.environ.get(env_var, "")
if val:
return val
data_dir = Path(os.environ.get("DATA_DIR", "./data"))
secret_path = data_dir / "state" / file_name
if secret_path.exists():
val = secret_path.read_text().strip()
if val:
return val
logger.warning("Secret file %s is empty, regenerating", secret_path)
secret_path.parent.mkdir(parents=True, exist_ok=True)
val = secrets.token_hex(32)
secret_path.write_text(val)
try:
secret_path.chmod(0o600)
except OSError:
pass # chmod not supported on all platforms (e.g., Windows)
logger.info(
"Auto-generated %s -> %s (set %s in .env to use a fixed value)",
file_name, secret_path, env_var,
)
return val
def get_jwt_secret() -> str:
    """Return the JWT secret key.

    Delegates to ``_load_or_generate`` with the ``JWT_SECRET_KEY`` environment
    variable and the ``.jwt_secret`` file name.
    """
    jwt_secret = _load_or_generate("JWT_SECRET_KEY", ".jwt_secret")
    return jwt_secret
def get_session_secret() -> str:
    """Return the session secret.

    Delegates to ``_load_or_generate`` with the ``SESSION_SECRET`` environment
    variable and the ``.session_secret`` file name.
    """
    session_secret = _load_or_generate("SESSION_SECRET", ".session_secret")
    return session_secret

View file

@ -120,6 +120,7 @@ _URL_MAP = {
"email_auth.login_email_form": "/login/email", "email_auth.login_email_form": "/login/email",
"email_auth.send_magic_link": "/auth/email/send-link", "email_auth.send_magic_link": "/auth/email/send-link",
"register": "/auth/password/setup", "register": "/auth/password/setup",
"setup": "/setup",
} }
@ -177,6 +178,18 @@ async def index(request: Request, user: Optional[dict] = Depends(get_optional_us
return RedirectResponse(url="/login", status_code=302) return RedirectResponse(url="/login", status_code=302)
@router.get("/setup", response_class=HTMLResponse)
async def setup_wizard(request: Request, conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
    """First-time setup wizard. Redirects to dashboard if users already exist."""
    # NOTE(review): the docstring says "dashboard" but the redirect below targets
    # /login. The docstring text is kept as-is because route docstrings may be
    # surfaced in the generated API schema — confirm before rewording.
    existing_users = 0
    try:
        count_row = conn.execute("SELECT COUNT(*) FROM users").fetchone()
        existing_users = count_row[0]
    except Exception:
        # No users table yet — fall through and show the wizard.
        existing_users = 0
    if existing_users > 0:
        return RedirectResponse(url="/login", status_code=302)
    return templates.TemplateResponse(request, "setup.html", _build_context(request))
@router.get("/login", response_class=HTMLResponse) @router.get("/login", response_class=HTMLResponse)
async def login_page(request: Request): async def login_page(request: Request):
providers = [] providers = []

View file

@ -0,0 +1,267 @@
{% extends "base_login.html" %}
{% block title %}Setup - Agnes AI Data Analyst{% endblock %}
{% block content %}
<div class="login-page">
<div class="login-card-wrapper" style="max-width: 520px; margin: 40px auto; padding: 0 20px;">
<div class="login-card" style="max-width: 520px;">
<h2 id="wizard-title">Setup Agnes</h2>
<p class="login-description" id="wizard-description">
Create your admin account to get started.
</p>
<!-- Progress -->
<div style="display: flex; gap: 8px; margin-bottom: 24px;">
<div id="step-dot-1" style="flex: 1; height: 4px; border-radius: 2px; background: var(--primary, #2563eb);"></div>
<div id="step-dot-2" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
<div id="step-dot-3" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
<div id="step-dot-4" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
</div>
<!-- Status message -->
<div id="status-msg" style="display: none; padding: 10px 14px; border-radius: 6px; margin-bottom: 16px; font-size: 14px;"></div>
<!-- Step 1: Create Admin -->
<div id="step-1">
<form id="admin-form" onsubmit="return createAdmin(event)">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Email</label>
<input type="email" id="admin-email" required placeholder="admin@company.com"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Password</label>
<input type="password" id="admin-password" required minlength="8" placeholder="Min. 8 characters"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-admin">
Create Admin Account
</button>
</form>
</div>
<!-- Step 2: Data Source -->
<div id="step-2" style="display: none;">
<form id="source-form" onsubmit="return configureSource(event)">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Data Source</label>
<select id="data-source" onchange="toggleSourceFields()"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<option value="keboola">Keboola</option>
<option value="bigquery">BigQuery</option>
<option value="local">Local / CSV</option>
</select>
<div id="keboola-fields">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Keboola URL</label>
<input type="url" id="keboola-url" placeholder="https://connection.keboola.com"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Storage API Token</label>
<input type="password" id="keboola-token" placeholder="Your Keboola storage token"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
</div>
<div id="bigquery-fields" style="display: none;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">GCP Project</label>
<input type="text" id="bq-project" placeholder="my-gcp-project"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Location</label>
<input type="text" id="bq-location" value="us" placeholder="us"
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
</div>
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-source">
Configure Data Source
</button>
<button type="button" onclick="skipToStep(4)" class="btn btn-secondary" style="width: 100%; margin-top: 8px;" id="btn-skip-source">
Skip (configure later)
</button>
</form>
</div>
<!-- Step 3: Discover Tables -->
<div id="step-3" style="display: none;">
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
Discover and register tables from your data source.
</p>
<button onclick="discoverTables()" class="btn btn-primary" style="width: 100%;" id="btn-discover">
Discover Tables
</button>
<div id="discover-result" style="display: none; margin-top: 12px; padding: 12px; background: #f0fdf4; border-radius: 6px; font-size: 14px;"></div>
<button onclick="goToStep(4)" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none;" id="btn-next-sync">
Continue
</button>
</div>
<!-- Step 4: First Sync & Done -->
<div id="step-4" style="display: none;">
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
Start the first data sync and go to your dashboard.
</p>
<button onclick="triggerSync()" class="btn btn-primary" style="width: 100%;" id="btn-sync">
Start First Sync
</button>
<a href="/dashboard" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none; text-align: center; text-decoration: none;" id="btn-dashboard">
Go to Dashboard
</a>
</div>
</div>
</div>
</div>
<script>
// Bearer token returned by /auth/bootstrap; attached as the Authorization header on later wizard requests.
let token = '';
// Header title and description for each wizard step, keyed by step number (read by goToStep).
const steps = {
1: { title: 'Setup Agnes', desc: 'Create your admin account to get started.' },
2: { title: 'Data Source', desc: 'Connect to your data source.' },
3: { title: 'Discover Tables', desc: 'Find and register tables from your data source.' },
4: { title: 'Almost Done', desc: 'Start syncing data and open your dashboard.' },
};
/**
 * Display a message in the wizard's status banner.
 *
 * @param {string} msg  Text to show.
 * @param {string} type 'error' uses the red palette; any other value uses green.
 */
function showStatus(msg, type) {
  const banner = document.getElementById('status-msg');
  const isError = type === 'error';
  banner.textContent = msg;
  banner.style.display = 'block';
  banner.style.background = isError ? '#fef2f2' : '#f0fdf4';
  banner.style.color = isError ? '#dc2626' : '#16a34a';
}
/** Hide the wizard's status banner (its text is left untouched). */
function hideStatus() {
  const banner = document.getElementById('status-msg');
  banner.style.display = 'none';
}
/**
 * Switch the wizard to step `n`.
 *
 * Clears the status banner, shows only the panel for step `n`, highlights the
 * progress dots up to and including `n`, and updates the header text from `steps`.
 *
 * @param {number} n Step number (1-4).
 */
function goToStep(n) {
  hideStatus();
  [1, 2, 3, 4].forEach((stepNo) => {
    const panel = document.getElementById('step-' + stepNo);
    panel.style.display = stepNo === n ? 'block' : 'none';
    const dot = document.getElementById('step-dot-' + stepNo);
    dot.style.background = stepNo <= n ? 'var(--primary, #2563eb)' : '#e5e7eb';
  });
  const current = steps[n];
  document.getElementById('wizard-title').textContent = current.title;
  document.getElementById('wizard-description').textContent = current.desc;
}
// Jump to step n without completing the current one (used by the "Skip" button).
// Currently a plain alias for goToStep(n).
function skipToStep(n) {
goToStep(n);
}
/**
 * Show the credential fields that belong to the selected data source and
 * hide the others. Sources without a dedicated field group hide both.
 */
function toggleSourceFields() {
  const selected = document.getElementById('data-source').value;
  const groups = [
    ['keboola-fields', 'keboola'],
    ['bigquery-fields', 'bigquery'],
  ];
  for (const [groupId, sourceName] of groups) {
    const visible = selected === sourceName;
    document.getElementById(groupId).style.display = visible ? 'block' : 'none';
  }
}
/**
 * POST `body` as JSON to `url`, attaching the bearer token when one is set.
 *
 * @param {string} url    Endpoint to call.
 * @param {object} [body] Payload; omitted from the request when undefined.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} With a human-readable message on 401, on any non-2xx status,
 *                 or when a successful response is not valid JSON.
 */
async function apiCall(url, body) {
  const headers = { 'Content-Type': 'application/json' };
  if (token) headers['Authorization'] = 'Bearer ' + token;
  const resp = await fetch(url, { method: 'POST', headers, body: JSON.stringify(body) });

  if (resp.status === 401) {
    // Callers display err.message in their catch block, so the thrown message
    // must carry the full guidance or it overwrites the banner set here.
    const expiredMsg = 'Session expired. Please refresh the page and start over.';
    token = '';
    sessionStorage.removeItem('setup_token');
    showStatus(expiredMsg, 'error');
    throw new Error(expiredMsg);
  }

  // Error pages from proxies or crashes may not be JSON; don't let the parse
  // failure mask the real HTTP error.
  let data = null;
  try {
    data = await resp.json();
  } catch (parseErr) {
    data = null;
  }

  if (!resp.ok) {
    const detail = data && data.detail;
    let message = 'Request failed';
    if (typeof detail === 'string' && detail) {
      message = detail;
    } else if (Array.isArray(detail) && detail.length) {
      // Validation errors arrive as a list of objects; surface their `msg` fields.
      message = detail.map((item) => (item && item.msg) || JSON.stringify(item)).join('; ');
    } else if (detail) {
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  if (data === null) {
    throw new Error('Server returned a non-JSON response (HTTP ' + resp.status + ')');
  }
  return data;
}
/**
 * Submit handler for step 1: create the first admin account.
 *
 * Posts the entered email/password to /auth/bootstrap, keeps the returned
 * access token in memory and in sessionStorage, then advances to step 2.
 * Failures are reported in the status banner.
 *
 * @param {Event} e Submit event; its default action is suppressed.
 * @returns {Promise<boolean>} Always resolves to false.
 */
async function createAdmin(e) {
  e.preventDefault();
  const submitBtn = document.getElementById('btn-admin');
  submitBtn.disabled = true;
  submitBtn.textContent = 'Creating...';
  try {
    const email = document.getElementById('admin-email').value;
    const password = document.getElementById('admin-password').value;
    const result = await apiCall('/auth/bootstrap', { email, password });
    token = result.access_token;
    sessionStorage.setItem('setup_token', token);
    goToStep(2);
  } catch (failure) {
    showStatus(failure.message, 'error');
  } finally {
    // Restore the button whether the request succeeded or failed.
    submitBtn.disabled = false;
    submitBtn.textContent = 'Create Admin Account';
  }
  return false;
}
/**
 * Submit handler for step 2: send the chosen data source to /api/admin/configure.
 *
 * Keboola and BigQuery add their own credential fields to the payload; other
 * sources send only `data_source`. On success the wizard moves to table
 * discovery (step 3), or straight to the final step for the 'local' source.
 *
 * @param {Event} e Submit event; its default action is suppressed.
 * @returns {Promise<boolean>} Always resolves to false.
 */
async function configureSource(e) {
  e.preventDefault();
  const btn = document.getElementById('btn-source');
  btn.disabled = true;
  btn.textContent = 'Verifying...';
  try {
    const src = document.getElementById('data-source').value;
    const body = { data_source: src };
    if (src === 'keboola') {
      body.keboola_url = document.getElementById('keboola-url').value;
      body.keboola_token = document.getElementById('keboola-token').value;
    } else if (src === 'bigquery') {
      body.bigquery_project = document.getElementById('bq-project').value;
      body.bigquery_location = document.getElementById('bq-location').value;
    }
    await apiCall('/api/admin/configure', body);
    // goToStep() hides the status banner, so navigate first and only then
    // show the success message; otherwise it is cleared before the user sees it.
    goToStep(src === 'local' ? 4 : 3);
    showStatus('Connection verified!', 'success');
  } catch (err) {
    showStatus(err.message, 'error');
  } finally {
    btn.disabled = false;
    btn.textContent = 'Configure Data Source';
  }
  return false;
}
/**
 * Step 3: ask the server to discover and register tables from the configured
 * data source, then show the registered/skipped counts and reveal "Continue".
 *
 * Goes through apiCall() so an expired session (HTTP 401) is handled the same
 * way as in the other wizard steps. Failures are reported in the status banner.
 *
 * @returns {Promise<void>}
 */
async function discoverTables() {
  const btn = document.getElementById('btn-discover');
  btn.disabled = true;
  btn.textContent = 'Discovering...';
  try {
    // No payload: apiCall serialises an undefined body to "no request body".
    const data = await apiCall('/api/admin/discover-and-register');
    const el = document.getElementById('discover-result');
    el.style.display = 'block';
    el.textContent = `Registered ${data.registered} tables, skipped ${data.skipped}.`;
    document.getElementById('btn-next-sync').style.display = 'block';
    btn.style.display = 'none';
  } catch (err) {
    showStatus(err.message, 'error');
  } finally {
    btn.disabled = false;
    btn.textContent = 'Discover Tables';
  }
}
/**
 * Step 4: start the first data sync via POST /api/sync/trigger.
 *
 * On success the sync button is replaced by the "Go to Dashboard" link.
 * fetch() only rejects on network errors, so the HTTP status is checked
 * explicitly; a rejected request (401, 5xx, ...) is reported as an error
 * instead of being shown as a started sync.
 *
 * @returns {Promise<void>}
 */
async function triggerSync() {
  const btn = document.getElementById('btn-sync');
  btn.disabled = true;
  btn.textContent = 'Starting sync...';
  try {
    const headers = {};
    if (token) headers['Authorization'] = 'Bearer ' + token;
    const resp = await fetch('/api/sync/trigger', { method: 'POST', headers });
    if (resp.status === 401) {
      token = '';
      sessionStorage.removeItem('setup_token');
      throw new Error('Session expired. Please refresh the page and start over.');
    }
    if (!resp.ok) {
      let detail = '';
      try {
        const payload = await resp.json();
        if (payload && typeof payload.detail === 'string') detail = payload.detail;
      } catch (parseErr) {
        // Error body was not JSON; fall back to the generic message below.
      }
      throw new Error(detail || 'Failed to start sync (HTTP ' + resp.status + ')');
    }
    btn.style.display = 'none';
    document.getElementById('btn-dashboard').style.display = 'block';
    showStatus('Sync started! You can now go to your dashboard.', 'success');
  } catch (err) {
    showStatus(err.message, 'error');
    btn.disabled = false;
    btn.textContent = 'Start First Sync';
  }
}
// Restore the bearer token stored by createAdmin() so wizard API calls stay
// authenticated after a page reload within the same browser tab/session.
// NOTE(review): only the token is restored here, not the wizard step — confirm that is intended.
const savedToken = sessionStorage.getItem('setup_token');
if (savedToken) token = savedToken;
</script>
{% endblock %}

11
docker-compose.ci.yml Normal file
View file

@ -0,0 +1,11 @@
# CI smoke test overlay — minimal config for testing in GitHub Actions.
# Usage: docker compose -f docker-compose.yml -f docker-compose.ci.yml up -d
# The secrets below are throwaway values for CI only — never reuse them in a real deployment.
services:
app:
environment:
- JWT_SECRET_KEY=smoke-test-ci-key-minimum-32-chars-xx
- SESSION_SECRET=smoke-test-session-key-32-chars-min-x
- DATA_DIR=/data
- TESTING=0
ports:
- "8000:8000"

View file

@ -1,17 +1,18 @@
# Production override — uses pre-built GHCR image instead of local build. # Production override — uses pre-built GHCR image instead of local build.
# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d # Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
# Override tag: AGNES_TAG=stable-2026.04.3 docker compose -f ... up -d
services: services:
app: app:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
scheduler: scheduler:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
extract: extract:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
telegram-bot: telegram-bot:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
ws-gateway: ws-gateway:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
corporate-memory: corporate-memory:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
session-collector: session-collector:
image: ghcr.io/keboola/agnes-the-ai-analyst:latest image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}

View file

@ -7,6 +7,7 @@ services:
volumes: volumes:
- data:/data - data:/data
- ./config:/app/config:ro - ./config:/app/config:ro
# - ./custom-connectors:/app/connectors/custom:ro # Tier A: AI-generated connectors
env_file: .env env_file: .env
environment: environment:
- DATA_DIR=/data - DATA_DIR=/data

37
docs/RELEASE_TEMPLATE.md Normal file
View file

@ -0,0 +1,37 @@
# Release Notes Template
Use this template when adding a new entry to `CHANGELOG.md`.
---
## stable-YYYY.MM.N
**Image:** `ghcr.io/keboola/agnes-the-ai-analyst:stable-YYYY.MM.N`
**Digest:** `sha256:...` (from `docker inspect --format='{{index .RepoDigests 0}}' ghcr.io/keboola/agnes-the-ai-analyst:stable-YYYY.MM.N`)
**Date:** YYYY-MM-DD
### Added
- Feature description
### Changed
- Change description
### Fixed
- Bug fix description
### Breaking Changes
- Description of breaking change
- **Migration guide:** Steps to upgrade from previous version
### Deprecated
- Description of deprecated feature (will be removed in YYYY.MM.N)
---
## Guidelines
- Every merge to `main` creates a new `stable-YYYY.MM.N` release
- Include the image digest for verification with `cosign verify`
- Breaking changes require `BREAKING:` prefix in commit message
- Migration guides must include exact commands or config changes
- If a release deprecates the previous stable, note it explicitly

View file

@ -0,0 +1,527 @@
# Multi-Instance Deployment & Versioning — Design Spec
## Goal
Make Agnes deployable to 20+ independent customer instances via self-service, with safe versioning that prevents one customer's PR from breaking another's deployment.
## Context
Agnes is an open-source AI Data Analyst platform. Customers (or their AI agents) deploy it as a Docker image on their own infrastructure. Each instance connects to different data sources (Keboola, BigQuery, Jira, custom).
**Key constraints:**
- Customers range from semi-technical to non-technical, assisted by AI agents
- Cloud-agnostic (GCP, AWS, Azure, on-prem, VPS)
- One repo, one Docker image, many instances
- Community PRs must not break existing customers
- AI agent is the primary "installer" and "developer"
---
## 1. Versioning & Release Channels
### CalVer: `YYYY.MM.N`
Format: year.month.sequential-number. Example: `2026.04.1`, `2026.04.2`, `2026.05.1`.
No manual release decisions. Every merge to main is a release.
### Three channels
| Channel | Floating tag | Versioned tag | Source | Who uses it |
|---------|-------------|---------------|--------|-------------|
| **dev** | `:dev` | `:dev-2026.04.N` | Every CI-passing push on any feature branch | Developers, PR testing |
| **stable** | `:stable` | `:stable-2026.04.N` | Every merge to main + CI pass | All production customers |
| **deprecated** | — | `:deprecated-2026.04.N` | Previous stable after breaking change or failed smoke test | Grace period (30 days) |
Every image also gets a `:sha-abc1234` tag for exact commit traceability.
### Tag lifecycle
```
feature branch push → CI ✅ → :dev + :dev-2026.04.N + :sha-abc1234
❌ → nothing pushed
merge to main → CI ✅ → :stable + :stable-2026.04.N + :sha-abc1234
❌ → merge blocked (CI required)
smoke test on canary VM
✅ → :stable confirmed
❌ → alert, rollback canary to previous :stable
broken build tagged :deprecated-2026.04.N
```
### Version numbering
CalVer `YYYY.MM.N`, where N is a single auto-incrementing counter per month that is shared by the dev and stable channels.
Example timeline:
```
Apr 8 feature/foo push → :dev-2026.04.1
Apr 8 feature/bar push → :dev-2026.04.2
Apr 8 merge foo to main → :stable-2026.04.3
Apr 9 feature/baz push → :dev-2026.04.4
Apr 9 merge bar to main → :stable-2026.04.5
```
This avoids confusion — version `2026.04.3` exists only once, in one channel.
### Customer pins version
```yaml
# docker-compose.prod.yml
# Auto-update (recommended): always latest stable
image: ghcr.io/keboola/agnes-the-ai-analyst:stable
# Pinned: specific stable release, manual update
image: ghcr.io/keboola/agnes-the-ai-analyst:stable-2026.04.3
# Testing: latest dev
image: ghcr.io/keboola/agnes-the-ai-analyst:dev
# Testing: specific dev build
image: ghcr.io/keboola/agnes-the-ai-analyst:dev-2026.04.2
```
### Main = stable
- `main` branch is always releasable
- Every merge to main triggers a new stable release
- Feature branches are the dev channel
- No promotion pipeline, no manual approval for releases
- Smoke test is a post-deploy safety net, not a gate
---
## 2. Breaking Change Detection
### What is a breaking change
- `_meta` table schema change (add/remove column)
- `_remote_attach` table schema change
- API endpoint removed or response field removed
- DuckDB system schema migration that drops data
- CLI command removed or argument renamed
- `instance.yaml` required key added
### Automated detection in CI
Every PR runs:
1. **Contract tests**: `_meta` and `_remote_attach` schema validation against frozen spec
2. **OpenAPI diff**: Compare PR's `openapi.json` against main's. Flag removed endpoints/fields.
3. **DuckDB schema diff**: Compare table definitions in system.duckdb
4. **Config diff**: Compare `instance.yaml.example` required keys
5. **Full connector matrix**: ALL connectors tested, not just changed ones
If breaking change detected:
- PR gets `BREAKING` label automatically
- Requires 2 reviewers (elevated review)
- Commit message must have `BREAKING:` prefix
- CHANGELOG.md entry with migration guide required
- On merge: previous stable tagged as `:deprecated-YYYY.MM.N`
### Deprecated channel
When a breaking change merges:
1. Previous stable image retagged to `:deprecated-2026.04.N`
2. New build becomes `:stable` + `:stable-2026.04.(N+1)`
3. Health endpoint on deprecated version shows warning:
```json
{"warnings": ["Running deprecated version 2026.04.3. Update to stable."]}
```
4. Deprecated images removed from GHCR after 30 days
---
## 3. Smoke Test (Post-Deploy Safety Net)
### What it tests
Automated sequence run on canary VM after every `:stable` deploy:
```
1. GET /api/health → status != "unhealthy"
2. POST /auth/token → 200 (valid credentials)
3. GET /api/catalog/tables → count > 0
4. POST /api/query {sql: "SELECT 1"} → 200 + rows
5. POST /api/sync/trigger → 200
6. (wait 30s)
7. GET /api/health → check no new errors
```
### On failure
1. Alert (GitHub issue + optional webhook)
2. Canary VM rolled back to previous stable: `docker compose pull && docker compose up -d` with previous tag
3. Failed build tagged `:deprecated-YYYY.MM.N`
4. `:stable` tag reverted to previous good build
### Implementation
GitHub Actions workflow triggered after the build-and-push workflow completes:
```yaml
smoke-test:
needs: build-and-push
runs-on: ubuntu-latest
steps:
- name: Deploy to canary
run: |
gcloud compute ssh canary-vm --command="
cd /opt/agnes &&
docker compose pull &&
docker compose up -d"
- name: Wait for healthy
run: |
for i in $(seq 1 30); do
STATUS=$(curl -sf canary:8000/api/health | jq -r .status)
[ -n "$STATUS" ] && [ "$STATUS" != "unhealthy" ] && break
sleep 10
done
- name: Run smoke tests
run: |
# auth, catalog, query, sync checks
./scripts/smoke-test.sh canary:8000
- name: Rollback on failure
if: failure()
run: |
# retag and rollback
```
---
## 4. Self-Service Deployment
### Target experience
Customer (or their AI agent) goes from zero to running instance:
```bash
# 1. Get the code
git clone https://github.com/keboola/agnes-the-ai-analyst.git
cd agnes-the-ai-analyst
# 2. Start it
docker compose up -d
# 3. Open browser or use API
# First visit: /setup wizard (no users exist)
# Or headless: curl -X POST localhost:8000/auth/bootstrap ...
```
### Two setup modes
**A) Interactive (browser):**
- First visit when no users exist → redirected to `/setup`
- Step 1: Create admin account (email + password)
- Step 2: Choose data source (Keboola / BigQuery / CSV / Custom)
- Step 3: Enter credentials (token, URL)
- Step 4: Auto-discover and register tables
- Step 5: Trigger first sync
- Done → redirect to dashboard
**B) Headless (AI agent / CLI):**
```bash
# Bootstrap admin (the JSON response contains `access_token`; export it as TOKEN for the calls below)
curl -X POST http://localhost:8000/auth/bootstrap \
-H "Content-Type: application/json" \
-d '{"email":"admin@company.com","password":"SecurePass123!"}'
# Configure data source
curl -X POST http://localhost:8000/api/admin/configure \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"data_source":"keboola","keboola_token":"...","keboola_url":"..."}'
# Discover and register tables
curl -X POST http://localhost:8000/api/admin/discover-and-register \
-H "Authorization: Bearer $TOKEN"
# Trigger first sync
curl -X POST http://localhost:8000/api/sync/trigger \
-H "Authorization: Bearer $TOKEN"
```
Both modes lead to the same result; an AI agent uses the headless mode.
### Auto-configuration
On first `docker compose up` with no `.env`:
- `JWT_SECRET_KEY` auto-generated and persisted to `/data/state/.jwt_secret`
- `SESSION_SECRET` auto-generated similarly
- App starts in "setup mode" — only `/setup`, `/auth/bootstrap`, and `/api/health` accessible
On first `docker compose up` with `.env` containing `KEBOOLA_STORAGE_TOKEN`:
- Auto-discovers tables from Keboola on first sync
- Skips manual table registration step
### What customer must provide
| Required | Optional |
|----------|----------|
| Server with Docker | Custom domain + TLS |
| Admin email + password | Google OAuth credentials |
| Data source credentials (Keboola token OR BigQuery creds OR CSV files) | Telegram bot token |
| | Jira webhook secret |
### What the customer does NOT need to do
- Edit YAML manually (setup wizard generates `instance.yaml`)
- Generate JWT secret (auto-generated)
- Register tables manually (auto-discovery)
- Understand DuckDB internals
---
## 5. Custom Connectors (Three Tiers)
All tiers produce the same output: `extract.duckdb` with `_meta` table + `data/*.parquet`. Orchestrator treats them identically.
### Tier A: Local mount (fastest, AI-generated)
Customer's AI agent generates a connector. Lives outside Docker image, survives updates.
```
/opt/agnes/
├── docker-compose.yml ← official image
├── docker-compose.override.yml ← customer additions
└── custom-connectors/
└── snowflake/
├── extractor.py
└── requirements.txt
```
```yaml
# docker-compose.override.yml
services:
app:
volumes:
- ./custom-connectors:/app/connectors/custom:ro
```
Orchestrator scans `connectors/custom/*/` in addition to built-in connectors.
**How the AI agent creates one:**
1. Reads CLAUDE.md → understands extract.duckdb contract
2. Reads existing connector as reference (e.g., `connectors/keboola/extractor.py`)
3. Generates `custom-connectors/snowflake/extractor.py`
4. Runs contract test to validate output
5. Done — orchestrator picks it up on next rebuild
**Requirements for this to work:**
- CLAUDE.md must perfectly describe the contract
- Contract test must be runnable standalone
- Existing connectors must be readable as examples
- Clear error messages when contract doesn't match
### Tier B: Standalone container (complex dependencies)
For connectors needing their own runtime (Java, .NET, heavy Python packages).
```yaml
# docker-compose.override.yml
services:
connector-sap:
build: ./custom-connectors/sap
volumes:
- data:/data
environment:
- DATA_DIR=/data
- SAP_HOST=...
profiles:
- extract
```
Connector is its own Docker image. Writes to `/data/extracts/sap/extract.duckdb`. Orchestrator finds it automatically.
### Tier C: Community PR (shared with all)
Connector contributed to main repo via PR. After merge, available in official image for all customers.
```
connectors/
├── keboola/ ← built-in
├── bigquery/ ← built-in
├── jira/ ← built-in
└── snowflake/ ← community contributed
```
**PR requirements:**
- Must pass contract tests
- Must include tests
- Must not modify shared code (orchestrator, API, auth)
- CI runs full connector matrix
---
## 6. CI/CD Pipeline
### On feature branch push
```yaml
ci.yml:
- tests (all 654+)
- contract tests (all connectors)
- docker build
- push :dev + :dev-YYYY.MM.N + :sha-xxx to GHCR
```
### On merge to main
```yaml
release.yml:
- tests (all)
- contract tests (all connectors)
- breaking change detection (OpenAPI diff, schema diff)
- docker build
- push :stable + :stable-YYYY.MM.N + :sha-xxx to GHCR
- trigger smoke test on canary
smoke-test.yml (triggered):
- deploy to canary VM
- run smoke test sequence
- on failure: rollback canary, tag build as deprecated, create alert
```
### On PR
```yaml
pr-check.yml:
- tests
- contract tests
- breaking change detection
- label PR: "BREAKING" if detected
- require 2 reviewers if breaking
```
---
## 7. Infrastructure (Cloud-Agnostic)
### Primary: Docker Compose
Works everywhere Docker runs. This is the default and only required deployment method.
```bash
git clone https://github.com/keboola/agnes-the-ai-analyst.git
cd agnes-the-ai-analyst
docker compose up -d
```
### Optional: Terraform (GCP)
For automated provisioning. Lives in `infra/` with GCS remote state backend.
```bash
cd infra
terraform workspace new customer-name
terraform apply -var-file=instances/customer-name.tfvars
```
Creates VM, installs Docker, clones repo, generates `.env` and `instance.yaml`, starts Docker Compose.
### Optional: Caddy TLS
Production profile adds Caddy reverse proxy with automatic Let's Encrypt:
```bash
DOMAIN=data.customer.com docker compose --profile production up -d
```
### Directory layout on customer server
```
/opt/agnes/ ← git clone
├── docker-compose.yml ← official
├── docker-compose.prod.yml ← GHCR images
├── docker-compose.override.yml ← customer customizations
├── .env ← secrets (gitignored)
├── config/
│ └── instance.yaml ← generated by setup wizard
├── custom-connectors/ ← Tier A connectors
│ └── snowflake/
└── Caddyfile ← TLS config
/data/ ← Docker volume (persistent)
├── state/system.duckdb ← users, registry, sync state
├── analytics/server.duckdb ← views into extracts
└── extracts/ ← per-source data
├── keboola/extract.duckdb
├── bigquery/extract.duckdb
└── snowflake/extract.duckdb ← from custom connector
```
---
## 8. AI Agent as Primary Installer
CLAUDE.md and documentation must be optimized for AI agent consumption:
### CLAUDE.md requirements
- Complete extract.duckdb contract with exact SQL for `_meta` and `_remote_attach`
- Step-by-step setup instructions with exact curl commands
- Existing connectors as reference for AI-generated new ones
- Clear error messages explaining what went wrong and how to fix
### API requirements
- All setup operations available as API calls (not just UI)
- Self-describing error messages: `"Missing KEBOOLA_STORAGE_TOKEN. Set it in .env or pass via /api/admin/configure"`
- `/api/health` returns structured diagnostics AI agent can parse
- `/api/admin/configure` accepts data source config without file editing
### Documentation requirements
- Machine-readable (no screenshots, no "click here")
- Every manual step has an equivalent API/CLI command
- QUICKSTART.md optimized for copy-paste by AI agent
---
## 9. What Needs to Be Built
### Must have (blocks multi-instance)
| # | What | Effort |
|---|------|--------|
| 1 | CalVer auto-tagging in CI (release.yml) | 1 day |
| 2 | Smoke test script + CI workflow | 1 day |
| 3 | Breaking change detection in CI (OpenAPI diff, contract diff) | 2 days |
| 4 | `/setup` wizard (web) + `/api/admin/configure` (headless) | 3 days |
| 5 | Auto-generate JWT_SECRET_KEY on first start | 0.5 day |
| 6 | Auto-discovery for Keboola tables on first sync | 1 day |
| 7 | Custom connector mount support in orchestrator | 1 day |
| 8 | `CHANGELOG.md` + release notes template | 0.5 day |
| 9 | Health endpoint version + channel info | 0.5 day |
### Should have (improves experience)
| # | What | Effort |
|---|------|--------|
| 10 | Deprecated version warning in health endpoint | 0.5 day |
| 11 | `/api/admin/discover-and-register` auto-discovery endpoint | 1 day |
| 12 | Standalone container connector example (Tier B) | 0.5 day |
| 13 | CLAUDE.md optimization for AI agent setup | 1 day |
| 14 | Terraform module refactor for multi-workspace | 1 day |
### Nice to have (future)
| # | What |
|---|------|
| 15 | Community connector contribution guide |
| 16 | Instance health dashboard (central monitoring) |
| 17 | Automated backup (GCP disk snapshots) |
| 18 | Usage analytics (opt-in telemetry) |
---
## Non-Goals
- Multi-tenancy in single process (each customer = separate instance)
- Kubernetes/Helm (Docker Compose is sufficient for target scale)
- Paid tier / license keys (open-source, monetization TBD)
- GUI for connector development (AI agent + CLAUDE.md is sufficient)

View file

@ -0,0 +1,16 @@
"""Write the current FastAPI app's OpenAPI schema to stdout as sorted, indented JSON."""
import json
import os
import sys

# Make the parent of this script's directory importable so `app.main` resolves.
_PARENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _PARENT_DIR)

# Defaults only: values already present in the environment are kept.
os.environ.setdefault("TESTING", "1")
os.environ.setdefault("JWT_SECRET_KEY", "snapshot-generation-key-32-chars-min!!")

from app.main import create_app  # noqa: E402

app = create_app()
schema = app.openapi()
# Sorted keys keep the snapshot stable between runs.
sys.stdout.write(json.dumps(schema, indent=2, sort_keys=True) + "\n")

97
scripts/smoke-test.sh Executable file
View file

@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Agnes smoke test — verifies a running instance is functional.
# Usage: ./scripts/smoke-test.sh [base-url]
# Default: http://localhost:8000
# Environment: SMOKE_TOKEN — bearer token used for the authenticated checks when
#              bootstrap answers 403 (users already exist); without it those checks are skipped.
# Exit status: 0 when no check failed, 1 otherwise.
set -euo pipefail
# Base URL every request path is appended to.
HOST="${1:-http://localhost:8000}"
# Counters updated by check().
PASS=0
FAIL=0
# Bearer token for authenticated checks; filled in by the bootstrap step.
TOKEN=""
# check NAME OK
# Record one test result: prints a PASS or FAIL line for NAME and bumps the
# matching global counter. OK must be the literal string "true" to count as a
# pass; anything else is a failure.
check() {
  local label="$1"
  local outcome="$2"
  case "$outcome" in
    true)
      printf '%s\n' " PASS $label"
      PASS=$((PASS + 1))
      ;;
    *)
      printf '%s\n' " FAIL $label"
      FAIL=$((FAIL + 1))
      ;;
  esac
}
echo "Smoke test: $HOST"
echo "---"

# Scratch file for the bootstrap response body: unique per run (parallel runs
# cannot clobber each other) and removed on every exit path.
BOOT_BODY=$(mktemp "${TMPDIR:-/tmp}/smoke_boot.XXXXXX")
trap 'rm -f "$BOOT_BODY"' EXIT

# 1. Health check — abort immediately if the instance is down or unhealthy,
#    since none of the later checks can succeed.
HEALTH=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
if [ "$HEALTH" = "unhealthy" ] || [ "$HEALTH" = "unreachable" ]; then
  echo " FATAL: health=$HEALTH"
  exit 1
fi
check "health ($HEALTH)" "true"

# 2. Health has version fields
HAS_VERSION=$(curl -sf "$HOST/api/health" | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if 'version' in d and 'channel' in d and 'schema_version' in d else 'false')
" 2>/dev/null || echo "false")
check "health version fields" "$HAS_VERSION"

# 3. Bootstrap (only works on fresh DB; 403 means users exist)
#    curl -w already prints "000" when the connection fails, so the fallback
#    assigns the variable instead of appending a second "000" to the output.
BOOT_HTTP=$(curl -s -o "$BOOT_BODY" -w "%{http_code}" -X POST "$HOST/auth/bootstrap" \
  -H "Content-Type: application/json" \
  -d '{"email":"smoke@test.local","name":"Smoke Test","password":"SmokeTest123!"}' 2>/dev/null) || BOOT_HTTP="000"
if [ "$BOOT_HTTP" = "200" ]; then
  TOKEN=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1]))['access_token'])" "$BOOT_BODY" 2>/dev/null || echo "")
  if [ -n "$TOKEN" ]; then
    check "bootstrap (new admin)" "true"
  else
    # A 200 without a usable token would silently skip every authenticated check.
    check "bootstrap (no access_token in response)" "false"
  fi
elif [ "$BOOT_HTTP" = "403" ]; then
  TOKEN="${SMOKE_TOKEN:-}"
  echo " SKIP bootstrap (users exist)"
else
  check "bootstrap (HTTP $BOOT_HTTP)" "false"
fi

# 4. Query SELECT 1 (requires auth)
if [ -n "$TOKEN" ]; then
  QUERY_OK=$(curl -sf -X POST "$HOST/api/query" \
    -H "Authorization: Bearer $TOKEN" \
    -H "Content-Type: application/json" \
    -d '{"sql":"SELECT 1 as test"}' | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if len(d.get('rows',[])) > 0 else 'false')
" 2>/dev/null || echo "false")
  check "query SELECT 1" "$QUERY_OK"
else
  echo " SKIP query (no token)"
fi

# 5. Sync trigger
if [ -n "$TOKEN" ]; then
  SYNC_HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$HOST/api/sync/trigger" \
    -H "Authorization: Bearer $TOKEN" 2>/dev/null) || SYNC_HTTP="000"
  if [[ "$SYNC_HTTP" =~ ^(200|202)$ ]]; then
    check "sync trigger" "true"
  else
    check "sync trigger (HTTP $SYNC_HTTP)" "false"
  fi
else
  echo " SKIP sync (no token)"
fi

# 6. Post-sync health (wait briefly)
sleep 5
HEALTH2=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
if [ "$HEALTH2" = "unhealthy" ] || [ "$HEALTH2" = "unreachable" ]; then
  check "post-sync health ($HEALTH2)" "false"
else
  check "post-sync health ($HEALTH2)" "true"
fi

# Results
echo ""
echo "Results: $PASS passed, $FAIL failed"
[ "$FAIL" -eq 0 ] || exit 1

View file

@ -4,12 +4,16 @@ Provides get_system_db() for the system state database
and get_analytics_db() for the analytics database with parquet views. and get_analytics_db() for the analytics database with parquet views.
""" """
import logging
import os import os
import re import re
import shutil
from pathlib import Path from pathlib import Path
import duckdb import duckdb
logger = logging.getLogger(__name__)
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$") _SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
SCHEMA_VERSION = 3 SCHEMA_VERSION = 3
@ -260,6 +264,25 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
"""Create tables if they don't exist. Apply migrations if schema version changed.""" """Create tables if they don't exist. Apply migrations if schema version changed."""
current = get_schema_version(conn) current = get_schema_version(conn)
if current < SCHEMA_VERSION: if current < SCHEMA_VERSION:
# Snapshot before migration for rollback support
if current > 0:
try:
db_path = Path(os.environ.get("DATA_DIR", "./data")) / "state" / "system.duckdb"
if db_path.exists():
# Flush WAL to main DB file before copying
try:
conn.execute("CHECKPOINT")
except Exception:
pass # CHECKPOINT may fail on read-only or in-memory DBs
snapshot = db_path.parent / "system.duckdb.pre-migrate"
shutil.copy2(str(db_path), str(snapshot))
# Also copy WAL if it still exists (belt and suspenders)
wal_path = Path(str(db_path) + ".wal")
if wal_path.exists():
shutil.copy2(str(wal_path), str(snapshot) + ".wal")
logger.info("Pre-migration snapshot saved: %s", snapshot)
except Exception as e:
logger.warning("Could not create pre-migration snapshot: %s", e)
conn.execute(_SYSTEM_SCHEMA) conn.execute(_SYSTEM_SCHEMA)
if current == 0: if current == 0:
conn.execute( conn.execute(

5151
tests/snapshots/openapi.json Normal file

File diff suppressed because it is too large Load diff

View file

@ -144,6 +144,205 @@ class TestGetAnalyticsDb:
conn.close() conn.close()
class TestMigrationSafety:
"""Tests for schema migration correctness, idempotency, and safety snapshots."""
# Minimal v2 table_registry (no is_public column — that comes in v3).
# Executed by _create_v2_db() to build the pre-migration fixture.
_V2_TABLE_REGISTRY = """
CREATE TABLE table_registry (
id VARCHAR PRIMARY KEY,
name VARCHAR NOT NULL,
source_type VARCHAR,
bucket VARCHAR,
source_table VARCHAR,
sync_strategy VARCHAR DEFAULT 'full_refresh',
query_mode VARCHAR DEFAULT 'local',
sync_schedule VARCHAR,
profile_after_sync BOOLEAN DEFAULT true,
primary_key VARCHAR,
folder VARCHAR,
description TEXT,
registered_by VARCHAR,
registered_at TIMESTAMP DEFAULT current_timestamp
);
"""
def _create_v2_db(self, db_path):
    """Build a DuckDB file at ``db_path`` that looks like a schema-version-2 system DB.

    Writes a ``schema_version`` row of 2, the v2 ``table_registry`` definition,
    and bare-bones stand-ins for the remaining tables so that ``_ensure_schema``
    can run against the file. Parent directories are created as needed.
    """
    import duckdb as _duckdb

    version_sql = (
        "CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
        "INSERT INTO schema_version (version) VALUES (2);"
    )
    # Stub out remaining tables so _ensure_schema doesn't fail
    stub_tables = (
        "CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
        "CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
        "CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
        "CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
        "CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
        "CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
        "CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
        "CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
        "CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
        "CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
        "CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
        "CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
    )

    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = _duckdb.connect(str(db_path))
    try:
        conn.execute(version_sql)
        conn.execute(self._V2_TABLE_REGISTRY)
        for statement in stub_tables:
            conn.execute(statement)
    finally:
        conn.close()
def test_v2_to_v3_migration(self, tmp_path, monkeypatch):
    """Migrating a v2 database yields schema version 3 and adds ``table_registry.is_public``."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import duckdb as _duckdb
    from src.db import _ensure_schema, get_schema_version

    system_db = tmp_path / "state" / "system.duckdb"
    self._create_v2_db(system_db)

    conn = _duckdb.connect(str(system_db))
    try:
        _ensure_schema(conn)
        assert get_schema_version(conn) == 3

        rows = conn.execute(
            "SELECT column_name FROM information_schema.columns WHERE table_name='table_registry'"
        ).fetchall()
        column_names = set(row[0] for row in rows)
        assert "is_public" in column_names
    finally:
        conn.close()
def test_migration_idempotency(self, tmp_path, monkeypatch):
    """Running ``_ensure_schema`` twice on a fresh DB succeeds and ends at ``SCHEMA_VERSION``."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import duckdb as _duckdb
    from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION

    state_dir = tmp_path / "state"
    state_dir.mkdir(parents=True, exist_ok=True)

    conn = _duckdb.connect(str(state_dir / "system.duckdb"))
    try:
        # Second pass must be a no-op rather than an error.
        for _ in range(2):
            _ensure_schema(conn)
        assert get_schema_version(conn) == SCHEMA_VERSION
    finally:
        conn.close()
def test_migration_preserves_data(self, tmp_path, monkeypatch):
    """Rows written before a migration must still be readable afterwards.

    Builds a v1-shaped database by hand, inserts one ``table_registry`` row,
    runs ``_ensure_schema`` and checks that the schema reached the current
    ``SCHEMA_VERSION`` and that the row's values are unchanged.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import duckdb as _duckdb
    from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION

    db_path = tmp_path / "state" / "system.duckdb"
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = _duckdb.connect(str(db_path))
    try:
        # Build a v1 schema manually
        conn.execute(
            "CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
            "INSERT INTO schema_version (version) VALUES (1);"
        )
        conn.execute("""
            CREATE TABLE table_registry (
                id VARCHAR PRIMARY KEY,
                name VARCHAR NOT NULL,
                folder VARCHAR,
                sync_strategy VARCHAR,
                primary_key VARCHAR,
                description TEXT,
                registered_by VARCHAR,
                registered_at TIMESTAMP DEFAULT current_timestamp
            );
        """)
        conn.execute(
            "INSERT INTO table_registry (id, name, description) VALUES ('row1', 'MyTable', 'kept')"
        )
        # Stub remaining tables
        for ddl in [
            "CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
            "CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
            "CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
            "CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
            "CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
            "CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
            "CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
            "CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
            "CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
            "CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
            "CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
            "CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
        ]:
            conn.execute(ddl)
        _ensure_schema(conn)
        # Compare against the code's current version rather than a literal,
        # so this data-preservation check keeps working after a schema bump.
        assert get_schema_version(conn) == SCHEMA_VERSION
        row = conn.execute(
            "SELECT name, description FROM table_registry WHERE id='row1'"
        ).fetchone()
        assert row is not None, "Pre-migration row was lost"
        assert row[0] == "MyTable"
        assert row[1] == "kept"
    finally:
        conn.close()
def test_pre_migration_snapshot_created(self, tmp_path, monkeypatch):
    """Opening an existing v2 DB via get_system_db must leave a pre-migrate snapshot file."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    from src.db import get_system_db

    state_dir = tmp_path / "state"
    # Seed a v2 database where get_system_db is expected to look for it.
    self._create_v2_db(state_dir / "system.duckdb")
    expected_snapshot = state_dir / "system.duckdb.pre-migrate"
    handle = get_system_db()
    try:
        assert expected_snapshot.exists(), "Pre-migration snapshot was not created"
    finally:
        handle.close()
def test_no_snapshot_on_fresh_db(self, tmp_path, monkeypatch):
    """A brand-new database must not produce a pre-migrate snapshot file."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    from src.db import get_system_db

    unexpected_snapshot = tmp_path / "state" / "system.duckdb.pre-migrate"
    handle = get_system_db()
    try:
        snapshot_present = unexpected_snapshot.exists()
        assert not snapshot_present, "Snapshot should not exist for a fresh DB"
    finally:
        handle.close()
def test_future_version_is_noop(self, tmp_path, monkeypatch):
    """A version stamp of 99 (meant to exceed SCHEMA_VERSION) survives _ensure_schema unchanged."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import duckdb as _duckdb
    from src.db import _ensure_schema, get_schema_version

    state_dir = tmp_path / "state"
    state_dir.mkdir(parents=True, exist_ok=True)
    db = _duckdb.connect(str(state_dir / "system.duckdb"))
    try:
        # Stamp the database with a version the code does not know about.
        seed_sql = (
            "CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
            "INSERT INTO schema_version (version) VALUES (99);"
        )
        db.execute(seed_sql)
        _ensure_schema(db)
        assert get_schema_version(db) == 99
    finally:
        db.close()
class TestGetAnalyticsDbReadonly: class TestGetAnalyticsDbReadonly:
def test_analytics_readonly_rejects_malicious_dir_name(self, tmp_path, monkeypatch): def test_analytics_readonly_rejects_malicious_dir_name(self, tmp_path, monkeypatch):
"""Directories with SQL-injection chars in their name are skipped.""" """Directories with SQL-injection chars in their name are skipped."""

View file

@ -0,0 +1,73 @@
"""OpenAPI snapshot test — detect breaking API changes.
Compares the current app's OpenAPI schema against a committed snapshot.
Fails if any path or HTTP method has been removed (breaking change).
To update the snapshot after an intentional change:
make update-openapi-snapshot
"""
import json
import os
from pathlib import Path
import pytest
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "openapi.json"
@pytest.fixture(scope="module")
def current_schema():
    """Create the app once per module and return its OpenAPI schema."""
    # Only set TESTING when the environment has not already defined it.
    if "TESTING" not in os.environ:
        os.environ["TESTING"] = "1"
    from app.main import create_app

    return create_app().openapi()
def test_snapshot_exists():
    """The OpenAPI snapshot file must be present at SNAPSHOT_PATH."""
    missing_hint = "No OpenAPI snapshot found. Generate one with: make update-openapi-snapshot"
    assert SNAPSHOT_PATH.exists(), missing_hint
def test_no_removed_paths(current_schema):
    """Every path recorded in the snapshot must still exist in the live schema."""
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")

    recorded = json.loads(SNAPSHOT_PATH.read_text())
    live_paths = current_schema.get("paths", {})
    # Keep only the snapshot paths that the current schema no longer has.
    removed = {route for route in recorded.get("paths", {}) if route not in live_paths}
    assert not removed, (
        f"BREAKING: {len(removed)} API path(s) removed: {sorted(removed)}\n"
        "If intentional, run: make update-openapi-snapshot"
    )
def test_no_removed_methods(current_schema):
    """No HTTP operation may disappear from a path present in both schemas.

    For each path found in both the snapshot and the current schema, the
    operation keys are compared. Any snapshot operation missing from the
    current schema is reported as a breaking change. Skipped when no
    snapshot file exists.
    """
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")
    snapshot = json.loads(SNAPSHOT_PATH.read_text())
    current_paths = current_schema.get("paths", {})
    snapshot_paths = snapshot.get("paths", {})
    # Operation keys of an OpenAPI Path Item Object. Anything else on a
    # path item (e.g. 'parameters') is not an HTTP method and is ignored.
    # Built once, outside the loop, and includes 'trace'.
    http_methods = {"get", "post", "put", "delete", "patch", "head", "options", "trace"}
    breaking = []
    # Sorted so the failure message is stable between runs.
    for path in sorted(set(snapshot_paths) & set(current_paths)):
        removed_methods = set(snapshot_paths[path]) - set(current_paths[path])
        removed_http = removed_methods & http_methods
        if removed_http:
            breaking.append(f" {path}: {sorted(removed_http)}")
    assert not breaking, (
        f"BREAKING: HTTP methods removed from {len(breaking)} path(s):\n"
        + "\n".join(breaking)
        + "\nIf intentional, run: make update-openapi-snapshot"
    )

View file

@ -304,26 +304,37 @@ class TestJwtClaims:
# ---- JWT Secret Hardening ---- # ---- JWT Secret Hardening ----
class TestJwtSecretHardening: class TestJwtSecretHardening:
def test_raises_without_jwt_secret_in_non_test_env(self): def test_auto_generates_jwt_secret_when_absent(self, tmp_path):
"""Module-level code must raise RuntimeError when JWT_SECRET_KEY is absent """When JWT_SECRET_KEY is absent and TESTING is not set,
and TESTING is not set, preventing accidental production deploys with no secret.""" the secret is auto-generated and persisted to a file."""
saved_key = os.environ.pop("JWT_SECRET_KEY", None) saved_key = os.environ.pop("JWT_SECRET_KEY", None)
saved_testing = os.environ.pop("TESTING", None) saved_testing = os.environ.pop("TESTING", None)
# Eject any cached module so the re-import re-executes module-level code saved_data_dir = os.environ.get("DATA_DIR")
os.environ["DATA_DIR"] = str(tmp_path)
# Eject cached modules so the re-import re-executes module-level code
sys.modules.pop("app.auth.jwt", None) sys.modules.pop("app.auth.jwt", None)
sys.modules.pop("app.secrets", None)
try: try:
with pytest.raises(RuntimeError, match="JWT_SECRET_KEY environment variable is required"):
importlib.import_module("app.auth.jwt") importlib.import_module("app.auth.jwt")
secret_file = tmp_path / "state" / ".jwt_secret"
assert secret_file.exists(), "JWT secret file should be auto-generated"
secret = secret_file.read_text().strip()
assert len(secret) == 64, "Auto-generated secret should be 64 hex chars (32 bytes)"
finally: finally:
# Restore environment before re-importing so the module loads cleanly # Restore environment before re-importing so the module loads cleanly
if saved_key is not None: if saved_key is not None:
os.environ["JWT_SECRET_KEY"] = saved_key os.environ["JWT_SECRET_KEY"] = saved_key
if saved_testing is not None: if saved_testing is not None:
os.environ["TESTING"] = saved_testing os.environ["TESTING"] = saved_testing
if saved_data_dir is not None:
os.environ["DATA_DIR"] = saved_data_dir
else:
os.environ.pop("DATA_DIR", None)
# If neither was set (bare test run), use TESTING flag so reload works # If neither was set (bare test run), use TESTING flag so reload works
if saved_key is None and saved_testing is None: if saved_key is None and saved_testing is None:
os.environ["TESTING"] = "1" os.environ["TESTING"] = "1"
sys.modules.pop("app.auth.jwt", None) sys.modules.pop("app.auth.jwt", None)
sys.modules.pop("app.secrets", None)
importlib.import_module("app.auth.jwt") importlib.import_module("app.auth.jwt")
# Clean up the temporary TESTING flag if we added it # Clean up the temporary TESTING flag if we added it
if saved_key is None and saved_testing is None: if saved_key is None and saved_testing is None: