Merge pull request #1 from keboola/feature/v2-fastapi-duckdb-docker-cli
feat: multi-instance deployment (14 must-have items)
This commit is contained in:
commit
dbc57d1de3
27 changed files with 7008 additions and 67 deletions
31
.github/workflows/deploy.yml
vendored
31
.github/workflows/deploy.yml
vendored
|
|
@ -1,8 +1,9 @@
|
|||
name: Build & Push
|
||||
# SUPERSEDED by release.yml — CalVer tagging with stable/dev channels.
|
||||
# Kept for manual trigger only. Automated builds use release.yml.
|
||||
name: Build & Push (legacy)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch: {}
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
|
@ -24,27 +25,3 @@ jobs:
|
|||
run: pytest tests/ -v --tb=short
|
||||
env:
|
||||
TESTING: "1"
|
||||
|
||||
build-and-push:
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
packages: write
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
push: true
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}:latest
|
||||
ghcr.io/${{ github.repository }}:${{ github.sha }}
|
||||
|
|
|
|||
148
.github/workflows/release.yml
vendored
Normal file
148
.github/workflows/release.yml
vendored
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, "feature/**"]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv pip install --system ".[dev]"
|
||||
|
||||
- name: Run tests
|
||||
run: pytest tests/ -v --tb=short
|
||||
env:
|
||||
TESTING: "1"
|
||||
|
||||
build-and-push:
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
image_tag: ${{ steps.meta.outputs.versioned_tag }}
|
||||
version: ${{ steps.meta.outputs.version }}
|
||||
channel: ${{ steps.meta.outputs.channel }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
fetch-tags: true
|
||||
|
||||
- name: Claim version tag (with retry to avoid race conditions)
|
||||
id: meta
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
YEAR_MONTH=$(date +%Y.%m)
|
||||
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
|
||||
CHANNEL="stable"
|
||||
else
|
||||
CHANNEL="dev"
|
||||
fi
|
||||
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
|
||||
|
||||
# Claim a unique version by pushing a git tag BEFORE building.
|
||||
# Retry up to 5 times if another CI run took our N.
|
||||
TAG_CLAIMED=false
|
||||
for ATTEMPT in 1 2 3 4 5; do
|
||||
git fetch --tags --force
|
||||
# Use max(N) not count — safe even if tags are deleted
|
||||
MAX_N=$(git tag -l "*-${YEAR_MONTH}.*" | sed 's/.*\.//' | sort -n | tail -1)
|
||||
N=$(( ${MAX_N:-0} + 1 ))
|
||||
VERSION="${YEAR_MONTH}.${N}"
|
||||
TAG="${CHANNEL}-${VERSION}"
|
||||
|
||||
git tag -a "$TAG" -m "Release $TAG"
|
||||
if git push origin "$TAG" 2>/dev/null; then
|
||||
echo "Claimed tag $TAG (attempt $ATTEMPT)"
|
||||
TAG_CLAIMED=true
|
||||
break
|
||||
else
|
||||
echo "Tag $TAG already exists, retrying... (attempt $ATTEMPT)"
|
||||
git tag -d "$TAG"
|
||||
sleep 2
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$TAG_CLAIMED" != "true" ]; then
|
||||
echo "::error::Failed to claim a unique version tag after 5 attempts"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "channel=${CHANNEL}" >> "$GITHUB_OUTPUT"
|
||||
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
|
||||
echo "versioned_tag=${TAG}" >> "$GITHUB_OUTPUT"
|
||||
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
echo "Channel: ${CHANNEL}"
|
||||
echo "Version: ${VERSION}"
|
||||
echo "Versioned tag: ${TAG}"
|
||||
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
push: true
|
||||
build-args: |
|
||||
AGNES_VERSION=${{ steps.meta.outputs.version }}
|
||||
RELEASE_CHANNEL=${{ steps.meta.outputs.channel }}
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.channel }}
|
||||
ghcr.io/${{ github.repository }}:${{ steps.meta.outputs.versioned_tag }}
|
||||
ghcr.io/${{ github.repository }}:sha-${{ steps.meta.outputs.short_sha }}
|
||||
|
||||
smoke-test:
|
||||
needs: build-and-push
|
||||
if: github.ref == 'refs/heads/main'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Start Agnes from built image
|
||||
run: |
|
||||
# Create empty .env (docker-compose.yml requires env_file: .env, gitignored)
|
||||
touch .env
|
||||
# Use prod compose (GHCR images) + CI overlay (test secrets)
|
||||
export AGNES_TAG="${{ needs.build-and-push.outputs.image_tag }}"
|
||||
docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml up -d app
|
||||
# Wait for healthy (max 60s)
|
||||
timeout 60 bash -c 'until curl -sf http://localhost:8000/api/health | python3 -c "import sys,json; d=json.load(sys.stdin); sys.exit(0 if d[\"status\"]!=\"unhealthy\" else 1)"; do sleep 3; done'
|
||||
|
||||
- name: Run smoke tests
|
||||
run: bash scripts/smoke-test.sh http://localhost:8000
|
||||
|
||||
- name: Collect logs on failure
|
||||
if: failure()
|
||||
run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml logs > smoke-test-logs.txt
|
||||
|
||||
- name: Upload logs
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: smoke-test-logs
|
||||
path: smoke-test-logs.txt
|
||||
|
||||
- name: Teardown
|
||||
if: always()
|
||||
run: docker compose -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.ci.yml down -v
|
||||
33
CHANGELOG.md
Normal file
33
CHANGELOG.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to Agnes AI Data Analyst are documented in this file.
|
||||
|
||||
Format: [CalVer](https://calver.org/) `YYYY.MM.N` with channels `stable` and `dev`.
|
||||
|
||||
---
|
||||
|
||||
## stable-2026.04.1 (unreleased)
|
||||
|
||||
Multi-instance deployment and self-service setup.
|
||||
|
||||
### Added
|
||||
- CalVer versioning with `stable` and `dev` release channels
|
||||
- `/api/health` now returns `version`, `channel`, and `schema_version`
|
||||
- Auto-generated JWT and session secrets with file persistence (`/data/state/.jwt_secret`)
|
||||
- Pre-migration snapshot of `system.duckdb` before schema upgrades
|
||||
- `POST /api/admin/configure` for headless data source configuration
|
||||
- `POST /api/admin/discover-and-register` combined table discovery and registration
|
||||
- `/setup` web wizard for first-time instance setup
|
||||
- `scripts/smoke-test.sh` for post-deploy verification
|
||||
- Smoke test job in CI (Docker-in-CI after every release)
|
||||
- OpenAPI snapshot test for breaking change detection
|
||||
- Custom connector mount support (`connectors/custom/`)
|
||||
- Startup banner logging version, channel, and schema version
|
||||
- Schema migration safety tests (idempotency, data preservation, snapshot)
|
||||
- `CHANGELOG.md` and release notes template
|
||||
|
||||
### Breaking Changes
|
||||
None.
|
||||
|
||||
### Migration Guide
|
||||
No action required. Existing instances upgrade seamlessly.
|
||||
|
|
@ -154,7 +154,7 @@ Auth providers in `app/auth/` (FastAPI-based):
|
|||
## Key Implementation Details
|
||||
|
||||
### DuckDB Schema (src/db.py)
|
||||
- Schema v2 with auto-migration from v1
|
||||
- Schema v3 with auto-migration from v1→v2→v3
|
||||
- `table_registry`: id, name, source_type, bucket, source_table, query_mode, sync_schedule, etc.
|
||||
- `sync_state`, `sync_history`: track extraction progress
|
||||
- `users`, `dataset_permissions`, `audit_log`: auth + RBAC
|
||||
|
|
|
|||
|
|
@ -6,6 +6,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf
|
|||
# Install uv for fast dependency management
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
ARG AGNES_VERSION=dev
|
||||
ARG RELEASE_CHANNEL=dev
|
||||
ENV AGNES_VERSION=${AGNES_VERSION}
|
||||
ENV RELEASE_CHANNEL=${RELEASE_CHANNEL}
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code
|
||||
|
|
|
|||
6
Makefile
6
Makefile
|
|
@ -1,6 +1,6 @@
|
|||
# Agnes AI Data Analyst — Development Makefile
|
||||
|
||||
.PHONY: help test lint dev docker
|
||||
.PHONY: help test lint dev docker update-openapi-snapshot
|
||||
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
|
|
@ -20,3 +20,7 @@ docker:
|
|||
|
||||
lint:
|
||||
@ruff check . 2>/dev/null || echo "ruff not installed: pip install ruff"
|
||||
|
||||
update-openapi-snapshot:
|
||||
TESTING=1 python scripts/generate_openapi.py > tests/snapshots/openapi.json
|
||||
@echo "Snapshot updated. Review diff and commit."
|
||||
|
|
|
|||
227
app/api/admin.py
227
app/api/admin.py
|
|
@ -1,7 +1,9 @@
|
|||
"""Admin endpoints — table discovery, registry management."""
|
||||
"""Admin endpoints — table discovery, registry management, instance configuration."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -42,6 +44,16 @@ class UpdateTableRequest(BaseModel):
|
|||
profile_after_sync: Optional[bool] = None
|
||||
|
||||
|
||||
class ConfigureRequest(BaseModel):
    """Request body accepted by ``POST /configure``.

    Only ``data_source`` is mandatory at the schema level; every other
    field defaults to ``None``.
    """

    # Expected values: "keboola" | "bigquery" | "local"
    data_source: str

    # Keboola settings
    keboola_token: Optional[str] = None
    keboola_url: Optional[str] = None

    # BigQuery settings
    bigquery_project: Optional[str] = None
    bigquery_location: Optional[str] = None

    # Instance-level settings
    instance_name: Optional[str] = None
    allowed_domain: Optional[str] = None
|
||||
|
||||
|
||||
@router.get("/discover-tables")
|
||||
async def discover_tables(
|
||||
user: dict = Depends(require_role(Role.ADMIN)),
|
||||
|
|
@ -53,10 +65,12 @@ async def discover_tables(
|
|||
|
||||
if source_type == "keboola":
|
||||
from connectors.keboola.client import KeboolaClient
|
||||
import os
|
||||
from app.instance_config import get_value
|
||||
url = get_value("keboola", "url", default="")
|
||||
token = os.environ.get(get_value("keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN"), "")
|
||||
url = get_value("data_source", "keboola", "stack_url", default="")
|
||||
token_env = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
|
||||
token = os.environ.get(token_env, "") if token_env else ""
|
||||
if not token:
|
||||
token = os.environ.get("KEBOOLA_STORAGE_TOKEN", "")
|
||||
client = KeboolaClient(token=token, url=url)
|
||||
tables = client.discover_all_tables()
|
||||
return {"tables": tables, "count": len(tables), "source": "keboola"}
|
||||
|
|
@ -144,3 +158,208 @@ async def unregister_table(
|
|||
if not repo.get(table_id):
|
||||
raise HTTPException(status_code=404, detail="Table not found")
|
||||
repo.unregister(table_id)
|
||||
|
||||
|
||||
def _load_yaml_mapping(path: Path) -> dict:
    """Return the YAML mapping stored at *path*, or ``{}`` when it is unusable."""
    import yaml

    try:
        loaded = yaml.safe_load(path.read_text())
    except Exception as exc:  # unreadable file or malformed YAML: start from scratch
        logger.warning("Ignoring unreadable instance config %s: %s", path, exc)
        return {}
    if not loaded:
        return {}
    if not isinstance(loaded, dict):
        logger.warning("Ignoring instance config %s: top level is not a mapping", path)
        return {}
    return loaded


def _config_section(config: dict, key: str) -> dict:
    """Return ``config[key]`` as a dict, replacing a missing or non-dict value with ``{}``."""
    section = config.get(key)
    if not isinstance(section, dict):
        section = {}
        config[key] = section
    return section


def _write_env_overlay(overlay_path: Path, updates: dict) -> None:
    """Merge *updates* into the ``KEY=VALUE`` overlay file at *overlay_path*."""
    overlay_path.parent.mkdir(parents=True, exist_ok=True)

    merged = {}
    if overlay_path.exists():
        for line in overlay_path.read_text().splitlines():
            if "=" in line and not line.startswith("#"):
                key, value = line.split("=", 1)
                merged[key.strip()] = value.strip()
    merged.update(updates)

    content = "\n".join(f"{k}={v}" for k, v in merged.items()) + "\n"
    # Create the file with owner-only permissions from the start so the
    # secrets are never briefly readable by other users.
    fd = os.open(overlay_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(content)
    try:
        # The mode passed to os.open only applies to newly created files;
        # tighten a pre-existing overlay as well.
        overlay_path.chmod(0o600)
    except OSError:
        pass  # chmod is not supported on every platform


@router.post("/configure")
async def configure_instance(
    request: ConfigureRequest,
    user: dict = Depends(require_role(Role.ADMIN)),
):
    """Configure data source and instance settings via API.

    Writes config to instance.yaml and persists secrets to .env_overlay.
    AI agents and the /setup wizard use this instead of manual file editing.

    Args:
        request: Desired data source plus optional instance settings.
        user: Authenticated admin, injected by ``require_role(Role.ADMIN)``.

    Returns:
        A dict with ``status``, ``data_source`` and ``connection`` keys.

    Raises:
        HTTPException: 400 when ``data_source`` is unknown, a required field
            is missing, a credential contains line breaks or NUL characters,
            or the Keboola connection check fails.
    """
    import yaml

    if request.data_source not in ("keboola", "bigquery", "local"):
        raise HTTPException(status_code=400, detail="data_source must be 'keboola', 'bigquery', or 'local'")

    # The overlay file is line-based (KEY=VALUE). A line break inside a value
    # would inject extra entries, and a NUL byte cannot be stored in os.environ.
    for field_name in ("keboola_token", "keboola_url"):
        field_value = getattr(request, field_name)
        if field_value and any(ch in field_value for ch in "\r\n\x00"):
            raise HTTPException(
                status_code=400,
                detail=f"{field_name} must not contain line breaks or NUL characters",
            )

    # Validate credentials if provided
    if request.data_source == "keboola":
        if not request.keboola_token or not request.keboola_url:
            raise HTTPException(status_code=400, detail="keboola_token and keboola_url are required for Keboola data source")
        try:
            from connectors.keboola.client import KeboolaClient
            client = KeboolaClient(token=request.keboola_token, url=request.keboola_url)
            client.test_connection()
        except Exception as e:
            logger.error("Keboola connection validation failed: %s", e)
            raise HTTPException(
                status_code=400,
                detail="Keboola connection failed. Check your token and URL.",
            ) from e

    elif request.data_source == "bigquery":
        if not request.bigquery_project:
            raise HTTPException(status_code=400, detail="bigquery_project is required for BigQuery data source")

    # Write instance.yaml to DATA_DIR/state/ (writable Docker volume),
    # NOT to CONFIG_DIR which is mounted read-only in Docker.
    state_dir = Path(os.environ.get("DATA_DIR", "./data")) / "state"
    config_path = state_dir / "instance.yaml"

    # Load existing API-generated config, or fall back to read-only CONFIG_DIR config
    existing = {}
    if config_path.exists():
        existing = _load_yaml_mapping(config_path)
    else:
        ro_path = Path(os.environ.get("CONFIG_DIR", "./config")) / "instance.yaml"
        if ro_path.exists():
            existing = _load_yaml_mapping(ro_path)

    # Merge instance settings
    if request.instance_name:
        _config_section(existing, "instance")["name"] = request.instance_name

    if request.allowed_domain:
        _config_section(existing, "auth")["allowed_domain"] = request.allowed_domain

    # Merge data source config (secrets as env var references)
    existing["data_source"] = {"type": request.data_source}
    if request.data_source == "keboola":
        existing["data_source"]["keboola"] = {
            "stack_url": request.keboola_url,
            "token_env": "KEBOOLA_STORAGE_TOKEN",
        }
    elif request.data_source == "bigquery":
        existing["data_source"]["bigquery"] = {
            "project": request.bigquery_project,
            "location": request.bigquery_location or "us",
        }

    # Write to writable data volume
    config_path.parent.mkdir(parents=True, exist_ok=True)
    config_path.write_text(yaml.dump(existing, default_flow_style=False, sort_keys=False))
    logger.info("Wrote instance config to %s", config_path)

    # Persist secrets to .env_overlay (in data volume, never in git)
    secrets_to_persist = {}
    if request.keboola_token:
        secrets_to_persist["KEBOOLA_STORAGE_TOKEN"] = request.keboola_token
    if request.keboola_url:
        secrets_to_persist["KEBOOLA_STACK_URL"] = request.keboola_url

    if secrets_to_persist:
        _write_env_overlay(state_dir / ".env_overlay", secrets_to_persist)
        logger.info("Persisted %d secrets to .env_overlay", len(secrets_to_persist))

        # Inject into current process environment
        for k, v in secrets_to_persist.items():
            os.environ[k] = v

    # Invalidate cached instance config so next read picks up changes
    import app.instance_config as ic
    ic._instance_config = None

    return {
        "status": "ok",
        "data_source": request.data_source,
        # NOTE(review): only the Keboola branch performs a connection test;
        # BigQuery is reported as "verified" without one. Value kept as-is
        # because clients may depend on it.
        "connection": "verified" if request.data_source != "local" else "local",
    }
|
||||
|
||||
|
||||
def _discover_and_register_tables(conn: duckdb.DuckDBPyConnection, user_email: str) -> dict:
    """Discover tables from configured source and register them. Shared logic for API and sync.

    Args:
        conn: Connection handed to ``TableRegistryRepository``.
        user_email: Value recorded as ``registered_by`` on new entries.

    Returns:
        A dict with ``registered``, ``skipped`` and ``errors`` counts, the
        list of newly registered table ids under ``tables``, and ``source``.
        For any source type other than ``"keboola"`` all counts are zero.
    """
    from app.instance_config import get_data_source_type, get_value

    source_type = get_data_source_type()
    if source_type != "keboola":
        return {"registered": 0, "skipped": 0, "errors": 0, "tables": [], "source": source_type}

    from connectors.keboola.client import KeboolaClient
    # Read from data_source.keboola (matches what /api/admin/configure writes)
    url = get_value("data_source", "keboola", "stack_url", default="")
    token_env = get_value("data_source", "keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
    token = os.environ.get(token_env, "") if token_env else ""
    if not token:
        token = os.environ.get("KEBOOLA_STORAGE_TOKEN", "")

    client = KeboolaClient(token=token, url=url)
    discovered = client.discover_all_tables()

    repo = TableRegistryRepository(conn)
    registered = 0
    skipped = 0
    errors = 0
    table_names = []

    for table in discovered:
        # A missing, None or non-string id counts as one error instead of
        # aborting the whole discovery run with an AttributeError.
        raw_id = table.get("id") or ""
        if not isinstance(raw_id, str):
            errors += 1
            continue

        table_id = raw_id.strip().lower().replace(".", "_").replace(" ", "_")
        if not table_id:
            errors += 1
            continue

        if repo.get(table_id):
            skipped += 1
            continue

        try:
            # Parse bucket from table ID (format: in.c-bucket.table_name)
            parts = raw_id.split(".")
            bucket = parts[1] if len(parts) > 1 else ""
            source_table = parts[2] if len(parts) > 2 else (table.get("name") or "")

            repo.register(
                id=table_id,
                name=table.get("name") or table_id,
                source_type="keboola",
                bucket=bucket,
                source_table=source_table,
                query_mode="local",
                registered_by=user_email,
                description=f"Auto-discovered from Keboola: {raw_id}",
            )
            registered += 1
            table_names.append(table_id)
        except Exception as e:
            # One bad table must not stop registration of the others.
            logger.warning("Failed to register %s: %s", table_id, e)
            errors += 1

    return {
        "registered": registered,
        "skipped": skipped,
        "errors": errors,
        "tables": table_names,
        "source": "keboola",
    }
|
||||
|
||||
|
||||
@router.post("/discover-and-register")
async def discover_and_register(
    user: dict = Depends(require_role(Role.ADMIN)),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Discover tables from configured source and auto-register them.

    Combines discover-tables + register-table into one call.
    Skips already-registered tables. Used by /setup wizard and AI agents.

    Returns:
        The summary dict produced by ``_discover_and_register_tables``.

    Raises:
        HTTPException: 500 when discovery or registration raises.
    """
    try:
        return _discover_and_register_tables(conn, user.get("email", "admin"))
    except Exception as e:
        # Record the full traceback server-side; the client only gets the message.
        logger.exception("Discovery and registration failed")
        raise HTTPException(status_code=500, detail=f"Discovery and registration failed: {e}") from e
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
"""Health check endpoint — structured diagnostics for AI agents."""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
import duckdb
|
||||
|
||||
from app.auth.dependencies import _get_db
|
||||
from src.db import SCHEMA_VERSION
|
||||
from src.repositories.sync_state import SyncStateRepository
|
||||
|
||||
router = APIRouter(tags=["health"])
|
||||
|
|
@ -69,6 +71,9 @@ async def health_check(conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
|
|||
|
||||
return {
|
||||
"status": overall,
|
||||
"version": os.environ.get("AGNES_VERSION", "dev"),
|
||||
"channel": os.environ.get("RELEASE_CHANNEL", "dev"),
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"services": checks,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,8 +64,29 @@ def _run_sync(tables: Optional[List[str]] = None):
|
|||
sys_conn.close()
|
||||
|
||||
if not table_configs:
|
||||
logger.warning("No tables to sync for source_type=%s", source_type)
|
||||
return
|
||||
# Auto-discover tables on first sync when registry is empty
|
||||
if source_type == "keboola" and os.environ.get("KEBOOLA_STORAGE_TOKEN"):
|
||||
logger.info("No tables registered — running auto-discovery from Keboola")
|
||||
try:
|
||||
from app.api.admin import _discover_and_register_tables
|
||||
auto_conn = get_system_db()
|
||||
try:
|
||||
result = _discover_and_register_tables(auto_conn, "auto-discovery")
|
||||
logger.info("Auto-discovered %d tables, skipped %d", result["registered"], result["skipped"])
|
||||
finally:
|
||||
auto_conn.close()
|
||||
# Re-read table configs after auto-registration
|
||||
sys_conn2 = get_system_db()
|
||||
try:
|
||||
table_configs = TableRegistryRepository(sys_conn2).list_local(source_type)
|
||||
finally:
|
||||
sys_conn2.close()
|
||||
except Exception as e:
|
||||
logger.warning("Auto-discovery failed: %s", e)
|
||||
|
||||
if not table_configs:
|
||||
logger.warning("No tables to sync for source_type=%s", source_type)
|
||||
return
|
||||
|
||||
# Serialize configs — strip non-serializable fields
|
||||
serializable = []
|
||||
|
|
@ -113,6 +134,29 @@ print(json.dumps(result))
|
|||
else:
|
||||
print(f"[SYNC] Extractor OK", file=_sys.stderr, flush=True)
|
||||
|
||||
# Run custom connectors (Tier A: local mount)
|
||||
connectors_dir = Path(os.environ.get("CONNECTORS_DIR", str(Path(__file__).parent.parent.parent / "connectors" / "custom")))
|
||||
if connectors_dir.exists():
|
||||
for connector_dir in sorted(connectors_dir.iterdir()):
|
||||
if not connector_dir.is_dir():
|
||||
continue
|
||||
extractor = connector_dir / "extractor.py"
|
||||
if not extractor.exists():
|
||||
continue
|
||||
logger.info("Running custom connector: %s", connector_dir.name)
|
||||
try:
|
||||
custom_result = subprocess.run(
|
||||
[sys.executable, str(extractor)],
|
||||
env=env, capture_output=True, text=True, timeout=600,
|
||||
cwd=str(Path(__file__).parent.parent.parent),
|
||||
)
|
||||
if custom_result.returncode != 0:
|
||||
logger.error("Custom connector %s failed: %s", connector_dir.name, custom_result.stderr[-500:])
|
||||
else:
|
||||
logger.info("Custom connector %s completed", connector_dir.name)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error("Custom connector %s timed out", connector_dir.name)
|
||||
|
||||
# Rebuild master views (reads extract.duckdb files, no write conflict)
|
||||
from src.orchestrator import SyncOrchestrator
|
||||
orch = SyncOrchestrator()
|
||||
|
|
|
|||
|
|
@ -7,22 +7,22 @@ from typing import Optional
|
|||
|
||||
import jwt
|
||||
|
||||
SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "")
|
||||
|
||||
if not SECRET_KEY:
|
||||
def _get_secret_key() -> str:
|
||||
"""Load JWT secret - from env, file, or auto-generated."""
|
||||
if os.environ.get("TESTING", "").lower() in ("1", "true"):
|
||||
SECRET_KEY = "test-jwt-secret-key-minimum-32-chars!!"
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"JWT_SECRET_KEY environment variable is required. "
|
||||
"Generate one: python -c \"import secrets; print(secrets.token_hex(32))\""
|
||||
return os.environ.get("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!")
|
||||
from app.secrets import get_jwt_secret
|
||||
key = get_jwt_secret()
|
||||
if len(key) < 32:
|
||||
import warnings as _warnings
|
||||
_warnings.warn(
|
||||
f"JWT_SECRET_KEY is {len(key)} chars — minimum 32 recommended",
|
||||
UserWarning, stacklevel=2,
|
||||
)
|
||||
elif len(SECRET_KEY) < 32 and os.environ.get("TESTING", "").lower() not in ("1", "true"):
|
||||
import warnings as _warnings
|
||||
_warnings.warn(
|
||||
f"JWT_SECRET_KEY is {len(SECRET_KEY)} chars — minimum 32 recommended",
|
||||
UserWarning, stacklevel=2,
|
||||
)
|
||||
return key
|
||||
|
||||
|
||||
SECRET_KEY = _get_secret_key()
|
||||
|
||||
ALGORITHM = "HS256"
|
||||
ACCESS_TOKEN_EXPIRE_HOURS = 24 # 24 hours
|
||||
|
|
|
|||
|
|
@ -11,15 +11,34 @@ _instance_config: Optional[dict] = None
|
|||
|
||||
|
||||
def load_instance_config() -> dict:
|
||||
"""Load instance.yaml using the existing config loader."""
|
||||
"""Load instance.yaml — checks API-generated config first, then static config.
|
||||
|
||||
Search order:
|
||||
1. DATA_DIR/state/instance.yaml (written by /api/admin/configure, writable)
|
||||
2. CONFIG_DIR/instance.yaml (static, read-only in Docker)
|
||||
3. Empty dict with defaults (if neither exists)
|
||||
"""
|
||||
global _instance_config
|
||||
if _instance_config is not None:
|
||||
return _instance_config
|
||||
|
||||
# First, try API-generated config in writable data volume
|
||||
import yaml
|
||||
data_dir = Path(os.environ.get("DATA_DIR", "./data"))
|
||||
api_config_path = data_dir / "state" / "instance.yaml"
|
||||
if api_config_path.exists():
|
||||
try:
|
||||
_instance_config = yaml.safe_load(api_config_path.read_text()) or {}
|
||||
logger.info("Loaded instance.yaml from %s", api_config_path)
|
||||
return _instance_config
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not load API-generated instance.yaml: {e}")
|
||||
|
||||
# Fall back to static config (may have strict validation)
|
||||
try:
|
||||
from config.loader import load_instance_config as _load, get_instance_value
|
||||
from config.loader import load_instance_config as _load
|
||||
_instance_config = _load()
|
||||
logger.info("Loaded instance.yaml")
|
||||
logger.info("Loaded instance.yaml from config/")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not load instance.yaml: {e}. Using defaults.")
|
||||
_instance_config = {}
|
||||
|
|
|
|||
21
app/main.py
21
app/main.py
|
|
@ -48,8 +48,8 @@ def create_app() -> FastAPI:
|
|||
)
|
||||
|
||||
# Session middleware (required for OAuth state)
|
||||
import secrets as _secrets
|
||||
session_secret = os.environ.get("SESSION_SECRET", os.environ.get("JWT_SECRET_KEY", _secrets.token_hex(32)))
|
||||
from app.secrets import get_session_secret
|
||||
session_secret = get_session_secret()
|
||||
app.add_middleware(SessionMiddleware, secret_key=session_secret)
|
||||
|
||||
# CORS for CLI and external clients
|
||||
|
|
@ -62,6 +62,14 @@ def create_app() -> FastAPI:
|
|||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Load .env_overlay (persisted by /api/admin/configure)
|
||||
_overlay = Path(os.environ.get("DATA_DIR", "./data")) / "state" / ".env_overlay"
|
||||
if _overlay.exists():
|
||||
for line in _overlay.read_text().splitlines():
|
||||
if "=" in line and not line.startswith("#"):
|
||||
k, v = line.split("=", 1)
|
||||
os.environ.setdefault(k.strip(), v.strip())
|
||||
|
||||
# Load instance config on startup
|
||||
try:
|
||||
from app.instance_config import load_instance_config
|
||||
|
|
@ -70,6 +78,15 @@ def create_app() -> FastAPI:
|
|||
except Exception as e:
|
||||
logger.warning(f"Could not load instance config: {e}")
|
||||
|
||||
# Startup banner
|
||||
from src.db import SCHEMA_VERSION
|
||||
logger.info(
|
||||
"Agnes %s | channel: %s | schema v%s",
|
||||
os.environ.get("AGNES_VERSION", "dev"),
|
||||
os.environ.get("RELEASE_CHANNEL", "dev"),
|
||||
SCHEMA_VERSION,
|
||||
)
|
||||
|
||||
# Seed admin user for testing/CI (when SEED_ADMIN_EMAIL is set)
|
||||
seed_email = os.environ.get("SEED_ADMIN_EMAIL")
|
||||
if seed_email:
|
||||
|
|
|
|||
43
app/secrets.py
Normal file
43
app/secrets.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""Auto-generate and persist secrets that survive container restarts."""
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _load_or_generate(env_var: str, file_name: str) -> str:
    """Load secret from env var, or from file, or generate and persist.

    Lookup order:
      1. The environment variable *env_var*, when set and non-empty.
      2. The file ``DATA_DIR/state/<file_name>``, when it has content.
      3. A fresh ``secrets.token_hex(32)`` value, written to that file.

    Args:
        env_var: Name of the environment variable that overrides the file.
        file_name: File name under ``DATA_DIR/state`` used for persistence.

    Returns:
        The secret string.
    """
    val = os.environ.get(env_var, "")
    if val:
        return val

    data_dir = Path(os.environ.get("DATA_DIR", "./data"))
    secret_path = data_dir / "state" / file_name
    if secret_path.exists():
        val = secret_path.read_text().strip()
        if val:
            return val
        logger.warning("Secret file %s is empty, regenerating", secret_path)

    secret_path.parent.mkdir(parents=True, exist_ok=True)
    val = secrets.token_hex(32)
    # Create the file with owner-only permissions from the start so the
    # secret is never briefly readable by other users.
    fd = os.open(secret_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(val)
    try:
        # The mode passed to os.open only applies to newly created files;
        # tighten a pre-existing (empty) file as well.
        secret_path.chmod(0o600)
    except OSError:
        pass  # chmod not supported on all platforms (e.g., Windows)
    logger.info(
        "Auto-generated %s -> %s (set %s in .env to use a fixed value)",
        file_name, secret_path, env_var,
    )
    return val
|
||||
|
||||
|
||||
def get_jwt_secret() -> str:
    """Return the JWT signing secret.

    Resolved through ``_load_or_generate`` using the ``JWT_SECRET_KEY``
    environment variable and the ``.jwt_secret`` file.
    """
    jwt_secret = _load_or_generate("JWT_SECRET_KEY", ".jwt_secret")
    return jwt_secret
|
||||
|
||||
|
||||
def get_session_secret() -> str:
    """Return the session secret.

    Resolved through ``_load_or_generate`` using the ``SESSION_SECRET``
    environment variable and the ``.session_secret`` file.
    """
    session_secret = _load_or_generate("SESSION_SECRET", ".session_secret")
    return session_secret
|
||||
|
|
@ -120,6 +120,7 @@ _URL_MAP = {
|
|||
"email_auth.login_email_form": "/login/email",
|
||||
"email_auth.send_magic_link": "/auth/email/send-link",
|
||||
"register": "/auth/password/setup",
|
||||
"setup": "/setup",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -177,6 +178,18 @@ async def index(request: Request, user: Optional[dict] = Depends(get_optional_us
|
|||
return RedirectResponse(url="/login", status_code=302)
|
||||
|
||||
|
||||
@router.get("/setup", response_class=HTMLResponse)
async def setup_wizard(request: Request, conn: duckdb.DuckDBPyConnection = Depends(_get_db)):
    """First-time setup wizard.

    Redirects to ``/login`` when the ``users`` table already contains at
    least one row; otherwise renders ``setup.html``. If the user count
    cannot be read, the wizard is shown.
    """
    import logging

    try:
        user_count = conn.execute("SELECT COUNT(*) FROM users").fetchone()[0]
    except Exception:
        # No users table yet — show setup. Log the cause so a real database
        # failure is not silently mistaken for a fresh instance.
        logging.getLogger(__name__).debug(
            "Could not count users; showing setup wizard", exc_info=True
        )
    else:
        if user_count > 0:
            return RedirectResponse(url="/login", status_code=302)
    return templates.TemplateResponse(request, "setup.html", _build_context(request))
|
||||
|
||||
|
||||
@router.get("/login", response_class=HTMLResponse)
|
||||
async def login_page(request: Request):
|
||||
providers = []
|
||||
|
|
|
|||
267
app/web/templates/setup.html
Normal file
267
app/web/templates/setup.html
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
{% extends "base_login.html" %}
|
||||
|
||||
{% block title %}Setup - Agnes AI Data Analyst{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="login-page">
|
||||
<div class="login-card-wrapper" style="max-width: 520px; margin: 40px auto; padding: 0 20px;">
|
||||
<div class="login-card" style="max-width: 520px;">
|
||||
<h2 id="wizard-title">Setup Agnes</h2>
|
||||
<p class="login-description" id="wizard-description">
|
||||
Create your admin account to get started.
|
||||
</p>
|
||||
|
||||
<!-- Progress -->
|
||||
<div style="display: flex; gap: 8px; margin-bottom: 24px;">
|
||||
<div id="step-dot-1" style="flex: 1; height: 4px; border-radius: 2px; background: var(--primary, #2563eb);"></div>
|
||||
<div id="step-dot-2" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
|
||||
<div id="step-dot-3" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
|
||||
<div id="step-dot-4" style="flex: 1; height: 4px; border-radius: 2px; background: #e5e7eb;"></div>
|
||||
</div>
|
||||
|
||||
<!-- Status message -->
|
||||
<div id="status-msg" style="display: none; padding: 10px 14px; border-radius: 6px; margin-bottom: 16px; font-size: 14px;"></div>
|
||||
|
||||
<!-- Step 1: Create Admin -->
|
||||
<div id="step-1">
|
||||
<form id="admin-form" onsubmit="return createAdmin(event)">
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Email</label>
|
||||
<input type="email" id="admin-email" required placeholder="admin@company.com"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
|
||||
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Password</label>
|
||||
<input type="password" id="admin-password" required minlength="8" placeholder="Min. 8 characters"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
|
||||
|
||||
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-admin">
|
||||
Create Admin Account
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- Step 2: Data Source -->
|
||||
<div id="step-2" style="display: none;">
|
||||
<form id="source-form" onsubmit="return configureSource(event)">
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Data Source</label>
|
||||
<select id="data-source" onchange="toggleSourceFields()"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
|
||||
<option value="keboola">Keboola</option>
|
||||
<option value="bigquery">BigQuery</option>
|
||||
<option value="local">Local / CSV</option>
|
||||
</select>
|
||||
|
||||
<div id="keboola-fields">
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Keboola URL</label>
|
||||
<input type="url" id="keboola-url" placeholder="https://connection.keboola.com"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
|
||||
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Storage API Token</label>
|
||||
<input type="password" id="keboola-token" placeholder="Your Keboola storage token"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
|
||||
</div>
|
||||
|
||||
<div id="bigquery-fields" style="display: none;">
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">GCP Project</label>
|
||||
<input type="text" id="bq-project" placeholder="my-gcp-project"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 12px; font-size: 14px; box-sizing: border-box;">
|
||||
|
||||
<label style="display: block; margin-bottom: 4px; font-size: 14px; font-weight: 500;">Location</label>
|
||||
<input type="text" id="bq-location" value="us" placeholder="us"
|
||||
style="width: 100%; padding: 10px 12px; border: 1px solid #d1d5db; border-radius: 6px; margin-bottom: 16px; font-size: 14px; box-sizing: border-box;">
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn btn-primary" style="width: 100%;" id="btn-source">
|
||||
Configure Data Source
|
||||
</button>
|
||||
<button type="button" onclick="skipToStep(4)" class="btn btn-secondary" style="width: 100%; margin-top: 8px;" id="btn-skip-source">
|
||||
Skip (configure later)
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- Step 3: Discover Tables -->
|
||||
<div id="step-3" style="display: none;">
|
||||
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
|
||||
Discover and register tables from your data source.
|
||||
</p>
|
||||
<button onclick="discoverTables()" class="btn btn-primary" style="width: 100%;" id="btn-discover">
|
||||
Discover Tables
|
||||
</button>
|
||||
<div id="discover-result" style="display: none; margin-top: 12px; padding: 12px; background: #f0fdf4; border-radius: 6px; font-size: 14px;"></div>
|
||||
<button onclick="goToStep(4)" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none;" id="btn-next-sync">
|
||||
Continue
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Step 4: First Sync & Done -->
|
||||
<div id="step-4" style="display: none;">
|
||||
<p style="font-size: 14px; color: #6b7280; margin-bottom: 16px;">
|
||||
Start the first data sync and go to your dashboard.
|
||||
</p>
|
||||
<button onclick="triggerSync()" class="btn btn-primary" style="width: 100%;" id="btn-sync">
|
||||
Start First Sync
|
||||
</button>
|
||||
<a href="/dashboard" class="btn btn-primary" style="width: 100%; margin-top: 12px; display: none; text-align: center; text-decoration: none;" id="btn-dashboard">
|
||||
Go to Dashboard
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let token = '';
|
||||
const steps = {
|
||||
1: { title: 'Setup Agnes', desc: 'Create your admin account to get started.' },
|
||||
2: { title: 'Data Source', desc: 'Connect to your data source.' },
|
||||
3: { title: 'Discover Tables', desc: 'Find and register tables from your data source.' },
|
||||
4: { title: 'Almost Done', desc: 'Start syncing data and open your dashboard.' },
|
||||
};
|
||||
|
||||
// Display `msg` in the #status-msg banner. The red palette is used when
// `type` is 'error'; any other value gets the green palette.
function showStatus(msg, type) {
    const isError = type === 'error';
    const banner = document.getElementById('status-msg');
    banner.textContent = msg;
    banner.style.display = 'block';
    if (isError) {
        banner.style.background = '#fef2f2';
        banner.style.color = '#dc2626';
    } else {
        banner.style.background = '#f0fdf4';
        banner.style.color = '#16a34a';
    }
}
|
||||
|
||||
// Hide the #status-msg banner (its text is left in place).
function hideStatus() {
    const banner = document.getElementById('status-msg');
    banner.style.display = 'none';
}
|
||||
|
||||
// Switch the wizard to step `n` (1-4): clears the status banner, shows only
// that step's panel, fills the progress dots up to and including `n`, and
// updates the title and description from `steps`.
function goToStep(n) {
    hideStatus();
    [1, 2, 3, 4].forEach((idx) => {
        const panel = document.getElementById('step-' + idx);
        panel.style.display = idx === n ? 'block' : 'none';
        const dot = document.getElementById('step-dot-' + idx);
        dot.style.background = idx <= n ? 'var(--primary, #2563eb)' : '#e5e7eb';
    });
    const current = steps[n];
    document.getElementById('wizard-title').textContent = current.title;
    document.getElementById('wizard-description').textContent = current.desc;
}
|
||||
|
||||
// Jump directly to step `n`; currently a plain alias for goToStep().
function skipToStep(n) {
    return goToStep(n);
}
|
||||
|
||||
// Show the credential fields that belong to the selected data source and
// hide the other group. 'local' has no fields, so both groups are hidden.
function toggleSourceFields() {
    const selected = document.getElementById('data-source').value;
    const groups = [
        ['keboola-fields', 'keboola'],
        ['bigquery-fields', 'bigquery'],
    ];
    for (const [elementId, source] of groups) {
        document.getElementById(elementId).style.display = selected === source ? 'block' : 'none';
    }
}
|
||||
|
||||
// POST `body` as JSON to `url`, attaching the bearer token when one is set.
// Resolves with the parsed JSON response. Rejects with an Error whose
// message is suitable for showStatus():
//   - 401: the stored token is cleared and a "session expired" error is thrown
//   - other non-2xx: the server's `detail` (string or validation list) if present
//   - unparseable body: a generic message instead of a raw JSON parse error
async function apiCall(url, body) {
    const headers = { 'Content-Type': 'application/json' };
    if (token) headers['Authorization'] = 'Bearer ' + token;
    const resp = await fetch(url, { method: 'POST', headers, body: JSON.stringify(body) });
    if (resp.status === 401) {
        const expiredMsg = 'Session expired. Please refresh the page and start over.';
        token = '';
        sessionStorage.removeItem('setup_token');
        showStatus(expiredMsg, 'error');
        // Throw the full text: callers pass err.message to showStatus(), which
        // would otherwise replace the banner with a shorter, less helpful one.
        throw new Error(expiredMsg);
    }
    let data;
    try {
        data = await resp.json();
    } catch (parseErr) {
        // Non-JSON body, e.g. an HTML error page from a proxy.
        throw new Error(resp.ok ? 'Invalid response from server' : 'Request failed (HTTP ' + resp.status + ')');
    }
    if (!resp.ok) {
        const detail = data && data.detail;
        let message = 'Request failed';
        if (typeof detail === 'string' && detail) {
            message = detail;
        } else if (Array.isArray(detail) && detail.length > 0) {
            // Validation errors arrive as a list of objects; join their messages.
            message = detail.map((item) => (item && item.msg) || JSON.stringify(item)).join('; ');
        }
        throw new Error(message);
    }
    return data;
}
|
||||
|
||||
// Submit handler for step 1: POSTs the email/password to /auth/bootstrap,
// keeps the returned access token (in memory and sessionStorage) and moves
// on to step 2. Errors are shown in the status banner. Always returns false.
async function createAdmin(e) {
    e.preventDefault();
    const submitBtn = document.getElementById('btn-admin');
    submitBtn.disabled = true;
    submitBtn.textContent = 'Creating...';
    try {
        const credentials = {
            email: document.getElementById('admin-email').value,
            password: document.getElementById('admin-password').value,
        };
        const result = await apiCall('/auth/bootstrap', credentials);
        token = result.access_token;
        sessionStorage.setItem('setup_token', token);
        goToStep(2);
    } catch (err) {
        showStatus(err.message, 'error');
    } finally {
        // Restore the button whether the request succeeded or failed.
        submitBtn.disabled = false;
        submitBtn.textContent = 'Create Admin Account';
    }
    return false;
}
|
||||
|
||||
// Submit handler for step 2: sends the chosen data source (plus its
// source-specific fields) to /api/admin/configure. On success, 'local'
// skips table discovery and goes to step 4; other sources go to step 3.
// Errors are shown in the status banner. Always returns false.
async function configureSource(e) {
    e.preventDefault();
    const btn = document.getElementById('btn-source');
    btn.disabled = true;
    btn.textContent = 'Verifying...';
    try {
        const src = document.getElementById('data-source').value;
        const body = { data_source: src };
        if (src === 'keboola') {
            body.keboola_url = document.getElementById('keboola-url').value;
            body.keboola_token = document.getElementById('keboola-token').value;
        } else if (src === 'bigquery') {
            body.bigquery_project = document.getElementById('bq-project').value;
            body.bigquery_location = document.getElementById('bq-location').value;
        }
        await apiCall('/api/admin/configure', body);
        // goToStep() clears the status banner, so navigate first and show the
        // success message afterwards; otherwise it is never visible.
        goToStep(src === 'local' ? 4 : 3);
        showStatus('Connection verified!', 'success');
    } catch (err) {
        showStatus(err.message, 'error');
    } finally {
        btn.disabled = false;
        btn.textContent = 'Configure Data Source';
    }
    return false;
}
|
||||
|
||||
// Step 3: POST to /api/admin/discover-and-register, report how many tables
// were registered/skipped, then reveal the "Continue" button in place of the
// discover button. Errors are shown in the status banner.
async function discoverTables() {
    const btn = document.getElementById('btn-discover');
    btn.disabled = true;
    btn.textContent = 'Discovering...';
    try {
        // Go through apiCall() so an expired token (401) is handled the same
        // way as in the other steps. No body is sent: JSON.stringify(undefined)
        // is undefined, which fetch() treats as "no body".
        const data = await apiCall('/api/admin/discover-and-register');

        const el = document.getElementById('discover-result');
        el.style.display = 'block';
        el.textContent = `Registered ${data.registered} tables, skipped ${data.skipped}.`;
        document.getElementById('btn-next-sync').style.display = 'block';
        btn.style.display = 'none';
    } catch (err) {
        showStatus(err.message, 'error');
    } finally {
        btn.disabled = false;
        btn.textContent = 'Discover Tables';
    }
}
|
||||
|
||||
// Step 4: POST to /api/sync/trigger. On success the sync button is replaced
// by the "Go to Dashboard" link; on failure the error is shown and the
// button is re-enabled so the user can retry.
async function triggerSync() {
    const btn = document.getElementById('btn-sync');
    btn.disabled = true;
    btn.textContent = 'Starting sync...';
    try {
        const headers = {};
        if (token) headers['Authorization'] = 'Bearer ' + token;
        const resp = await fetch('/api/sync/trigger', { method: 'POST', headers });
        // fetch() rejects only on network failure; an HTTP error status still
        // resolves, so it must be checked before reporting success.
        if (!resp.ok) {
            let detail = null;
            try {
                detail = (await resp.json()).detail;
            } catch (parseErr) {
                detail = null; // body was not JSON
            }
            throw new Error(
                typeof detail === 'string' && detail
                    ? detail
                    : 'Sync could not be started (HTTP ' + resp.status + ')'
            );
        }
        btn.style.display = 'none';
        document.getElementById('btn-dashboard').style.display = 'block';
        showStatus('Sync started! You can now go to your dashboard.', 'success');
    } catch (err) {
        showStatus(err.message, 'error');
        btn.disabled = false;
        btn.textContent = 'Start First Sync';
    }
}
|
||||
|
||||
// Restore token from sessionStorage (in case of page reload)
|
||||
const savedToken = sessionStorage.getItem('setup_token');
|
||||
if (savedToken) token = savedToken;
|
||||
</script>
|
||||
{% endblock %}
|
||||
11
docker-compose.ci.yml
Normal file
11
docker-compose.ci.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# CI smoke test overlay — minimal config for testing in GitHub Actions.
|
||||
# Usage: docker compose -f docker-compose.yml -f docker-compose.ci.yml up -d
|
||||
services:
|
||||
app:
|
||||
environment:
|
||||
- JWT_SECRET_KEY=smoke-test-ci-key-minimum-32-chars-xx
|
||||
- SESSION_SECRET=smoke-test-session-key-32-chars-min-x
|
||||
- DATA_DIR=/data
|
||||
- TESTING=0
|
||||
ports:
|
||||
- "8000:8000"
|
||||
|
|
@ -1,17 +1,18 @@
|
|||
# Production override — uses pre-built GHCR image instead of local build.
|
||||
# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
# Override tag: AGNES_TAG=stable-2026.04.3 docker compose -f ... up -d
|
||||
services:
|
||||
app:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
scheduler:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
extract:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
telegram-bot:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
ws-gateway:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
corporate-memory:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
session-collector:
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:latest
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ services:
|
|||
volumes:
|
||||
- data:/data
|
||||
- ./config:/app/config:ro
|
||||
# - ./custom-connectors:/app/connectors/custom:ro # Tier A: AI-generated connectors
|
||||
env_file: .env
|
||||
environment:
|
||||
- DATA_DIR=/data
|
||||
|
|
|
|||
37
docs/RELEASE_TEMPLATE.md
Normal file
37
docs/RELEASE_TEMPLATE.md
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Release Notes Template
|
||||
|
||||
Use this template when adding a new entry to `CHANGELOG.md`.
|
||||
|
||||
---
|
||||
|
||||
## stable-YYYY.MM.N
|
||||
|
||||
**Image:** `ghcr.io/keboola/agnes-the-ai-analyst:stable-YYYY.MM.N`
|
||||
**Digest:** `sha256:...` (from `docker inspect --format='{{index .RepoDigests 0}}' <image>`)
|
||||
**Date:** YYYY-MM-DD
|
||||
|
||||
### Added
|
||||
- Feature description
|
||||
|
||||
### Changed
|
||||
- Change description
|
||||
|
||||
### Fixed
|
||||
- Bug fix description
|
||||
|
||||
### Breaking Changes
|
||||
- Description of breaking change
|
||||
- **Migration guide:** Steps to upgrade from previous version
|
||||
|
||||
### Deprecated
|
||||
- Description of deprecated feature (will be removed in YYYY.MM.N)
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- Every merge to `main` creates a new `stable-YYYY.MM.N` release
|
||||
- Include the image digest for verification with `cosign verify`
|
||||
- Breaking changes require `BREAKING:` prefix in commit message
|
||||
- Migration guides must include exact commands or config changes
|
||||
- If a release deprecates the previous stable, note it explicitly
|
||||
|
|
@ -0,0 +1,527 @@
|
|||
# Multi-Instance Deployment & Versioning — Design Spec
|
||||
|
||||
## Goal
|
||||
|
||||
Make Agnes deployable to 20+ independent customer instances via self-service, with safe versioning that prevents one customer's PR from breaking another's deployment.
|
||||
|
||||
## Context
|
||||
|
||||
Agnes is an open-source AI Data Analyst platform. Customers (or their AI agents) deploy it as a Docker image on their own infrastructure. Each instance connects to different data sources (Keboola, BigQuery, Jira, custom).
|
||||
|
||||
**Key constraints:**
|
||||
- Customers range from semi-technical to non-technical, assisted by AI agents
|
||||
- Cloud-agnostic (GCP, AWS, Azure, on-prem, VPS)
|
||||
- One repo, one Docker image, many instances
|
||||
- Community PRs must not break existing customers
|
||||
- AI agent is the primary "installer" and "developer"
|
||||
|
||||
---
|
||||
|
||||
## 1. Versioning & Release Channels
|
||||
|
||||
### CalVer: `YYYY.MM.N`
|
||||
|
||||
Format: year.month.sequential-number. Example: `2026.04.1`, `2026.04.2`, `2026.05.1`.
|
||||
|
||||
No manual release decisions. Every merge to main is a release.
|
||||
|
||||
### Three channels
|
||||
|
||||
| Channel | Floating tag | Versioned tag | Source | Who uses it |
|
||||
|---------|-------------|---------------|--------|-------------|
|
||||
| **dev** | `:dev` | `:dev-2026.04.N` | Every CI-passing push on any feature branch | Developers, PR testing |
|
||||
| **stable** | `:stable` | `:stable-2026.04.N` | Every merge to main + CI pass | All production customers |
|
||||
| **deprecated** | — | `:deprecated-2026.04.N` | Previous stable after breaking change or failed smoke test | Grace period (30 days) |
|
||||
|
||||
Every image also gets a `:sha-abc1234` tag for exact commit traceability.
|
||||
|
||||
### Tag lifecycle
|
||||
|
||||
```
|
||||
feature branch push → CI ✅ → :dev + :dev-2026.04.N + :sha-abc1234
|
||||
❌ → nothing pushed
|
||||
|
||||
merge to main → CI ✅ → :stable + :stable-2026.04.N + :sha-abc1234
|
||||
❌ → merge blocked (CI required)
|
||||
│
|
||||
▼
|
||||
smoke test on canary VM
|
||||
│
|
||||
✅ → :stable confirmed
|
||||
❌ → alert, rollback canary to previous :stable
|
||||
broken build tagged :deprecated-2026.04.N
|
||||
```
|
||||
|
||||
### Version numbering
|
||||
|
||||
CalVer `YYYY.MM.N` where N is a global auto-incrementing counter per month, shared by the dev and stable channels (a deprecated tag keeps the number of the build it retags).
|
||||
|
||||
Example timeline:
|
||||
```
|
||||
Apr 8 feature/foo push → :dev-2026.04.1
|
||||
Apr 8 feature/bar push → :dev-2026.04.2
|
||||
Apr 8 merge foo to main → :stable-2026.04.3
|
||||
Apr 9 feature/baz push → :dev-2026.04.4
|
||||
Apr 9 merge bar to main → :stable-2026.04.5
|
||||
```
|
||||
|
||||
This avoids confusion — version `2026.04.3` exists only once, in one channel.
|
||||
|
||||
### Customer pins version
|
||||
|
||||
```yaml
|
||||
# docker-compose.prod.yml
|
||||
|
||||
# Auto-update (recommended): always latest stable
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:stable
|
||||
|
||||
# Pinned: specific stable release, manual update
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:stable-2026.04.3
|
||||
|
||||
# Testing: latest dev
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:dev
|
||||
|
||||
# Testing: specific dev build
|
||||
image: ghcr.io/keboola/agnes-the-ai-analyst:dev-2026.04.2
|
||||
```
|
||||
|
||||
### Main = stable
|
||||
|
||||
- `main` branch is always releasable
|
||||
- Every merge to main triggers a new stable release
|
||||
- Feature branches are the dev channel
|
||||
- No promotion pipeline, no manual approval for releases
|
||||
- Smoke test is a post-deploy safety net, not a gate
|
||||
|
||||
---
|
||||
|
||||
## 2. Breaking Change Detection
|
||||
|
||||
### What is a breaking change
|
||||
|
||||
- `_meta` table schema change (add/remove column)
|
||||
- `_remote_attach` table schema change
|
||||
- API endpoint removed or response field removed
|
||||
- DuckDB system schema migration that drops data
|
||||
- CLI command removed or argument renamed
|
||||
- `instance.yaml` required key added
|
||||
|
||||
### Automated detection in CI
|
||||
|
||||
Every PR runs:
|
||||
|
||||
1. **Contract tests**: `_meta` and `_remote_attach` schema validation against frozen spec
|
||||
2. **OpenAPI diff**: Compare PR's `openapi.json` against main's. Flag removed endpoints/fields.
|
||||
3. **DuckDB schema diff**: Compare table definitions in system.duckdb
|
||||
4. **Config diff**: Compare `instance.yaml.example` required keys
|
||||
5. **Full connector matrix**: ALL connectors tested, not just changed ones
|
||||
|
||||
If breaking change detected:
|
||||
- PR gets `BREAKING` label automatically
|
||||
- Requires 2 reviewers (elevated review)
|
||||
- Commit message must have `BREAKING:` prefix
|
||||
- CHANGELOG.md entry with migration guide required
|
||||
- On merge: previous stable tagged as `:deprecated-YYYY.MM.N`
|
||||
|
||||
### Deprecated channel
|
||||
|
||||
When a breaking change merges:
|
||||
1. Previous stable image retagged to `:deprecated-2026.04.N`
|
||||
2. New build becomes `:stable` + `:stable-2026.04.(N+1)`
|
||||
3. Health endpoint on deprecated version shows warning:
|
||||
```json
|
||||
{"warnings": ["Running deprecated version 2026.04.3. Update to stable."]}
|
||||
```
|
||||
4. Deprecated images removed from GHCR after 30 days
|
||||
|
||||
---
|
||||
|
||||
## 3. Smoke Test (Post-Deploy Safety Net)
|
||||
|
||||
### What it tests
|
||||
|
||||
Automated sequence run on canary VM after every `:stable` deploy:
|
||||
|
||||
```
|
||||
1. GET /api/health → status != "unhealthy"
|
||||
2. POST /auth/token → 200 (valid credentials)
|
||||
3. GET /api/catalog/tables → count > 0
|
||||
4. POST /api/query {sql: "SELECT 1"} → 200 + rows
|
||||
5. POST /api/sync/trigger → 200
|
||||
6. (wait 30s)
|
||||
7. GET /api/health → check no new errors
|
||||
```
|
||||
|
||||
### On failure
|
||||
|
||||
1. Alert (GitHub issue + optional webhook)
|
||||
2. Canary VM rolled back to previous stable: `docker compose pull && docker compose up -d` with previous tag
|
||||
3. Failed build tagged `:deprecated-YYYY.MM.N`
|
||||
4. `:stable` tag reverted to previous good build
|
||||
|
||||
### Implementation
|
||||
|
||||
GitHub Actions workflow triggered after the build-and-push workflow completes:
|
||||
|
||||
```yaml
|
||||
smoke-test:
|
||||
needs: build-and-push
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Deploy to canary
|
||||
run: |
|
||||
gcloud compute ssh canary-vm --command="
|
||||
cd /opt/agnes &&
|
||||
docker compose pull &&
|
||||
docker compose up -d"
|
||||
|
||||
- name: Wait for healthy
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
STATUS=$(curl -sf canary:8000/api/health | jq -r .status)
|
||||
[ -n "$STATUS" ] && [ "$STATUS" != "unhealthy" ] && break
|
||||
sleep 10
|
||||
done
|
||||
|
||||
- name: Run smoke tests
|
||||
run: |
|
||||
# auth, catalog, query, sync checks
|
||||
./scripts/smoke-test.sh canary:8000
|
||||
|
||||
- name: Rollback on failure
|
||||
if: failure()
|
||||
run: |
|
||||
# retag and rollback
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Self-Service Deployment
|
||||
|
||||
### Target experience
|
||||
|
||||
Customer (or their AI agent) goes from zero to running instance:
|
||||
|
||||
```bash
|
||||
# 1. Get the code
|
||||
git clone https://github.com/keboola/agnes-the-ai-analyst.git
|
||||
cd agnes-the-ai-analyst
|
||||
|
||||
# 2. Start it
|
||||
docker compose up -d
|
||||
|
||||
# 3. Open browser or use API
|
||||
# First visit: /setup wizard (no users exist)
|
||||
# Or headless: curl -X POST localhost:8000/auth/bootstrap ...
|
||||
```
|
||||
|
||||
### Two setup modes
|
||||
|
||||
**A) Interactive (browser):**
|
||||
- First visit when no users exist → redirected to `/setup`
|
||||
- Step 1: Create admin account (email + password)
|
||||
- Step 2: Choose data source (Keboola / BigQuery / CSV / Custom)
|
||||
- Step 3: Enter credentials (token, URL)
|
||||
- Step 4: Auto-discover and register tables
|
||||
- Step 5: Trigger first sync
|
||||
- Done → redirect to dashboard
|
||||
|
||||
**B) Headless (AI agent / CLI):**
|
||||
```bash
|
||||
# Bootstrap admin
|
||||
curl -X POST http://localhost:8000/auth/bootstrap \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"email":"admin@company.com","password":"SecurePass123!"}'
|
||||
|
||||
# Configure data source
|
||||
curl -X POST http://localhost:8000/api/admin/configure \
|
||||
-H "Authorization: Bearer $TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"data_source":"keboola","keboola_token":"...","keboola_url":"..."}'
|
||||
|
||||
# Discover and register tables
|
||||
curl -X POST http://localhost:8000/api/admin/discover-and-register \
|
||||
-H "Authorization: Bearer $TOKEN"
|
||||
|
||||
# Trigger first sync
|
||||
curl -X POST http://localhost:8000/api/sync/trigger \
|
||||
-H "Authorization: Bearer $TOKEN"
|
||||
```
|
||||
|
||||
Both modes lead to same result. AI agent uses headless.
|
||||
|
||||
### Auto-configuration
|
||||
|
||||
On first `docker compose up` with no `.env`:
|
||||
- `JWT_SECRET_KEY` auto-generated and persisted to `/data/state/.jwt_secret`
|
||||
- `SESSION_SECRET` auto-generated similarly
|
||||
- App starts in "setup mode" — only `/setup`, `/auth/bootstrap`, and `/api/health` accessible
|
||||
|
||||
On first `docker compose up` with `.env` containing `KEBOOLA_STORAGE_TOKEN`:
|
||||
- Auto-discovers tables from Keboola on first sync
|
||||
- Skips manual table registration step
|
||||
|
||||
### What customer must provide
|
||||
|
||||
| Required | Optional |
|
||||
|----------|----------|
|
||||
| Server with Docker | Custom domain + TLS |
|
||||
| Admin email + password | Google OAuth credentials |
|
||||
| Data source credentials (Keboola token OR BigQuery creds OR CSV files) | Telegram bot token |
|
||||
| | Jira webhook secret |
|
||||
|
||||
### What customer must NOT do
|
||||
|
||||
- Edit YAML manually (setup wizard generates `instance.yaml`)
|
||||
- Generate JWT secret (auto-generated)
|
||||
- Register tables manually (auto-discovery)
|
||||
- Understand DuckDB internals
|
||||
|
||||
---
|
||||
|
||||
## 5. Custom Connectors (Three Tiers)
|
||||
|
||||
All tiers produce the same output: `extract.duckdb` with `_meta` table + `data/*.parquet`. Orchestrator treats them identically.
|
||||
|
||||
### Tier A: Local mount (fastest, AI-generated)
|
||||
|
||||
Customer's AI agent generates a connector. Lives outside Docker image, survives updates.
|
||||
|
||||
```
|
||||
/opt/agnes/
|
||||
├── docker-compose.yml ← official image
|
||||
├── docker-compose.override.yml ← customer additions
|
||||
└── custom-connectors/
|
||||
└── snowflake/
|
||||
├── extractor.py
|
||||
└── requirements.txt
|
||||
```
|
||||
|
||||
```yaml
|
||||
# docker-compose.override.yml
|
||||
services:
|
||||
app:
|
||||
volumes:
|
||||
- ./custom-connectors:/app/connectors/custom:ro
|
||||
```
|
||||
|
||||
Orchestrator scans `connectors/custom/*/` in addition to built-in connectors.
|
||||
|
||||
**How the AI agent creates one:**
|
||||
1. Reads CLAUDE.md → understands extract.duckdb contract
|
||||
2. Reads existing connector as reference (e.g., `connectors/keboola/extractor.py`)
|
||||
3. Generates `custom-connectors/snowflake/extractor.py`
|
||||
4. Runs contract test to validate output
|
||||
5. Done — orchestrator picks it up on next rebuild
|
||||
|
||||
**Requirements for this to work:**
|
||||
- CLAUDE.md must perfectly describe the contract
|
||||
- Contract test must be runnable standalone
|
||||
- Existing connectors must be readable as examples
|
||||
- Clear error messages when contract doesn't match
|
||||
|
||||
### Tier B: Standalone container (complex dependencies)
|
||||
|
||||
For connectors needing their own runtime (Java, .NET, heavy Python packages).
|
||||
|
||||
```yaml
|
||||
# docker-compose.override.yml
|
||||
services:
|
||||
connector-sap:
|
||||
build: ./custom-connectors/sap
|
||||
volumes:
|
||||
- data:/data
|
||||
environment:
|
||||
- DATA_DIR=/data
|
||||
- SAP_HOST=...
|
||||
profiles:
|
||||
- extract
|
||||
```
|
||||
|
||||
Connector is its own Docker image. Writes to `/data/extracts/sap/extract.duckdb`. Orchestrator finds it automatically.
|
||||
|
||||
### Tier C: Community PR (shared with all)
|
||||
|
||||
Connector contributed to main repo via PR. After merge, available in official image for all customers.
|
||||
|
||||
```
|
||||
connectors/
|
||||
├── keboola/ ← built-in
|
||||
├── bigquery/ ← built-in
|
||||
├── jira/ ← built-in
|
||||
└── snowflake/ ← community contributed
|
||||
```
|
||||
|
||||
**PR requirements:**
|
||||
- Must pass contract tests
|
||||
- Must include tests
|
||||
- Must not modify shared code (orchestrator, API, auth)
|
||||
- CI runs full connector matrix
|
||||
|
||||
---
|
||||
|
||||
## 6. CI/CD Pipeline
|
||||
|
||||
### On feature branch push
|
||||
|
||||
```yaml
|
||||
ci.yml:
|
||||
- tests (all 654+)
|
||||
- contract tests (all connectors)
|
||||
- docker build
|
||||
- push :dev + :dev-YYYY.MM.N + :sha-xxx to GHCR
|
||||
```
|
||||
|
||||
### On merge to main
|
||||
|
||||
```yaml
|
||||
release.yml:
|
||||
- tests (all)
|
||||
- contract tests (all connectors)
|
||||
- breaking change detection (OpenAPI diff, schema diff)
|
||||
- docker build
|
||||
- push :stable + :stable-YYYY.MM.N + :sha-xxx to GHCR
|
||||
- trigger smoke test on canary
|
||||
|
||||
smoke-test.yml (triggered):
|
||||
- deploy to canary VM
|
||||
- run smoke test sequence
|
||||
- on failure: rollback canary, tag build as deprecated, create alert
|
||||
```
|
||||
|
||||
### On PR
|
||||
|
||||
```yaml
|
||||
pr-check.yml:
|
||||
- tests
|
||||
- contract tests
|
||||
- breaking change detection
|
||||
- label PR: "BREAKING" if detected
|
||||
- require 2 reviewers if breaking
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Infrastructure (Cloud-Agnostic)
|
||||
|
||||
### Primary: Docker Compose
|
||||
|
||||
Works everywhere Docker runs. This is the default and only required deployment method.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/keboola/agnes-the-ai-analyst.git
|
||||
cd agnes-the-ai-analyst
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Optional: Terraform (GCP)
|
||||
|
||||
For automated provisioning. Lives in `infra/` with GCS remote state backend.
|
||||
|
||||
```bash
|
||||
cd infra
|
||||
terraform workspace new customer-name
|
||||
terraform apply -var-file=instances/customer-name.tfvars
|
||||
```
|
||||
|
||||
Creates VM, installs Docker, clones repo, generates `.env` and `instance.yaml`, starts Docker Compose.
|
||||
|
||||
### Optional: Caddy TLS
|
||||
|
||||
Production profile adds Caddy reverse proxy with automatic Let's Encrypt:
|
||||
|
||||
```bash
|
||||
DOMAIN=data.customer.com docker compose --profile production up -d
|
||||
```
|
||||
|
||||
### Directory layout on customer server
|
||||
|
||||
```
|
||||
/opt/agnes/ ← git clone
|
||||
├── docker-compose.yml ← official
|
||||
├── docker-compose.prod.yml ← GHCR images
|
||||
├── docker-compose.override.yml ← customer customizations
|
||||
├── .env ← secrets (gitignored)
|
||||
├── config/
|
||||
│ └── instance.yaml ← generated by setup wizard
|
||||
├── custom-connectors/ ← Tier A connectors
|
||||
│ └── snowflake/
|
||||
└── Caddyfile ← TLS config
|
||||
|
||||
/data/ ← Docker volume (persistent)
|
||||
├── state/system.duckdb ← users, registry, sync state
|
||||
├── analytics/server.duckdb ← views into extracts
|
||||
└── extracts/ ← per-source data
|
||||
├── keboola/extract.duckdb
|
||||
├── bigquery/extract.duckdb
|
||||
└── snowflake/extract.duckdb ← from custom connector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. AI Agent as Primary Installer
|
||||
|
||||
CLAUDE.md and documentation must be optimized for AI agent consumption:
|
||||
|
||||
### CLAUDE.md requirements
|
||||
- Complete extract.duckdb contract with exact SQL for `_meta` and `_remote_attach`
|
||||
- Step-by-step setup instructions with exact curl commands
|
||||
- Existing connectors as reference for AI-generated new ones
|
||||
- Clear error messages explaining what went wrong and how to fix
|
||||
|
||||
### API requirements
|
||||
- All setup operations available as API calls (not just UI)
|
||||
- Self-describing error messages: `"Missing KEBOOLA_STORAGE_TOKEN. Set it in .env or pass via /api/admin/configure"`
|
||||
- `/api/health` returns structured diagnostics AI agent can parse
|
||||
- `/api/admin/configure` accepts data source config without file editing
|
||||
|
||||
### Documentation requirements
|
||||
- Machine-readable (no screenshots, no "click here")
|
||||
- Every manual step has an equivalent API/CLI command
|
||||
- QUICKSTART.md optimized for copy-paste by AI agent
|
||||
|
||||
---
|
||||
|
||||
## 9. What Needs to Be Built
|
||||
|
||||
### Must have (blocks multi-instance)
|
||||
|
||||
| # | What | Effort |
|
||||
|---|------|--------|
|
||||
| 1 | CalVer auto-tagging in CI (release.yml) | 1 day |
|
||||
| 2 | Smoke test script + CI workflow | 1 day |
|
||||
| 3 | Breaking change detection in CI (OpenAPI diff, contract diff) | 2 days |
|
||||
| 4 | `/setup` wizard (web) + `/api/admin/configure` (headless) | 3 days |
|
||||
| 5 | Auto-generate JWT_SECRET_KEY on first start | 0.5 day |
|
||||
| 6 | Auto-discovery for Keboola tables on first sync | 1 day |
|
||||
| 7 | Custom connector mount support in orchestrator | 1 day |
|
||||
| 8 | `CHANGELOG.md` + release notes template | 0.5 day |
|
||||
| 9 | Health endpoint version + channel info | 0.5 day |
|
||||
|
||||
### Should have (improves experience)
|
||||
|
||||
| # | What | Effort |
|
||||
|---|------|--------|
|
||||
| 10 | Deprecated version warning in health endpoint | 0.5 day |
|
||||
| 11 | `/api/admin/discover-and-register` auto-discovery endpoint | 1 day |
|
||||
| 12 | Standalone container connector example (Tier B) | 0.5 day |
|
||||
| 13 | CLAUDE.md optimization for AI agent setup | 1 day |
|
||||
| 14 | Terraform module refactor for multi-workspace | 1 day |
|
||||
|
||||
### Nice to have (future)
|
||||
|
||||
| # | What |
|
||||
|---|------|
|
||||
| 15 | Community connector contribution guide |
|
||||
| 16 | Instance health dashboard (central monitoring) |
|
||||
| 17 | Automated backup (GCP disk snapshots) |
|
||||
| 18 | Usage analytics (opt-in telemetry) |
|
||||
|
||||
---
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Multi-tenancy in single process (each customer = separate instance)
|
||||
- Kubernetes/Helm (Docker Compose is sufficient for target scale)
|
||||
- Paid tier / license keys (open-source, monetization TBD)
|
||||
- GUI for connector development (AI agent + CLAUDE.md is sufficient)
|
||||
16
scripts/generate_openapi.py
Normal file
16
scripts/generate_openapi.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""Generate OpenAPI snapshot from the current FastAPI app."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Put the parent of this script's directory on sys.path so the `app` package
# can be imported when the file is executed directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Defaults only: values already present in the environment are left untouched.
os.environ.setdefault("TESTING", "1")
os.environ.setdefault("JWT_SECRET_KEY", "snapshot-generation-key-32-chars-min!!")

# Must come after the sys.path / environment setup above.
from app.main import create_app  # noqa: E402


def main() -> None:
    """Write the app's OpenAPI schema to stdout as indented, key-sorted JSON."""
    schema = create_app().openapi()
    json.dump(schema, sys.stdout, indent=2, sort_keys=True)
    sys.stdout.write("\n")


# Only emit the schema when run as a script; importing the module stays silent.
if __name__ == "__main__":
    main()
|
||||
97
scripts/smoke-test.sh
Executable file
97
scripts/smoke-test.sh
Executable file
|
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/env bash
# Agnes smoke test — verifies a running instance is functional.
# Usage: ./scripts/smoke-test.sh [base-url]
#   base-url     Base URL of the instance, scheme included.
#                Default: http://localhost:8000
# Environment:
#   SMOKE_TOKEN  Bearer token used for the authenticated checks when
#                bootstrap answers 403 (users already exist).
set -euo pipefail

HOST="${1:-http://localhost:8000}"
# Drop a single trailing slash so "$HOST/api/..." never contains "//".
HOST="${HOST%/}"
PASS=0
FAIL=0
TOKEN=""
|
||||
|
||||
# Record one check result: print a PASS/FAIL line and bump the matching counter.
#   $1 - label printed after the verdict
#   $2 - "true" counts as a pass; any other value counts as a failure
check() {
  local label="$1" outcome="$2"
  case "$outcome" in
    true)
      echo " PASS $label"
      PASS=$((PASS + 1))
      ;;
    *)
      echo " FAIL $label"
      FAIL=$((FAIL + 1))
      ;;
  esac
}
|
||||
|
||||
echo "Smoke test: $HOST"
echo "---"

# 1. Health check
# HEALTH holds the 'status' field of /api/health, or "unreachable" when the
# request or the JSON parsing fails.
HEALTH=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
case "$HEALTH" in
  unhealthy|unreachable)
    # Nothing else can be tested against a dead instance: abort right away.
    echo " FATAL: health=$HEALTH"
    exit 1
    ;;
esac
check "health ($HEALTH)" "true"
|
||||
|
||||
# 2. Health has version fields
# The embedded Python prints 'true' only when version, channel and
# schema_version are all present; a failing pipeline counts as 'false'.
if ! HAS_VERSION=$(curl -sf "$HOST/api/health" | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if 'version' in d and 'channel' in d and 'schema_version' in d else 'false')
" 2>/dev/null); then
  HAS_VERSION="false"
fi
check "health version fields" "$HAS_VERSION"
|
||||
|
||||
# 3. Bootstrap (only works on fresh DB; 403 means users exist)
# The response body goes to a private temp file (not a fixed, predictable
# /tmp path) that is removed when the script exits.
BOOT_BODY=$(mktemp "${TMPDIR:-/tmp}/smoke_boot.XXXXXX")
trap 'rm -f "$BOOT_BODY"' EXIT

# curl's -w already prints "000" when the request cannot be completed, so only
# its non-zero exit status is absorbed here; echoing a fallback as well would
# produce "000000".
BOOT_HTTP=$(curl -s -o "$BOOT_BODY" -w "%{http_code}" -X POST "$HOST/auth/bootstrap" \
  -H "Content-Type: application/json" \
  -d '{"email":"smoke@test.local","name":"Smoke Test","password":"SmokeTest123!"}' 2>/dev/null || true)
BOOT_HTTP="${BOOT_HTTP:-000}"

if [ "$BOOT_HTTP" = "200" ]; then
  # Fresh instance: keep the new admin's token for the authenticated checks.
  # The file name is passed as an argument so it never needs quoting in Python.
  TOKEN=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1]))['access_token'])" "$BOOT_BODY" 2>/dev/null || echo "")
  check "bootstrap (new admin)" "true"
elif [ "$BOOT_HTTP" = "403" ]; then
  # Users already exist: fall back to a caller-supplied token, if any.
  TOKEN="${SMOKE_TOKEN:-}"
  echo " SKIP bootstrap (users exist)"
else
  check "bootstrap (HTTP $BOOT_HTTP)" "false"
fi
|
||||
|
||||
# 4. Query SELECT 1 (requires auth)
if [ -z "$TOKEN" ]; then
  echo " SKIP query (no token)"
else
  # Passes only when the response JSON carries a non-empty 'rows' list.
  if ! QUERY_OK=$(curl -sf -X POST "$HOST/api/query" \
      -H "Authorization: Bearer $TOKEN" \
      -H "Content-Type: application/json" \
      -d '{"sql":"SELECT 1 as test"}' | python3 -c "
import sys,json
d=json.load(sys.stdin)
print('true' if len(d.get('rows',[])) > 0 else 'false')
" 2>/dev/null); then
    QUERY_OK="false"
  fi
  check "query SELECT 1" "$QUERY_OK"
fi
|
||||
|
||||
# 5. Sync trigger
if [ -n "$TOKEN" ]; then
  # -w prints "000" by itself when the request fails; only curl's exit status
  # is absorbed so the reported code is not doubled to "000000".
  SYNC_HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$HOST/api/sync/trigger" \
    -H "Authorization: Bearer $TOKEN" 2>/dev/null || true)
  SYNC_HTTP="${SYNC_HTTP:-000}"
  case "$SYNC_HTTP" in
    200|202)
      check "sync trigger" "true"
      ;;
    *)
      check "sync trigger (HTTP $SYNC_HTTP)" "false"
      ;;
  esac
else
  echo " SKIP sync (no token)"
fi
|
||||
|
||||
# 6. Post-sync health (wait briefly)
sleep 5
HEALTH2=$(curl -sf "$HOST/api/health" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null || echo "unreachable")
case "$HEALTH2" in
  unhealthy|unreachable)
    check "post-sync health ($HEALTH2)" "false"
    ;;
  *)
    check "post-sync health ($HEALTH2)" "true"
    ;;
esac

# Results
echo ""
echo "Results: $PASS passed, $FAIL failed"
# Exit non-zero as soon as at least one check failed.
if [ "$FAIL" -ne 0 ]; then
  exit 1
fi
|
||||
23
src/db.py
23
src/db.py
|
|
@ -4,12 +4,16 @@ Provides get_system_db() for the system state database
|
|||
and get_analytics_db() for the analytics database with parquet views.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import duckdb
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||
|
||||
SCHEMA_VERSION = 3
|
||||
|
|
@ -260,6 +264,25 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
|||
"""Create tables if they don't exist. Apply migrations if schema version changed."""
|
||||
current = get_schema_version(conn)
|
||||
if current < SCHEMA_VERSION:
|
||||
# Snapshot before migration for rollback support
|
||||
if current > 0:
|
||||
try:
|
||||
db_path = Path(os.environ.get("DATA_DIR", "./data")) / "state" / "system.duckdb"
|
||||
if db_path.exists():
|
||||
# Flush WAL to main DB file before copying
|
||||
try:
|
||||
conn.execute("CHECKPOINT")
|
||||
except Exception:
|
||||
pass # CHECKPOINT may fail on read-only or in-memory DBs
|
||||
snapshot = db_path.parent / "system.duckdb.pre-migrate"
|
||||
shutil.copy2(str(db_path), str(snapshot))
|
||||
# Also copy WAL if it still exists (belt and suspenders)
|
||||
wal_path = Path(str(db_path) + ".wal")
|
||||
if wal_path.exists():
|
||||
shutil.copy2(str(wal_path), str(snapshot) + ".wal")
|
||||
logger.info("Pre-migration snapshot saved: %s", snapshot)
|
||||
except Exception as e:
|
||||
logger.warning("Could not create pre-migration snapshot: %s", e)
|
||||
conn.execute(_SYSTEM_SCHEMA)
|
||||
if current == 0:
|
||||
conn.execute(
|
||||
|
|
|
|||
5151
tests/snapshots/openapi.json
Normal file
5151
tests/snapshots/openapi.json
Normal file
File diff suppressed because it is too large
Load diff
199
tests/test_db.py
199
tests/test_db.py
|
|
@ -144,6 +144,205 @@ class TestGetAnalyticsDb:
|
|||
conn.close()
|
||||
|
||||
|
||||
class TestMigrationSafety:
    """Tests for schema migration correctness, idempotency, and safety snapshots."""

    # Minimal v2 table_registry (no is_public column — that comes in v3)
    _V2_TABLE_REGISTRY = """
        CREATE TABLE table_registry (
            id VARCHAR PRIMARY KEY,
            name VARCHAR NOT NULL,
            source_type VARCHAR,
            bucket VARCHAR,
            source_table VARCHAR,
            sync_strategy VARCHAR DEFAULT 'full_refresh',
            query_mode VARCHAR DEFAULT 'local',
            sync_schedule VARCHAR,
            profile_after_sync BOOLEAN DEFAULT true,
            primary_key VARCHAR,
            folder VARCHAR,
            description TEXT,
            registered_by VARCHAR,
            registered_at TIMESTAMP DEFAULT current_timestamp
        );
    """

    # Stubs for the remaining tables so _ensure_schema doesn't fail on a
    # hand-built legacy database. Shared by every test that builds one.
    _STUB_TABLE_DDL = (
        "CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
        "CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
        "CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
        "CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
        "CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
        "CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
        "CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
        "CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
        "CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
        "CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
        "CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
        "CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
    )

    @staticmethod
    def _seed_schema_version(conn, version):
        """Create the schema_version table on conn and record `version` in it."""
        conn.execute(
            "CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
            f"INSERT INTO schema_version (version) VALUES ({int(version)});"
        )

    def _create_stub_tables(self, conn):
        """Create every table listed in _STUB_TABLE_DDL on conn."""
        for ddl in self._STUB_TABLE_DDL:
            conn.execute(ddl)

    def _create_v2_db(self, db_path):
        """Create a minimal v2-schema DuckDB file at db_path."""
        import duckdb as _duckdb

        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = _duckdb.connect(str(db_path))
        try:
            self._seed_schema_version(conn, 2)
            conn.execute(self._V2_TABLE_REGISTRY)
            self._create_stub_tables(conn)
        finally:
            conn.close()

    def test_v2_to_v3_migration(self, tmp_path, monkeypatch):
        """v2 DB is migrated to the current schema and gains is_public (added in v3)."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        import duckdb as _duckdb
        from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION

        db_path = tmp_path / "state" / "system.duckdb"
        self._create_v2_db(db_path)

        conn = _duckdb.connect(str(db_path))
        try:
            _ensure_schema(conn)
            assert get_schema_version(conn) == SCHEMA_VERSION
            cols = {
                r[0]
                for r in conn.execute(
                    "SELECT column_name FROM information_schema.columns WHERE table_name='table_registry'"
                ).fetchall()
            }
            assert "is_public" in cols
        finally:
            conn.close()

    def test_migration_idempotency(self, tmp_path, monkeypatch):
        """Calling _ensure_schema twice on a fresh DB raises no error and leaves the version current."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        import duckdb as _duckdb
        from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION

        db_path = tmp_path / "state" / "system.duckdb"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = _duckdb.connect(str(db_path))
        try:
            _ensure_schema(conn)
            _ensure_schema(conn)
            assert get_schema_version(conn) == SCHEMA_VERSION
        finally:
            conn.close()

    def test_migration_preserves_data(self, tmp_path, monkeypatch):
        """Data inserted before migration is preserved after migration runs."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        import duckdb as _duckdb
        from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION

        db_path = tmp_path / "state" / "system.duckdb"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = _duckdb.connect(str(db_path))
        try:
            # Build a v1 schema manually
            self._seed_schema_version(conn, 1)
            conn.execute("""
                CREATE TABLE table_registry (
                    id VARCHAR PRIMARY KEY,
                    name VARCHAR NOT NULL,
                    folder VARCHAR,
                    sync_strategy VARCHAR,
                    primary_key VARCHAR,
                    description TEXT,
                    registered_by VARCHAR,
                    registered_at TIMESTAMP DEFAULT current_timestamp
                );
            """)
            conn.execute(
                "INSERT INTO table_registry (id, name, description) VALUES ('row1', 'MyTable', 'kept')"
            )
            self._create_stub_tables(conn)

            _ensure_schema(conn)

            assert get_schema_version(conn) == SCHEMA_VERSION
            row = conn.execute(
                "SELECT name, description FROM table_registry WHERE id='row1'"
            ).fetchone()
            assert row is not None, "Pre-migration row was lost"
            assert row[0] == "MyTable"
            assert row[1] == "kept"
        finally:
            conn.close()

    def test_pre_migration_snapshot_created(self, tmp_path, monkeypatch):
        """A pre-migrate snapshot is written when migrating an existing (non-fresh) DB."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        from src.db import get_system_db

        # Create a v2 DB at the expected path before calling get_system_db
        db_path = tmp_path / "state" / "system.duckdb"
        self._create_v2_db(db_path)

        conn = get_system_db()
        try:
            snapshot = tmp_path / "state" / "system.duckdb.pre-migrate"
            assert snapshot.exists(), "Pre-migration snapshot was not created"
        finally:
            conn.close()

    def test_no_snapshot_on_fresh_db(self, tmp_path, monkeypatch):
        """No pre-migrate snapshot is created when initialising a brand-new DB."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        from src.db import get_system_db

        conn = get_system_db()
        try:
            snapshot = tmp_path / "state" / "system.duckdb.pre-migrate"
            assert not snapshot.exists(), "Snapshot should not exist for a fresh DB"
        finally:
            conn.close()

    def test_future_version_is_noop(self, tmp_path, monkeypatch):
        """_ensure_schema does nothing when schema_version > SCHEMA_VERSION."""
        monkeypatch.setenv("DATA_DIR", str(tmp_path))
        import duckdb as _duckdb
        from src.db import _ensure_schema, get_schema_version

        db_path = tmp_path / "state" / "system.duckdb"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = _duckdb.connect(str(db_path))
        try:
            self._seed_schema_version(conn, 99)
            _ensure_schema(conn)
            assert get_schema_version(conn) == 99
        finally:
            conn.close()
|
||||
|
||||
|
||||
class TestGetAnalyticsDbReadonly:
|
||||
def test_analytics_readonly_rejects_malicious_dir_name(self, tmp_path, monkeypatch):
|
||||
"""Directories with SQL-injection chars in their name are skipped."""
|
||||
|
|
|
|||
73
tests/test_openapi_snapshot.py
Normal file
73
tests/test_openapi_snapshot.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
"""OpenAPI snapshot test — detect breaking API changes.
|
||||
|
||||
Compares the current app's OpenAPI schema against a committed snapshot.
|
||||
Fails if any path or HTTP method has been removed (breaking change).
|
||||
|
||||
To update the snapshot after an intentional change:
|
||||
make update-openapi-snapshot
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "openapi.json"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def current_schema():
    """Return the OpenAPI schema of a freshly created app (built once per module)."""
    # Default TESTING to "1" before the app package is imported; an existing value wins.
    os.environ.setdefault("TESTING", "1")
    from app.main import create_app

    return create_app().openapi()
|
||||
|
||||
|
||||
def test_snapshot_exists():
    """The committed OpenAPI snapshot file has to be present."""
    hint = "No OpenAPI snapshot found. Generate one with: make update-openapi-snapshot"
    assert SNAPSHOT_PATH.exists(), hint
|
||||
|
||||
|
||||
def test_no_removed_paths(current_schema):
    """Every path recorded in the snapshot must still exist in the current schema."""
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")

    recorded = json.loads(SNAPSHOT_PATH.read_text())
    live_paths = current_schema.get("paths", {})

    # Snapshot paths that the current schema no longer lists, in sorted order.
    missing = sorted(
        path for path in recorded.get("paths", {}) if path not in live_paths
    )
    assert not missing, (
        f"BREAKING: {len(missing)} API path(s) removed: {missing}\n"
        "If intentional, run: make update-openapi-snapshot"
    )
|
||||
|
||||
|
||||
def test_no_removed_methods(current_schema):
    """No HTTP methods should be removed from existing paths.

    Only paths present in both the snapshot and the current schema are
    compared. The test is skipped when no snapshot file exists.
    """
    if not SNAPSHOT_PATH.exists():
        pytest.skip("No snapshot to compare against")

    # Operation keys of an OpenAPI Path Item Object. Built once, outside the
    # loop; non-HTTP keys like 'parameters' are ignored by intersecting with it.
    http_methods = {"get", "put", "post", "delete", "options", "head", "patch", "trace"}

    snapshot = json.loads(SNAPSHOT_PATH.read_text())
    current_paths = current_schema.get("paths", {})
    snapshot_paths = snapshot.get("paths", {})

    breaking = []
    # Sorted so the failure message lists paths in a stable order.
    for path in sorted(set(snapshot_paths) & set(current_paths)):
        removed_methods = set(snapshot_paths[path]) - set(current_paths[path])
        removed_http = removed_methods & http_methods
        if removed_http:
            breaking.append(f" {path}: {sorted(removed_http)}")

    assert not breaking, (
        f"BREAKING: HTTP methods removed from {len(breaking)} path(s):\n"
        + "\n".join(breaking)
        + "\nIf intentional, run: make update-openapi-snapshot"
    )
|
||||
|
|
@ -304,26 +304,37 @@ class TestJwtClaims:
|
|||
# ---- JWT Secret Hardening ----
|
||||
|
||||
class TestJwtSecretHardening:
|
||||
def test_raises_without_jwt_secret_in_non_test_env(self):
|
||||
"""Module-level code must raise RuntimeError when JWT_SECRET_KEY is absent
|
||||
and TESTING is not set, preventing accidental production deploys with no secret."""
|
||||
def test_auto_generates_jwt_secret_when_absent(self, tmp_path):
|
||||
"""When JWT_SECRET_KEY is absent and TESTING is not set,
|
||||
the secret is auto-generated and persisted to a file."""
|
||||
saved_key = os.environ.pop("JWT_SECRET_KEY", None)
|
||||
saved_testing = os.environ.pop("TESTING", None)
|
||||
# Eject any cached module so the re-import re-executes module-level code
|
||||
saved_data_dir = os.environ.get("DATA_DIR")
|
||||
os.environ["DATA_DIR"] = str(tmp_path)
|
||||
# Eject cached modules so the re-import re-executes module-level code
|
||||
sys.modules.pop("app.auth.jwt", None)
|
||||
sys.modules.pop("app.secrets", None)
|
||||
try:
|
||||
with pytest.raises(RuntimeError, match="JWT_SECRET_KEY environment variable is required"):
|
||||
importlib.import_module("app.auth.jwt")
|
||||
importlib.import_module("app.auth.jwt")
|
||||
secret_file = tmp_path / "state" / ".jwt_secret"
|
||||
assert secret_file.exists(), "JWT secret file should be auto-generated"
|
||||
secret = secret_file.read_text().strip()
|
||||
assert len(secret) == 64, "Auto-generated secret should be 64 hex chars (32 bytes)"
|
||||
finally:
|
||||
# Restore environment before re-importing so the module loads cleanly
|
||||
if saved_key is not None:
|
||||
os.environ["JWT_SECRET_KEY"] = saved_key
|
||||
if saved_testing is not None:
|
||||
os.environ["TESTING"] = saved_testing
|
||||
if saved_data_dir is not None:
|
||||
os.environ["DATA_DIR"] = saved_data_dir
|
||||
else:
|
||||
os.environ.pop("DATA_DIR", None)
|
||||
# If neither was set (bare test run), use TESTING flag so reload works
|
||||
if saved_key is None and saved_testing is None:
|
||||
os.environ["TESTING"] = "1"
|
||||
sys.modules.pop("app.auth.jwt", None)
|
||||
sys.modules.pop("app.secrets", None)
|
||||
importlib.import_module("app.auth.jwt")
|
||||
# Clean up the temporary TESTING flag if we added it
|
||||
if saved_key is None and saved_testing is None:
|
||||
|
|
|
|||
Loading…
Reference in a new issue