# agnes-the-ai-analyst/config/.env.template

# Agnes AI Data Analyst - Environment Variables
# =============================================
# Copy to .env: cp config/.env.template .env
# .env is gitignored - NEVER commit it.

# ── REQUIRED ────────────────────────────────────────
# Generate a value for each of the two variables below with:
#   python -c "import secrets; print(secrets.token_hex(32))"
# Keep comments on their own lines: some env-file loaders do not strip a
# trailing `# ...` and would treat it as (part of) the secret value.
JWT_SECRET_KEY=
SESSION_SECRET=

# ── GOOGLE OAUTH (required for Google login) ────────
# GOOGLE_CLIENT_ID=
# GOOGLE_CLIENT_SECRET=

# ── KEBOOLA (required for Keboola data source) ──────
# KEBOOLA_STORAGE_TOKEN=
# KEBOOLA_STACK_URL=https://connection.keboola.com

# ── BIGQUERY (required for BigQuery data source) ─────
# BIGQUERY_PROJECT=
# BIGQUERY_LOCATION=us

# ── BOOTSTRAP (first deploy only) ───────────────────
# SEED_ADMIN_EMAIL=admin@example.com
# SEED_ADMIN_PASSWORD: dev helper only — sets password_hash on seed.
# Never overwrites an existing password.
# SEED_ADMIN_PASSWORD=

# ── EMAIL / SMTP (required for magic link auth) ─────
# SMTP_HOST=smtp.gmail.com
# SMTP_PORT=587
# SMTP_USER=
# SMTP_PASSWORD=

# ── OPTIONAL SERVICES ───────────────────────────────
# TELEGRAM_BOT_TOKEN=
# JIRA_WEBHOOK_SECRET=
# JIRA_API_TOKEN=
# ANTHROPIC_API_KEY=
# LLM_API_KEY=

# ── DESKTOP APP ─────────────────────────────────────
# Separate secret for desktop app tokens.
# DESKTOP_JWT_SECRET=

# ── DEPLOYMENT ──────────────────────────────────────
# DATA_DIR default: /data in Docker, ./data locally.
# DATA_DIR=/data
# LOG_LEVEL is one of: debug, info, warning, error.
# LOG_LEVEL=info
# CORS_ORIGINS=http://localhost:3000,http://localhost:8000

# ── SCHEDULER (sidecar tuning) ──────────────────────
# All values are in seconds and must be positive integers. SCHEDULER_TICK_SECONDS
# must be <= the smallest job interval below.
# Default 15 min — POST /api/sync/trigger
# SCHEDULER_DATA_REFRESH_INTERVAL=900
# Default 5 min — GET /api/health
# SCHEDULER_HEALTH_CHECK_INTERVAL=300
# Default 1 min — POST /api/scripts/run-due
# SCHEDULER_SCRIPT_RUN_INTERVAL=60
# Default 30 s — loop polling cadence
# SCHEDULER_TICK_SECONDS=30

# ── HTTPS / REVERSE PROXY ───────────────────────────
# Set these when the app runs behind a TLS terminator (Caddy, Cloudflare
# Tunnel, nginx, GCP LB, etc.). The app itself speaks plain HTTP on :8000;
# the terminator is responsible for TLS.
#
# DOMAIN: public hostname. When set, session cookies get the `Secure` flag
#         (browser only sends them over HTTPS). Also used by the Caddy
#         profile to auto-provision Let's Encrypt certs.
# DOMAIN=data.yourcompany.com
#
# SERVER_URL: absolute base URL used to build OAuth callback URLs and other
#             external links. Set this to avoid relying on the incoming
#             request's Host header (which a misconfigured proxy can get
#             wrong). Must match the redirect URI registered in OAuth apps.
# SERVER_URL=https://data.yourcompany.com
#
# Uvicorn is started with `--proxy-headers --forwarded-allow-ips='*'`, so it
# trusts X-Forwarded-Proto / X-Forwarded-For from ANY peer, not only the
# reverse proxy. Make sure :8000 is reachable only through the proxy.

# ── TLS TERMINATION (Caddy in cert-file mode) ───────
# When TLS_FULLCHAIN_URL is set, scripts/ops/agnes-tls-rotate.sh fetches
# the cert daily from this URL and reloads Caddy on diff (zero downtime).
# Empty -> no TLS, app serves plain HTTP on :8000. See docs/DEPLOYMENT.md
# -> TLS for the full bring-up flow.
#
# Supported URL schemes (the four that scripts/tls-fetch.sh resolves):
#   sm://<secret-name>          Google Secret Manager (latest version)
#   gs://<bucket>/<obj>         GCS object
#   https://<url>               Plain HTTPS download (no redirects allowed)
#   file://<path>               Local file (dev/testing only)
#
# TLS_FULLCHAIN_URL=
#
# TLS_PRIVKEY_URL: optional. Empty -> on-VM RSA-2048 key + CSR auto-
# generated on first rotate tick (key never leaves the host; CSR at
# /data/state/certs/cert.csr to submit to your CA). Set to a URL when
# you want VM-replace resilience (e.g. sm://<secret>).
# TLS_PRIVKEY_URL=
#
# TLS_CSR_SUBJECT: stamped on auto-generated CSRs and on the self-signed
# bring-up cert that Caddy serves until your CA publishes the real chain.
# Defaults to /CN=$DOMAIN when unset.
# TLS_CSR_SUBJECT=/C=US/ST=California/L=San Francisco/O=Your Org/CN=data.yourcompany.com

# === Local development ===
# DEBUG=1 enables:
#   - rich.logging.RichHandler (colored, with tracebacks)
#   - fastapi-debug-toolbar mounted at right edge of HTML pages
#   - DuckDB query capture in the toolbar
# Note: FastAPI's own debug=True flag is intentionally NOT toggled. The
# Starlette ServerErrorMiddleware it installs would intercept unhandled
# exceptions and render a plain-HTML traceback before the custom 500 page
# (with debug toolbar) can run. See the comment on `app = FastAPI(...)` in
# app/main.py for details.
# Never set in production. Keep separate from LOCAL_DEV_MODE (auth bypass).
# IMPORTANT: app/main.py reads DEBUG once, at process start, to decide whether
# to mount the toolbar middleware, so the toolbar's mount status is fixed for
# the life of the process. The DuckDB connection wrapper in src/db.py reads
# DEBUG at call time, so per-connection instrumentation does respect runtime
# env changes.
# DEBUG=1

# === Optional observability: PostHog ===
# Off by default. With POSTHOG_API_KEY unset the integration is fully disabled
# (no JS shipped to the browser, no client init, no network). Setting the key
# enables backend exception capture, LLM call tracing ($ai_generation events),
# frontend errors / pageviews, masked session replay, and feature flags.
# Operator guide: docs/observability.md.
#
# POSTHOG_API_KEY must be a PROJECT (publishable, "phc_...") key. The project
# key is embedded in the browser snippet — do NOT use a personal API key here.
# POSTHOG_API_KEY=phc_xxx
#
# Default points at PostHog's EU Cloud endpoint. Override for the US region or
# a self-hosted deployment.
# POSTHOG_HOST=https://eu.i.posthog.com
#
# Identification mode for logged-in users:
#   none  - never identify; distinct_id is a random cookie
#   id    - identify by user.id only (no PII)
#   email - identify by user.id + email (default)
#   full  - id + email + name
# POSTHOG_IDENTIFY_PII=email
#
# Session replay toggle. Default true (replay on). Set it to false to disable
# replay while the rest of the integration stays on (errors / events / flags
# still flow).
# POSTHOG_REPLAY=true
#
# Append a CSS selector to the default replay mask list. Useful when a custom
# template introduces a new sensitive surface (e.g. .customer-pii). The default
# masks: [data-sensitive], .data-cell, .query-result, .sql-output, code, pre.
# POSTHOG_REPLAY_MASK_SELECTOR=

# Ship prompt + completion bodies inside $ai_generation events. Off by default
# because LLM prompts in this product routinely include customer SQL / data.
# Token counts and latency always flow regardless.
# POSTHOG_LLM_PAYLOADS=0

# Environment label tagged on every captured event (super property).
# Use it in PostHog dashboards to split local / dev / staging / production.
# Resolution order when unset: LOCAL_DEV_MODE=1 -> "local"; else
# RELEASE_CHANNEL value; else AGNES_DEPLOYMENT_ENV; else "unknown".
# POSTHOG_ENVIRONMENT=production