diff --git a/app/main.py b/app/main.py index 371235f..42b3186 100644 --- a/app/main.py +++ b/app/main.py @@ -340,7 +340,8 @@ def create_app() -> FastAPI: app.add_middleware(RequestIdMiddleware) # Load .env_overlay (persisted by /api/admin/configure) - _overlay = Path(os.environ.get("DATA_DIR", "./data")) / "state" / ".env_overlay" + from app.secrets import _state_dir + _overlay = _state_dir() / ".env_overlay" if _overlay.exists(): for line in _overlay.read_text().splitlines(): if "=" in line and not line.startswith("#"): diff --git a/app/secrets.py b/app/secrets.py index 41f837d..cb88740 100644 --- a/app/secrets.py +++ b/app/secrets.py @@ -7,13 +7,25 @@ from pathlib import Path logger = logging.getLogger(__name__) +def _state_dir() -> Path: + """Return path to writable state directory. + + STATE_DIR env var takes precedence; otherwise defaults to + ${DATA_DIR}/state for backward compatibility with deployments + that nest state under the data disk. See docs/state-dir.md. + """ + state = os.environ.get("STATE_DIR", "") + if state: + return Path(state) + return Path(os.environ.get("DATA_DIR", "./data")) / "state" + + def _load_or_generate(env_var: str, file_name: str) -> str: """Load secret from env var, or from file, or generate and persist.""" val = os.environ.get(env_var, "") if val: return val - data_dir = Path(os.environ.get("DATA_DIR", "./data")) - secret_path = data_dir / "state" / file_name + secret_path = _state_dir() / file_name if secret_path.exists(): val = secret_path.read_text().strip() if val: diff --git a/docker-compose.flat-mount.yml b/docker-compose.flat-mount.yml new file mode 100644 index 0000000..a95d7aa --- /dev/null +++ b/docker-compose.flat-mount.yml @@ -0,0 +1,88 @@ +# Flat-mount overlay — parallel host binds for /data and /data-state. +# +# Why this overlay +# ---------------- +# The default deployment topology nests state under data: sdb at /data, +# sdc at /data/state (i.e. /data/state is a separate disk mounted INSIDE +# the data disk). That layout works but has known fragility: +# +# - Bind-mount propagation matters. A non-recursive bind hides the +# nested mount, leading to silent shadow writes (the failure mode +# that caused 2026-05-05 in the Groupon FoundryAI deployment). +# +# - Two writers, one tree. Host-side timers (tls-rotate.timer) +# write to /data/state/certs as root, while the container app +# writes to /data/state/system.duckdb as uid 999. Same prefix, +# different mount-namespace views = ownership conflicts. +# +# - sdb resize requires umounting sdc first. Mount-order coupling. +# +# This overlay removes the nesting by mounting the state disk in +# PARALLEL to the data disk: +# +# sdb at /data (analytics, regenerable) +# sdc at /data-state (DuckDB, secrets, certs — irreplaceable) +# +# Both are direct service-level binds, recursive by default in modern +# Docker Engine. No volume options to forget. No nested propagation. +# No two-writer collision (app uses /data-state, host scripts also use +# /data-state — same path, single namespace). +# +# Usage +# ----- +# 1. On the operator's host: mount the config disk at /data-state +# (instead of /data/state). Update fstab. Move existing state +# contents from /data/state to /data-state. +# +# 2. In /opt/agnes/.env, set STATE_DIR=/data-state. The app's secrets +# module + DuckDB code, plus the host-side rotate.sh and +# auto-upgrade.sh scripts, all read this var. +# +# 3. Compose invocation: +# +# docker compose \ +# -f docker-compose.yml \ +# -f docker-compose.prod.yml \ +# -f docker-compose.flat-mount.yml \ +# up -d +# +# Note: this overlay is mutually exclusive with docker-compose.host-mount.yml. +# Pick one based on your disk topology. +# +# Do NOT use this overlay in CI — /data and /data-state do not exist +# on GitHub runners. + +services: + app: + volumes: !override + - /data:/data + - /data-state:/data-state + - ./config:/app/config:ro + + extract: + volumes: !override + - /data:/data + - /data-state:/data-state + - ./config:/app/config:ro + + scheduler: + volumes: !override + - /data:/data + - /data-state:/data-state + - ./config:/app/config:ro + + telegram-bot: + volumes: !override + - /data:/data + - /data-state:/data-state + + ws-gateway: + volumes: !override + - /data:/data + - /data-state:/data-state + + caddy: + volumes: !override + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - /data-state/certs:/certs:ro + - caddy_data:/data diff --git a/docs/state-dir.md b/docs/state-dir.md new file mode 100644 index 0000000..7e40749 --- /dev/null +++ b/docs/state-dir.md @@ -0,0 +1,116 @@ +# State directory layout + +Agnes splits its persistent data into two tiers: + +| Tier | Path | Contents | Backup posture | +|---|---|---|---| +| **data** | `/data` | analytics workspace, extracts, DuckDB caches | regenerable | +| **state** | `${STATE_DIR}` | `system.duckdb`, `.session_secret`, `.jwt_secret`, `certs/*` | irreplaceable | + +`STATE_DIR` is an environment variable that selects the host path the state tier is mounted from. Two layouts are supported: + +## Layout A — nested (legacy default) + +``` +sdb at /data +sdc at /data/state (nested inside the data mount) +``` + +`STATE_DIR=/data/state` (or unset — that's the default). Used by the original deployment topology. + +**Pros**: single bind mount per service (`/data:/data` recursive). Single env var defaults work. + +**Cons**: +- Bind-mount propagation matters. Non-recursive bind silently shadows the nested sdc mount, causing the app to write to an invisible subdirectory of sdb. Recovery requires `docker volume rm` + manual data migration. +- Two writers (host's `tls-rotate.timer` running as root; container app running as uid 999) share `/data/state` with different mount-namespace views → ownership conflicts. +- Resizing sdb requires unmounting sdc first. + +The 2026-05-05 incident in the Groupon FoundryAI deployment was a manifestation of the propagation gotcha. See PRs in this repo and the deployer's infra repo for full root-cause notes. + +## Layout B — flat + +``` +sdb at /data (analytics, regenerable) +sdc at /data-state (state, irreplaceable — parallel to /data, not nested) +``` + +`STATE_DIR=/data-state`. Two parallel host binds per service: `/data:/data` and `/data-state:/data-state`. Use the `docker-compose.flat-mount.yml` overlay. + +**Pros**: +- No nested-mount propagation class. Each disk is its own bind. +- Single writer per disk (host scripts → certs on sdc, container app → DuckDB on sdc; both at the same path). +- sdb resize doesn't touch sdc. +- Direct service binds default to recursive in modern Docker — no `driver_opts` immutability footgun. + +**Cons**: +- One-time per-VM migration: tear down `/data/state` mount, mount sdc at `/data-state` instead, copy state contents. +- Two binds per service (slightly more compose YAML). + +## Choosing + +| Situation | Recommendation | +|---|---| +| Existing deployment, no plans to expand | stay on layout A | +| New deployment | layout B (cleaner, no shadow class) | +| Existing deployment hit by 2026-05-05 shadow class | migrate to layout B | +| CI / local dev | neither (use ephemeral compose volumes) | + +## Migration A → B + +Steps to move an existing VM from nested to flat: + +```bash +# 1. Stop containers +sudo docker compose --env-file /opt/agnes/.env \ + -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.host-mount.yml \ + --profile tls down + +# 2. Snapshot the existing state +sudo cp -a /data/state /tmp/state-backup-$(date -u +%Y%m%dT%H%M%SZ) + +# 3. Unmount sdc from /data/state (its current nested location) +sudo umount /data/state +sudo rmdir /data/state # remove the now-empty mount point on sdb + +# 4. Create the new flat mount point and remount sdc there +sudo mkdir /data-state +echo "LABEL=agnes-state /data-state ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab +# (also remove the old /data/state line from fstab) +sudo mount /data-state + +# 5. Restore state from the backup +sudo cp -a /tmp/state-backup-*/. /data-state/ + +# 6. Set STATE_DIR in /opt/agnes/.env +echo "STATE_DIR=/data-state" | sudo tee -a /opt/agnes/.env + +# 7. Bring the stack back up with the flat overlay +cd /opt/agnes +sudo docker compose --env-file /opt/agnes/.env \ + -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.flat-mount.yml \ + --profile tls up -d +``` + +Verify: `sudo docker exec agnes-app-1 ls /data-state` should show `system.duckdb` etc. + +## What reads `STATE_DIR` + +App code: +- `src/db.py::_get_state_dir()` — the canonical helper. Used by `get_system_db()` and the schema migration snapshot. +- `app/secrets.py::_state_dir()` — for `.session_secret`, `.jwt_secret`. Mirrors the helper since `app/` shouldn't import from `src/`. +- `app/main.py` — for the `.env_overlay` startup file (loaded at process start). + +Host scripts: +- `scripts/ops/agnes-auto-upgrade.sh` — mount-sanity check + cert detection. +- `scripts/ops/agnes-tls-rotate.sh` — `CERT_DIR=$STATE_DIR/certs`. + +Both scripts source `/opt/agnes/.env` with `set -a`, so adding `STATE_DIR=/data-state` to that file propagates everywhere. + +## Caddy cert mount + +Caddy mounts the cert directory from the host at `/certs:ro`. The host-side path follows `STATE_DIR/certs`: + +- Layout A: `/data/state/certs` (in `docker-compose.yml` directly). +- Layout B: `/data-state/certs` (overridden in `docker-compose.flat-mount.yml`). + +Compose-time env substitution happens at `compose up`, not at runtime, so the overlay must be selected at deploy time — there's no single compose YAML that switches based on `STATE_DIR`. diff --git a/scripts/ops/agnes-auto-upgrade.sh b/scripts/ops/agnes-auto-upgrade.sh index 54bdf43..7bc4358 100755 --- a/scripts/ops/agnes-auto-upgrade.sh +++ b/scripts/ops/agnes-auto-upgrade.sh @@ -3,50 +3,58 @@ # Cron fires it every 5 min; pulls latest image for the pinned AGNES_TAG # and recreates containers only if the digest moved. # -# Cert-aware: if /data/state/certs/{fullchain,privkey}.pem both exist +# Cert-aware: if ${STATE_DIR}/certs/{fullchain,privkey}.pem both exist # (populated by agnes-tls-rotate.sh), enables the tls overlay so Caddy # fronts :443. Absence → plain HTTP on :8000. +# +# STATE_DIR is the host path that backs the writable state disk. It +# defaults to /data/state for backward compatibility with the legacy +# nested-mount layout (sdb at /data, sdc nested under /data/state). +# Set STATE_DIR=/data-state in /opt/agnes/.env for the flat layout +# (sdb at /data, sdc parallel at /data-state) — see docs/state-dir.md. set -euo pipefail cd /opt/agnes # shellcheck disable=SC1091 set -a; . /opt/agnes/.env; set +a +STATE_DIR="${STATE_DIR:-/data/state}" + # Fail-fast guard: if the VM has a config disk attached, it MUST be -# mounted at /data/state before any container action. Otherwise the -# app would write state onto /data (sdb) and lose it on the next -# container recreate — the regression that motivated this guard. +# mounted at $STATE_DIR before any container action. Otherwise the +# app would write state onto the parent filesystem and lose it on the +# next container recreate — the regression that motivated this guard. # Three retries (mount may race with udev on cold boot) then hard exit. CONFIG_DEVICE=/dev/disk/by-id/google-config-disk if [ -e "$CONFIG_DEVICE" ]; then attempt=0 while [ $attempt -lt 3 ]; do attempt=$((attempt + 1)) - if mountpoint -q /data/state; then + if mountpoint -q "$STATE_DIR"; then expected_dev=$(readlink -f "$CONFIG_DEVICE") - actual_dev=$(findmnt -n -o SOURCE /data/state) + actual_dev=$(findmnt -n -o SOURCE "$STATE_DIR") if [ "$expected_dev" = "$actual_dev" ]; then break fi - logger -t agnes-auto-upgrade "WARN: /data/state on $actual_dev, expected $expected_dev — attempting remount" - umount /data/state 2>/dev/null || true + logger -t agnes-auto-upgrade "WARN: $STATE_DIR on $actual_dev, expected $expected_dev — attempting remount" + umount "$STATE_DIR" 2>/dev/null || true fi - mount "$CONFIG_DEVICE" /data/state 2>/dev/null || true + mount "$CONFIG_DEVICE" "$STATE_DIR" 2>/dev/null || true sleep $((attempt * 2)) done - if ! mountpoint -q /data/state || \ - [ "$(readlink -f "$CONFIG_DEVICE")" != "$(findmnt -n -o SOURCE /data/state)" ]; then - logger -t agnes-auto-upgrade "FATAL: config disk not mounted at /data/state — refusing to start containers" - echo "FATAL: /data/state is not backed by the config disk." >&2 - echo " Refusing to run docker compose — app state must NEVER land on /data (sdb)." >&2 - echo " Inspect: mount | grep /data/state ; ls /dev/disk/by-id/google-config-disk" >&2 + if ! mountpoint -q "$STATE_DIR" || \ + [ "$(readlink -f "$CONFIG_DEVICE")" != "$(findmnt -n -o SOURCE "$STATE_DIR")" ]; then + logger -t agnes-auto-upgrade "FATAL: config disk not mounted at $STATE_DIR — refusing to start containers" + echo "FATAL: $STATE_DIR is not backed by the config disk." >&2 + echo " Refusing to run docker compose — app state must land on the config disk, not the parent filesystem." >&2 + echo " Inspect: mount | grep $STATE_DIR ; ls /dev/disk/by-id/google-config-disk" >&2 exit 1 fi # Re-apply propagation in case a prior container teardown reset it. # Idempotent — safe to call when already private. mount --make-rprivate /data 2>/dev/null || true - mount --make-rprivate /data/state 2>/dev/null || true + mount --make-rprivate "$STATE_DIR" 2>/dev/null || true fi IMAGE="ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}" @@ -116,10 +124,10 @@ CONFIG_AFTER=$(hash_config_files) # Evaluated AFTER the config re-fetch above so a freshly-added or # freshly-removed Caddyfile is reflected in this tick's compose set, # not the next one. -if [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ] && [ -s Caddyfile ]; then +if [ -s "$STATE_DIR/certs/fullchain.pem" ] && [ -s "$STATE_DIR/certs/privkey.pem" ] && [ -s Caddyfile ]; then COMPOSE_FILES+=( -f docker-compose.tls.yml ) PROFILE_ARGS=( --profile tls ) -elif [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ]; then +elif [ -s "$STATE_DIR/certs/fullchain.pem" ] && [ -s "$STATE_DIR/certs/privkey.pem" ]; then logger -t agnes-auto-upgrade "WARN: certs present but Caddyfile missing/empty — skipping tls overlay" fi diff --git a/scripts/ops/agnes-tls-rotate.sh b/scripts/ops/agnes-tls-rotate.sh index a4f6ac9..39a2166 100755 --- a/scripts/ops/agnes-tls-rotate.sh +++ b/scripts/ops/agnes-tls-rotate.sh @@ -34,7 +34,12 @@ set -a; . /opt/agnes/.env; set +a [ -n "${TLS_FULLCHAIN_URL:-}" ] || { echo "TLS_FULLCHAIN_URL empty — nothing to rotate"; exit 0; } -CERT_DIR=/data/state/certs +# STATE_DIR is the host path that backs the writable state disk. Defaults +# to /data/state for backward compatibility with the legacy nested-mount +# layout; set STATE_DIR=/data-state in /opt/agnes/.env for the flat layout. +# See docs/state-dir.md. +STATE_DIR="${STATE_DIR:-/data/state}" +CERT_DIR="$STATE_DIR/certs" mkdir -p "$CERT_DIR" # Match the agnes UID baked into the app image (Dockerfile: useradd --uid 999). # Without this, whoever happens to win the create race (this script as root diff --git a/src/db.py b/src/db.py index 2591244..f240463 100644 --- a/src/db.py +++ b/src/db.py @@ -453,6 +453,23 @@ def _get_data_dir() -> Path: return Path(os.environ.get("DATA_DIR", "./data")) +def _get_state_dir() -> Path: + """Return path to writable state directory. + + Resolution order: + 1. STATE_DIR env var (explicit override). + 2. ${DATA_DIR}/state (default — current behavior). + + Use the explicit override when the deployer wants state on a + separate disk mounted in parallel with /data rather than nested + inside it. See docs/state-dir.md. + """ + state = os.environ.get("STATE_DIR", "") + if state: + return Path(state) + return _get_data_dir() / "state" + + def get_system_db() -> duckdb.DuckDBPyConnection: """Get a connection to the system state database. @@ -461,7 +478,7 @@ def get_system_db() -> duckdb.DuckDBPyConnection: so callers can safely close() it without closing the underlying connection. """ global _system_db_conn, _system_db_path - db_path = str(_get_data_dir() / "state" / "system.duckdb") + db_path = str(_get_state_dir() / "system.duckdb") with _system_db_lock: if _system_db_conn is None or _system_db_path != db_path: @@ -1812,7 +1829,7 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None: # Snapshot before migration for rollback support if current > 0: try: - db_path = Path(os.environ.get("DATA_DIR", "./data")) / "state" / "system.duckdb" + db_path = _get_state_dir() / "system.duckdb" if db_path.exists(): # Flush WAL to main DB file before copying try: