diff --git a/Caddyfile b/Caddyfile index 9a39211..d8e33c6 100644 --- a/Caddyfile +++ b/Caddyfile @@ -60,8 +60,21 @@ # agnes data dir is mounted at /srv (read-only) instead — see the # `data:/srv:ro` line in docker-compose.yml's caddy service. The # root + try_files combo therefore probes /srv/extracts/... + # + # Devin Review caught: `try_files A B C` rewrites the URI to its + # LAST entry when no file matches (per Caddy docs). Without an + # explicit "rewrite back to original URI" fallback, a parquet + # missing from all three known paths would get rewritten to the + # last static candidate (`/jira/data/.parquet`), and the + # reverse_proxy below would forward THAT rewritten URI to + # app:8000 → app has no such route → 404. To make the documented + # "missed → falls through to app handler" promise hold, append + # the original `/api/data//download` path as the final + # try_files entry: when no file matches, the URI is rewritten + # back to the analyst-facing path and the app's `download_table` + # handler picks it up via the reverse_proxy fallback below. root * /srv/extracts - try_files /bigquery/data/{re.tid.1}.parquet /keboola/data/{re.tid.1}.parquet /jira/data/{re.tid.1}.parquet + try_files /bigquery/data/{re.tid.1}.parquet /keboola/data/{re.tid.1}.parquet /jira/data/{re.tid.1}.parquet /api/data/{re.tid.1}/download @found file handle @found { header Content-Disposition "attachment; filename=\"{re.tid.1}.parquet\"" diff --git a/config/claude_md_template.txt b/config/claude_md_template.txt index 31c78d3..58c1a64 100644 --- a/config/claude_md_template.txt +++ b/config/claude_md_template.txt @@ -28,22 +28,31 @@ This workspace is connected to {{ server.url }}. - **Personal customizations go in `.claude/CLAUDE.local.md`, NOT here.** This file is regenerated by `agnes init --force`; edits here will be lost. CLAUDE.local.md is preserved across regeneration and uploaded on `agnes push`. ## Metrics Workflow -1. `agnes catalog --metrics` — find the relevant metric ({{ metrics.count }} available, categories: {{ metrics.categories | join(", ") or "none yet" }}) -2. `agnes catalog --metrics --show /` — read SQL and business rules -3. Use the canonical SQL from the metric definition, adapt to the question -4. Never invent metric calculations — always check existing definitions first +1. `agnes catalog --metrics` — list registered metrics + categories +2. `agnes catalog --metrics --show /` — read the canonical SQL + business rules +3. Adapt the canonical SQL; never invent metric calculations ## Data Sync - `agnes pull` — download current data from server - `agnes push` — upload sessions and local notes to server - Data on the server refreshes every {{ sync_interval }} -## Available Datasets -{% for t in tables -%} -- `{{ t.name }}`{% if t.description %} — {{ t.description }}{% endif %}{% if t.query_mode == "remote" %} *(remote, queried on demand)*{% endif %} -{% else -%} -- _No tables registered yet — ask an admin to register tables in the dashboard._ -{% endfor %} +## Discovering tables — never enumerate from memory + +Tables, columns, sizes, and `query_mode` change as admins register / migrate / +drop entries. Always re-discover from the live server, never from this file: + +``` +agnes catalog --json # canonical list with query_mode, sql_flavor, + # where_examples, fetch_via, rough_size_hint per table +agnes schema # columns + types in the right SQL dialect +agnes describe
-n 5 # sample rows (local + materialized only) +``` + +`rough_size_hint` is server-populated for `local` and `materialized` tables +(`small` ≤100 MiB, `medium` ≤1 GiB, `large` ≤10 GiB, `very_large` >10 GiB) and +`null` for `remote` rows. When `null`, treat the table as potentially large +and use `agnes snapshot create --estimate` to size-check before fetching. {% if marketplaces -%} ## Plugins available to you diff --git a/scripts/ops/agnes-auto-upgrade.sh b/scripts/ops/agnes-auto-upgrade.sh index 2dbd922..54bdf43 100755 --- a/scripts/ops/agnes-auto-upgrade.sh +++ b/scripts/ops/agnes-auto-upgrade.sh @@ -53,25 +53,16 @@ IMAGE="ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable}" # Array form (vs. word-split string) — quoted expansion survives paths # with spaces and is the modern bash idiom. Functionally identical here # since /opt/agnes paths are tame, but it's a cheap habit to keep. +# +# The TLS-overlay decision deliberately runs BELOW the config re-fetch +# (Devin Review caught: this used to live here, evaluating Caddyfile +# existence against the PRE-fetch state. If the fetch added a +# previously-missing Caddyfile, this tick's docker compose would still +# omit `--profile tls` until the next 5-minute tick — a window where +# the recreate uses the wrong overlay set). Base file list is fine to +# initialise here because the tls overlay is the only conditional one. COMPOSE_FILES=( -f docker-compose.yml -f docker-compose.prod.yml -f docker-compose.host-mount.yml ) PROFILE_ARGS=() -# `-s` (size > 0) instead of `-f` — guards against the corner case where -# rotate.sh wrote a 0-byte cert and exited (or got SIGKILLed mid-write). -# Bringing up the tls profile against an empty cert would just crash -# Caddy on start; better to fall back to plain :8000 until rotate -# regenerates real bytes. Same `-s` rule for Caddyfile: without it (or -# with an empty one) the caddy service crash-loops while the tls overlay -# has already closed :8000 — net effect is "app unreachable". Skipping -# the overlay keeps the app on plain :8000 until config lands. -if [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ] && [ -s Caddyfile ]; then - COMPOSE_FILES+=( -f docker-compose.tls.yml ) - PROFILE_ARGS=( --profile tls ) -elif [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ]; then - logger -t agnes-auto-upgrade "WARN: certs present but Caddyfile missing/empty — skipping tls overlay" -fi -BEFORE=$(docker images --no-trunc --format '{{.Digest}}' "$IMAGE" | head -1) -docker compose "${COMPOSE_FILES[@]}" pull >/dev/null 2>&1 -AFTER=$(docker images --no-trunc --format '{{.Digest}}' "$IMAGE" | head -1) # Re-fetch the bind-mounted config files (compose overlays + Caddyfile) # from the OSS main branch on every tick. Without this, an image-only @@ -113,6 +104,29 @@ for f in "${CONFIG_FILES[@]}"; do done CONFIG_AFTER=$(hash_config_files) +# `-s` (size > 0) instead of `-f` — guards against the corner case where +# rotate.sh wrote a 0-byte cert and exited (or got SIGKILLed mid-write). +# Bringing up the tls profile against an empty cert would just crash +# Caddy on start; better to fall back to plain :8000 until rotate +# regenerates real bytes. Same `-s` rule for Caddyfile: without it (or +# with an empty one) the caddy service crash-loops while the tls overlay +# has already closed :8000 — net effect is "app unreachable". Skipping +# the overlay keeps the app on plain :8000 until config lands. +# +# Evaluated AFTER the config re-fetch above so a freshly-added or +# freshly-removed Caddyfile is reflected in this tick's compose set, +# not the next one. +if [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ] && [ -s Caddyfile ]; then + COMPOSE_FILES+=( -f docker-compose.tls.yml ) + PROFILE_ARGS=( --profile tls ) +elif [ -s /data/state/certs/fullchain.pem ] && [ -s /data/state/certs/privkey.pem ]; then + logger -t agnes-auto-upgrade "WARN: certs present but Caddyfile missing/empty — skipping tls overlay" +fi + +BEFORE=$(docker images --no-trunc --format '{{.Digest}}' "$IMAGE" | head -1) +docker compose "${COMPOSE_FILES[@]}" pull >/dev/null 2>&1 +AFTER=$(docker images --no-trunc --format '{{.Digest}}' "$IMAGE" | head -1) + if [ "$BEFORE" != "$AFTER" ] || [ "$CONFIG_BEFORE" != "$CONFIG_AFTER" ]; then REASON=() [ "$BEFORE" != "$AFTER" ] && REASON+=("image digest")