From 1263b807261da48dcf1a68d4311456d22294c188 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Mon, 4 May 2026 13:28:03 +0200
Subject: [PATCH] =?UTF-8?q?fix(query):=20#168=20review=20=E2=80=94=20concu?=
 =?UTF-8?q?rrent-slot=20wraps=20execute,=20doc/JS=20fixes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Devin Review on PR #168 found 5 issues — all real, all addressed.

🚩 ANALYSIS_001 (architectural): concurrent-slot guard didn't protect
actual BQ query execution. Earlier `_enforce_remote_bq_quota_and_cap`
ran dry-run + cap check inside `with quota.acquire(user_id):`, then
returned — releasing the slot BEFORE `analytics.execute(...)` ran. Spec
§4.3.3 explicitly designs the slot to wrap execute so the per-user
concurrent cap limits BQ scans, not just dry-runs.

Refactor to a context manager `_bq_quota_and_cap_guard`. Caller's `with`
block now holds the slot through dry-run, cap check, the actual
`analytics.execute(...)` (which is what triggers the BQ scan when DuckDB
resolves the master view), AND the post-flight record_bytes. Slot
released only when caller's `with` body exits.

🟡 BUG_001: placeholder JS walked `original` (full GET payload root)
instead of `original.sections`. `placeholder_from: ["data_source",
"bigquery", "project"]` is a section-relative path, so billing_project
placeholder NEVER rendered. Fix: walk `original.sections` (with fallback
to `original` for safety).

🟡 BUG_002 + BUG_003: admin_tables.html register and edit modals'
operator help text referenced `max_bytes_per_remote_query` (the old
name from the spec) but the actual config key is `bq_max_scan_bytes`
after the fix-up commit `6423888d` moved it. Replace both occurrences.

🟡 BUG_004: CHANGELOG entry said `api.query.bq_max_scan_bytes` (the
old path) but the read at app/api/query.py:53 is
`get_value("data_source", "bigquery", "bq_max_scan_bytes", ...)`. An
operator who set it under `api.query` in their yaml would have no
effect. Correct path in CHANGELOG.

All 95 #160-affected tests pass after the changes.
---
 CHANGELOG.md                               |  2 +-
 app/api/query.py                           | 84 ++++++++++++++--------
 app/web/templates/admin_server_config.html |  6 +-
 app/web/templates/admin_tables.html        |  4 +-
 4 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c89d3f9..9a75489 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,7 +20,7 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
   to the data_source Save button; on failure the inline result uses the
   same structured shape as the CLI renderer so operators see the same
   hint format admins do.
-- **`api.query.bq_max_scan_bytes` server-config knob** (default 5 GiB):
+- **`data_source.bigquery.bq_max_scan_bytes` server-config knob** (default 5 GiB):
   caps the BigQuery scan that `da query --remote` will issue against
   `query_mode='remote'` BQ rows. Exceeded queries are rejected with a
   structured `400 remote_scan_too_large` detail naming the bytes,
diff --git a/app/api/query.py b/app/api/query.py
index ce80abd..5ea2179 100644
--- a/app/api/query.py
+++ b/app/api/query.py
@@ -1,5 +1,6 @@
 """Query endpoint — execute SQL against server DuckDB."""
 
+import contextlib
 import logging
 import os
 import re
@@ -141,31 +142,41 @@ async def execute_query(
         if blocked_bq_path is not None:
             raise HTTPException(status_code=403, detail=blocked_bq_path)
 
-        if dry_run_set:
-            _enforce_remote_bq_quota_and_cap(
-                user_id=user.get("id") or user.get("email") or "anon",
-                dry_run_set=dry_run_set,
-                sql=request.sql,
+        # Issue #160 §4.3.3 — concurrent-slot guard MUST wrap the actual
+        # `analytics.execute(request.sql)` call (which is what triggers the
+        # BQ scan when DuckDB resolves the master view), not just the
+        # dry-run. Devin Review on PR #168 caught this — earlier
+        # implementation released the slot before execute. Use a context
+        # manager so dry-run + cap check + execute + record_bytes all run
+        # inside the slot.
+        user_id = user.get("id") or user.get("email") or "anon"
+        guard = (
+            _bq_quota_and_cap_guard(
+                user_id=user_id, dry_run_set=dry_run_set, sql=request.sql,
             )
+            if dry_run_set
+            else contextlib.nullcontext()
+        )
+        with guard:
+            # Open in read-only mode for extra safety
+            result = analytics.execute(request.sql).fetchmany(request.limit + 1)
+            columns = [desc[0] for desc in analytics.description] if analytics.description else []
+            truncated = len(result) > request.limit
+            rows = result[:request.limit]
 
-        # Open in read-only mode for extra safety
-        result = analytics.execute(request.sql).fetchmany(request.limit + 1)
-        columns = [desc[0] for desc in analytics.description] if analytics.description else []
-        truncated = len(result) > request.limit
-        rows = result[:request.limit]
-
-        # Post-flight: bill the dry-run estimate against the user's daily
-        # quota. Do this AFTER execute so a downstream failure (e.g. BQ
-        # outage) doesn't strand the user with charged-but-unrun bytes.
-        if dry_run_set:
-            user_id = user.get("id") or user.get("email") or "anon"
-            try:
-                _build_quota_tracker().record_bytes(
-                    user_id, sum(b for _, _, b in dry_run_set),
-                )
-            except Exception:
-                # record_bytes is documented as never-raising; defensive guard.
-                logger.warning("quota record_bytes failed for user=%s", user_id)
+            # Post-flight: bill the dry-run estimate against the user's daily
+            # quota. Do this AFTER execute so a downstream failure (e.g. BQ
+            # outage) doesn't strand the user with charged-but-unrun bytes.
+            # Stays inside the `with quota.acquire(...)` block so the slot
+            # release happens after record_bytes completes.
+            if dry_run_set:
+                try:
+                    _build_quota_tracker().record_bytes(
+                        user_id, sum(b for _, _, b in dry_run_set),
+                    )
+                except Exception:
+                    # record_bytes is documented as never-raising; defensive guard.
+                    logger.warning("quota record_bytes failed for user=%s", user_id)
 
         # Convert to serializable types
         serializable_rows = []
@@ -360,14 +371,27 @@ def _bq_guardrail_inputs(
     return dry_run, None
 
 
-def _enforce_remote_bq_quota_and_cap(*, user_id: str, dry_run_set: list, sql: str) -> None:
+@contextlib.contextmanager
+def _bq_quota_and_cap_guard(*, user_id: str, dry_run_set: list, sql: str):
     """Pre-flight check + dry-run + cap enforcement for /api/query BQ paths.
 
+    Context-manager shape (Devin Review #5 on PR #168). Earlier implementation
+    ran the dry-run + cap check inside `with quota.acquire(user_id):`, then
+    returned — releasing the concurrent slot BEFORE the actual BQ-touching
+    `analytics.execute(...)` ran. Spec §4.3.3 wants execute to be inside the
+    slot so the per-user concurrent cap actually limits BQ scans, not just
+    dry-runs.
+
+    Now: the helper is a context manager that yields after the cap check.
+    The caller's `with` block holds the slot through both dry-run AND the
+    subsequent `analytics.execute(...)` until the body exits.
+
     1. `check_daily_budget` — over-cap users get 429 BEFORE any BQ work.
-    2. `with quota.acquire(user_id)` — concurrent slot guard.
-    3. Dry-run each `(bucket, source_table)` via the existing
-       `_bq_dry_run_bytes` helper. Sum bytes.
-    4. If sum > cap → 400 `remote_scan_too_large` with structured detail.
+    2. `quota.acquire(user_id)` opened — concurrent-slot held throughout.
+    3. Dry-run each `(bucket, source_table)` via `_bq_dry_run_bytes`.
+    4. If sum > cap → 400 `remote_scan_too_large`.
+    5. Yield. Caller runs `analytics.execute(...)` + `record_bytes(...)`.
+    6. On exit, slot released.
 
     Mutates `dry_run_set` in place: the third tuple element (bytes) is
     populated with the per-path dry-run result so the caller can sum and
@@ -424,3 +448,7 @@ def _enforce_remote_bq_quota_and_cap(*, user_id: str, dry_run_set: list, sql: st
                     "the snapshot locally."
                 ),
             })
+
+        # Yield control to the handler — slot stays acquired while the
+        # caller runs analytics.execute() + record_bytes().
+        yield total_bytes
diff --git a/app/web/templates/admin_server_config.html b/app/web/templates/admin_server_config.html
index 1dbf1c4..a3efa09 100644
--- a/app/web/templates/admin_server_config.html
+++ b/app/web/templates/admin_server_config.html
@@ -322,9 +322,13 @@ function renderLeafInput(fieldId, section, pathSegments, kind, value, opts, isUn
   // surface the access.py:339-340 billing→data fallback in the UI.
   let placeholderAttr = "";
   if (isUnset && opts && opts.spec && Array.isArray(opts.spec.placeholder_from)) {
+    // `original` is the full GET /api/admin/server-config response shape:
+    // {sections: {data_source: ...}, editable_sections: [...], ...}.
+    // `placeholder_from` is a section-relative path (e.g. ["data_source",
+    // "bigquery", "project"]) so walk `original.sections` not `original`.
     const resolved = opts.spec.placeholder_from.reduce(
       (cur, k) => (cur && typeof cur === "object" ? cur[k] : undefined),
-      original,
+      original && original.sections ? original.sections : original,
     );
     if (resolved !== undefined && resolved !== null && resolved !== "") {
       placeholderAttr = ` placeholder="(defaults to ${escHtml(String(resolved))})"`;
diff --git a/app/web/templates/admin_tables.html b/app/web/templates/admin_tables.html
index 2e3aa17..75d7f5f 100644
--- a/app/web/templates/admin_tables.html
+++ b/app/web/templates/admin_tables.html
@@ -894,7 +894,7 @@
                                 <br><strong>Live access:</strong> BASE TABLEs query via
                                 <code>bq."dataset"."table"</code> (Storage Read API; predicate pushdown).
                                 VIEWs and MATERIALIZED_VIEWs query via the BQ jobs API (full-scan estimate;
-                                cost-guarded by <code>max_bytes_per_remote_query</code>).
+                                cost-guarded by <code>bq_max_scan_bytes</code>).
                                 <code>da query --remote</code> works for both.
                                 <br><strong>Synced access:</strong> handles both table and view transparently
                                 — the scheduler runs <code>SELECT *</code> through the jobs API and writes a
@@ -1051,7 +1051,7 @@
                                 <br><strong>Live access:</strong> BASE TABLEs query via
                                 <code>bq."dataset"."table"</code> (Storage Read API; predicate pushdown).
                                 VIEWs and MATERIALIZED_VIEWs query via the BQ jobs API (full-scan estimate;
-                                cost-guarded by <code>max_bytes_per_remote_query</code>).
+                                cost-guarded by <code>bq_max_scan_bytes</code>).
                                 <code>da query --remote</code> works for both.
                                 <br><strong>Synced access:</strong> handles both transparently — the
                                 scheduler runs <code>SELECT *</code> through the jobs API and writes a