From 28aba4c1f9db596a484854de180fd1bb0a6d7413 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Mon, 4 May 2026 13:51:36 +0200
Subject: [PATCH] =?UTF-8?q?fix(query):=20#168=20review=20iter=203=20?=
 =?UTF-8?q?=E2=80=94=20RBAC=20name-vs-id,=20placeholder=20dead=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed.

🔴 RBAC check at app/api/query.py:362 used `row["name"]` against
`accessible_set`, but `accessible_set` is keyed by registry IDs
(`get_accessible_tables` returns `resource_grants.resource_id` —
table IDs, not display names). Confirmed by `_table_blocks` projection
at `app/resource_types.py:157-158`. When `id != name` (e.g.
`id="bq.finance.ue", name="ue"`), non-admin users with valid grants
got 403 `bq_path_access_denied`. Switch to `row["id"]`.

🚩 Bare-name pass at app/api/query.py:332 had the same name-vs-id
mismatch (different impact): legitimate accessible rows were skipped
from `dry_run_set`, so the cost guardrail under-counted scan bytes
for non-admin users. Could let an over-cap query through and
under-bill quota. Switch to `row_id` comparison.

🟡 `placeholder_from` for billing_project was dead code.
`_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty
string into every GET payload via `_ensure_bq_optional_fields`. JS
`isUnset = (value === undefined)` evaluated False, so the
`(defaults to <project>)` placeholder NEVER rendered. Drop the seed —
field stays in `known_fields` (UI sees it) but routes through the
unset rendering path on GET, where placeholder_from fires.

Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped
from "billing_project IS present" to "billing_project NOT auto-seeded"
to lock in the new shape. 67 affected tests pass.
---
 app/api/admin.py                  |  8 +++++++-
 app/api/query.py                  | 21 +++++++++++++++++----
 tests/test_admin_server_config.py |  8 +++++++-
 3 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/app/api/admin.py b/app/api/admin.py
index 947a712..ba9a9b4 100644
--- a/app/api/admin.py
+++ b/app/api/admin.py
@@ -803,7 +803,13 @@ class ServerConfigUpdateRequest(BaseModel):
 #   - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'`
 #     (default 10 GiB; 0 disables; null falls through to the default).
 _BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = {
-    "billing_project": "",
+    # `billing_project` intentionally NOT seeded here. The empty-string
+    # default would inject `billing_project: ""` into every GET payload,
+    # which makes the JS `isUnset = (value === undefined)` check evaluate
+    # False — and the `(defaults to <project>)` placeholder feature
+    # (#160 §4.7.5) would never render. Leaving it absent keeps the
+    # field in the unset rendering path so placeholder_from fires.
+    # Devin Review iter #3 on PR #168.
     "max_bytes_per_materialize": 10737418240,
     "bq_max_scan_bytes": 5368709120,
 }
diff --git a/app/api/query.py b/app/api/query.py
index 5cb67fb..549a31b 100644
--- a/app/api/query.py
+++ b/app/api/query.py
@@ -318,6 +318,15 @@ def _bq_guardrail_inputs(
     # 1. Bare-name pass: look up registered remote-BQ names that appear in
     # the user SQL as word-boundary tokens. Reuses the same regex shape as
     # the existing forbidden-table loop above.
+    #
+    # `accessible_set` comes from `get_accessible_tables()` which returns
+    # `resource_grants.resource_id` values — i.e. table registry IDs, NOT
+    # display names. Devin Review iter #3 caught the mismatch: when
+    # `id != name` (e.g. id="bq.finance.ue", name="ue"), legitimate
+    # accessible rows were skipped, under-counting dry-run bytes for the
+    # cost cap. The user SQL still references the display `name` (that's
+    # what shows in `da catalog`), so the regex match below uses `name`,
+    # but the access gate uses `id`.
     dry_run: list = []
     seen_paths: set = set()
     accessible_set = set(allowed) if allowed is not None else None
@@ -327,9 +336,10 @@ def _bq_guardrail_inputs(
         bucket = r.get("bucket")
         source_table = r.get("source_table")
         name = r.get("name")
-        if not (bucket and source_table and name):
+        row_id = r.get("id")
+        if not (bucket and source_table and name and row_id):
             continue
-        if accessible_set is not None and name not in accessible_set:
+        if accessible_set is not None and row_id not in accessible_set:
             # Forbidden-table loop above will have rejected the request
             # before we get here. Defensive skip.
             continue
@@ -357,9 +367,12 @@ def _bq_guardrail_inputs(
                     "registered name from `da catalog`."
                 ),
             }
-        # Row exists. Per-name grant check (non-admin only).
+        # Row exists. Per-id grant check (non-admin only).
+        # `accessible_set` is keyed by registry id (resource_grants
+        # resource_id), so use `row["id"]` here, not display name.
+        # Devin Review iter #3.
         if not is_admin:
-            if accessible_set is None or row["name"] not in accessible_set:
+            if accessible_set is None or row["id"] not in accessible_set:
                 return [], {
                     "reason": "bq_path_access_denied",
                     "path": f'bq."{bucket_raw}"."{source_table_raw}"',
diff --git a/tests/test_admin_server_config.py b/tests/test_admin_server_config.py
index b1a1eed..b339aa1 100644
--- a/tests/test_admin_server_config.py
+++ b/tests/test_admin_server_config.py
@@ -838,7 +838,13 @@ class TestServerConfigBigQueryFields:
         resp = c.get("/api/admin/server-config", headers=_auth(token))
         assert resp.status_code == 200, resp.text
         bq = resp.json()["sections"]["data_source"]["bigquery"]
-        assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
+        # `billing_project` intentionally NOT auto-seeded into GET payload
+        # — that would inject empty string and break the placeholder_from
+        # render path. The field still appears in `known_fields` (so the
+        # UI knows about it) and renders via the unset path with the
+        # `(defaults to <project>)` placeholder. Devin Review iter #3.
+        assert "billing_project" not in bq, \
+            f"billing_project should not be auto-seeded in sections; got: {bq}"
         assert "max_bytes_per_materialize" in bq, \
             f"max_bytes_per_materialize missing from GET: {bq}"
         # legacy_wrap_views was removed in #160; UI must NOT surface it any