fix(query): #168 review iter 3 — RBAC name-vs-id, placeholder dead code

Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed. 🔴 RBAC check at app/api/query.py:362 used `row["name"]` against `accessible_set`, but `accessible_set` is keyed by registry IDs (`get_accessible_tables` returns `resource_grants.resource_id` — table IDs, not display names). Confirmed by `_table_blocks` projection at `app/resource_types.py:157-158`. When `id != name` (e.g. `id="bq.finance.ue", name="ue"`), non-admin users with valid grants got 403 `bq_path_access_denied`. Switch to `row["id"]`. 🚩 Bare-name pass at app/api/query.py:332 had the same name-vs-id mismatch (different impact): legitimate accessible rows were skipped from `dry_run_set`, so the cost guardrail under-counted scan bytes for non-admin users. Could let an over-cap query through and under-bill quota. Switch to `row_id` comparison. 🟡 `placeholder_from` for billing_project was dead code. `_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty string into every GET payload via `_ensure_bq_optional_fields`. JS `isUnset = (value === undefined)` evaluated False, so the `(defaults to <project>)` placeholder NEVER rendered. Drop the seed — field stays in `known_fields` (UI sees it) but routes through the unset rendering path on GET, where placeholder_from fires. Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped from "billing_project IS present" to "billing_project NOT auto-seeded" to lock in the new shape. 67 affected tests pass.
2026-05-04 13:51:36 +02:00 · 2026-05-04 13:51:36 +02:00 · 28aba4c1f9
commit 28aba4c1f9
parent 5eaa449fcc
3 changed files with 31 additions and 6 deletions
--- a/app/api/admin.py
+++ b/app/api/admin.py
@ -803,7 +803,13 @@ class ServerConfigUpdateRequest(BaseModel):
 #   - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'`
 #     (default 10 GiB; 0 disables; null falls through to the default).
 _BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = {
-    "billing_project": "",
+    # `billing_project` intentionally NOT seeded here. The empty-string
+    # default would inject `billing_project: ""` into every GET payload,
+    # which makes the JS `isUnset = (value === undefined)` check evaluate
+    # False — and the `(defaults to <project>)` placeholder feature
+    # (#160 §4.7.5) would never render. Leaving it absent keeps the
+    # field in the unset rendering path so placeholder_from fires.
+    # Devin Review iter #3 on PR #168.
    "max_bytes_per_materialize": 10737418240,
    "bq_max_scan_bytes": 5368709120,
 }
--- a/app/api/query.py
+++ b/app/api/query.py
@ -318,6 +318,15 @@ def _bq_guardrail_inputs(
    # 1. Bare-name pass: look up registered remote-BQ names that appear in
    # the user SQL as word-boundary tokens. Reuses the same regex shape as
    # the existing forbidden-table loop above.
+    #
+    # `accessible_set` comes from `get_accessible_tables()` which returns
+    # `resource_grants.resource_id` values — i.e. table registry IDs, NOT
+    # display names. Devin Review iter #3 caught the mismatch: when
+    # `id != name` (e.g. id="bq.finance.ue", name="ue"), legitimate
+    # accessible rows were skipped, under-counting dry-run bytes for the
+    # cost cap. The user SQL still references the display `name` (that's
+    # what shows in `da catalog`), so the regex match below uses `name`,
+    # but the access gate uses `id`.
    dry_run: list = []
    seen_paths: set = set()
    accessible_set = set(allowed) if allowed is not None else None
@ -327,9 +336,10 @@ def _bq_guardrail_inputs(
        bucket = r.get("bucket")
        source_table = r.get("source_table")
        name = r.get("name")
-        if not (bucket and source_table and name):
+        row_id = r.get("id")
+        if not (bucket and source_table and name and row_id):
            continue
-        if accessible_set is not None and name not in accessible_set:
+        if accessible_set is not None and row_id not in accessible_set:
            # Forbidden-table loop above will have rejected the request
            # before we get here. Defensive skip.
            continue
@ -357,9 +367,12 @@ def _bq_guardrail_inputs(
                    "registered name from `da catalog`."
                ),
            }
-        # Row exists. Per-name grant check (non-admin only).
+        # Row exists. Per-id grant check (non-admin only).
+        # `accessible_set` is keyed by registry id (resource_grants
+        # resource_id), so use `row["id"]` here, not display name.
+        # Devin Review iter #3.
        if not is_admin:
-            if accessible_set is None or row["name"] not in accessible_set:
+            if accessible_set is None or row["id"] not in accessible_set:
                return [], {
                    "reason": "bq_path_access_denied",
                    "path": f'bq."{bucket_raw}"."{source_table_raw}"',
--- a/tests/test_admin_server_config.py
+++ b/tests/test_admin_server_config.py
@ -838,7 +838,13 @@ class TestServerConfigBigQueryFields:
        resp = c.get("/api/admin/server-config", headers=_auth(token))
        assert resp.status_code == 200, resp.text
        bq = resp.json()["sections"]["data_source"]["bigquery"]
-        assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
+        # `billing_project` intentionally NOT auto-seeded into GET payload
+        # — that would inject empty string and break the placeholder_from
+        # render path. The field still appears in `known_fields` (so the
+        # UI knows about it) and renders via the unset path with the
+        # `(defaults to <project>)` placeholder. Devin Review iter #3.
+        assert "billing_project" not in bq, \
+            f"billing_project should not be auto-seeded in sections; got: {bq}"
        assert "max_bytes_per_materialize" in bq, \
            f"max_bytes_per_materialize missing from GET: {bq}"
        # legacy_wrap_views was removed in #160; UI must NOT surface it any