fix(query): #168 review iter 3 — RBAC name-vs-id, placeholder dead code

Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed.

🔴 RBAC check at app/api/query.py:362 used `row["name"]` against
`accessible_set`, but `accessible_set` is keyed by registry IDs
(`get_accessible_tables` returns `resource_grants.resource_id` —
table IDs, not display names). Confirmed by `_table_blocks` projection
at `app/resource_types.py:157-158`. When `id != name` (e.g.
`id="bq.finance.ue", name="ue"`), non-admin users with valid grants
got 403 `bq_path_access_denied`. Switch to `row["id"]`.

🚩 Bare-name pass at app/api/query.py:332 had the same name-vs-id
mismatch (different impact): legitimate accessible rows were skipped
from `dry_run_set`, so the cost guardrail under-counted scan bytes
for non-admin users. Could let an over-cap query through and
under-bill quota. Switch to `row_id` comparison.

🟡 `placeholder_from` for billing_project was dead code.
`_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty
string into every GET payload via `_ensure_bq_optional_fields`. JS
`isUnset = (value === undefined)` evaluated False, so the
`(defaults to <project>)` placeholder NEVER rendered. Drop the seed —
field stays in `known_fields` (UI sees it) but routes through the
unset rendering path on GET, where placeholder_from fires.

Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped
from "billing_project IS present" to "billing_project NOT auto-seeded"
to lock in the new shape. 67 affected tests pass.
This commit is contained in:
ZdenekSrotyr 2026-05-04 13:51:36 +02:00
parent 5eaa449fcc
commit 28aba4c1f9
3 changed files with 31 additions and 6 deletions

View file

@ -803,7 +803,13 @@ class ServerConfigUpdateRequest(BaseModel):
# - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'`
# (default 10 GiB; 0 disables; null falls through to the default).
_BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = {
"billing_project": "",
# `billing_project` intentionally NOT seeded here. The empty-string
# default would inject `billing_project: ""` into every GET payload,
# which makes the JS `isUnset = (value === undefined)` check evaluate
# False — and the `(defaults to <project>)` placeholder feature
# (#160 §4.7.5) would never render. Leaving it absent keeps the
# field in the unset rendering path so placeholder_from fires.
# Devin Review iter #3 on PR #168.
"max_bytes_per_materialize": 10737418240,
"bq_max_scan_bytes": 5368709120,
}

View file

@ -318,6 +318,15 @@ def _bq_guardrail_inputs(
# 1. Bare-name pass: look up registered remote-BQ names that appear in
# the user SQL as word-boundary tokens. Reuses the same regex shape as
# the existing forbidden-table loop above.
#
# `accessible_set` comes from `get_accessible_tables()` which returns
# `resource_grants.resource_id` values — i.e. table registry IDs, NOT
# display names. Devin Review iter #3 caught the mismatch: when
# `id != name` (e.g. id="bq.finance.ue", name="ue"), legitimate
# accessible rows were skipped, under-counting dry-run bytes for the
# cost cap. The user SQL still references the display `name` (that's
# what shows in `da catalog`), so the regex match below uses `name`,
# but the access gate uses `id`.
dry_run: list = []
seen_paths: set = set()
accessible_set = set(allowed) if allowed is not None else None
@ -327,9 +336,10 @@ def _bq_guardrail_inputs(
bucket = r.get("bucket")
source_table = r.get("source_table")
name = r.get("name")
if not (bucket and source_table and name):
row_id = r.get("id")
if not (bucket and source_table and name and row_id):
continue
if accessible_set is not None and name not in accessible_set:
if accessible_set is not None and row_id not in accessible_set:
# Forbidden-table loop above will have rejected the request
# before we get here. Defensive skip.
continue
@ -357,9 +367,12 @@ def _bq_guardrail_inputs(
"registered name from `da catalog`."
),
}
# Row exists. Per-name grant check (non-admin only).
# Row exists. Per-id grant check (non-admin only).
# `accessible_set` is keyed by registry id (resource_grants
# resource_id), so use `row["id"]` here, not display name.
# Devin Review iter #3.
if not is_admin:
if accessible_set is None or row["name"] not in accessible_set:
if accessible_set is None or row["id"] not in accessible_set:
return [], {
"reason": "bq_path_access_denied",
"path": f'bq."{bucket_raw}"."{source_table_raw}"',

View file

@ -838,7 +838,13 @@ class TestServerConfigBigQueryFields:
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 200, resp.text
bq = resp.json()["sections"]["data_source"]["bigquery"]
assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
# `billing_project` intentionally NOT auto-seeded into GET payload
# — that would inject empty string and break the placeholder_from
# render path. The field still appears in `known_fields` (so the
# UI knows about it) and renders via the unset path with the
# `(defaults to <project>)` placeholder. Devin Review iter #3.
assert "billing_project" not in bq, \
f"billing_project should not be auto-seeded in sections; got: {bq}"
assert "max_bytes_per_materialize" in bq, \
f"max_bytes_per_materialize missing from GET: {bq}"
# legacy_wrap_views was removed in #160; UI must NOT surface it any