From 28aba4c1f9db596a484854de180fd1bb0a6d7413 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 13:51:36 +0200 Subject: [PATCH] =?UTF-8?q?fix(query):=20#168=20review=20iter=203=20?= =?UTF-8?q?=E2=80=94=20RBAC=20name-vs-id,=20placeholder=20dead=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed. ๐Ÿ”ด RBAC check at app/api/query.py:362 used `row["name"]` against `accessible_set`, but `accessible_set` is keyed by registry IDs (`get_accessible_tables` returns `resource_grants.resource_id` โ€” table IDs, not display names). Confirmed by `_table_blocks` projection at `app/resource_types.py:157-158`. When `id != name` (e.g. `id="bq.finance.ue", name="ue"`), non-admin users with valid grants got 403 `bq_path_access_denied`. Switch to `row["id"]`. ๐Ÿšฉ Bare-name pass at app/api/query.py:332 had the same name-vs-id mismatch (different impact): legitimate accessible rows were skipped from `dry_run_set`, so the cost guardrail under-counted scan bytes for non-admin users. Could let an over-cap query through and under-bill quota. Switch to `row_id` comparison. ๐ŸŸก `placeholder_from` for billing_project was dead code. `_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty string into every GET payload via `_ensure_bq_optional_fields`. JS `isUnset = (value === undefined)` evaluated False, so the `(defaults to )` placeholder NEVER rendered. Drop the seed โ€” field stays in `known_fields` (UI sees it) but routes through the unset rendering path on GET, where placeholder_from fires. Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped from "billing_project IS present" to "billing_project NOT auto-seeded" to lock in the new shape. 67 affected tests pass. --- app/api/admin.py | 8 +++++++- app/api/query.py | 21 +++++++++++++++++---- tests/test_admin_server_config.py | 8 +++++++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/app/api/admin.py b/app/api/admin.py index 947a712..ba9a9b4 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -803,7 +803,13 @@ class ServerConfigUpdateRequest(BaseModel): # - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'` # (default 10 GiB; 0 disables; null falls through to the default). _BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = { - "billing_project": "", + # `billing_project` intentionally NOT seeded here. The empty-string + # default would inject `billing_project: ""` into every GET payload, + # which makes the JS `isUnset = (value === undefined)` check evaluate + # False โ€” and the `(defaults to )` placeholder feature + # (#160 ยง4.7.5) would never render. Leaving it absent keeps the + # field in the unset rendering path so placeholder_from fires. + # Devin Review iter #3 on PR #168. "max_bytes_per_materialize": 10737418240, "bq_max_scan_bytes": 5368709120, } diff --git a/app/api/query.py b/app/api/query.py index 5cb67fb..549a31b 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -318,6 +318,15 @@ def _bq_guardrail_inputs( # 1. Bare-name pass: look up registered remote-BQ names that appear in # the user SQL as word-boundary tokens. Reuses the same regex shape as # the existing forbidden-table loop above. + # + # `accessible_set` comes from `get_accessible_tables()` which returns + # `resource_grants.resource_id` values โ€” i.e. table registry IDs, NOT + # display names. Devin Review iter #3 caught the mismatch: when + # `id != name` (e.g. id="bq.finance.ue", name="ue"), legitimate + # accessible rows were skipped, under-counting dry-run bytes for the + # cost cap. The user SQL still references the display `name` (that's + # what shows in `da catalog`), so the regex match below uses `name`, + # but the access gate uses `id`. dry_run: list = [] seen_paths: set = set() accessible_set = set(allowed) if allowed is not None else None @@ -327,9 +336,10 @@ def _bq_guardrail_inputs( bucket = r.get("bucket") source_table = r.get("source_table") name = r.get("name") - if not (bucket and source_table and name): + row_id = r.get("id") + if not (bucket and source_table and name and row_id): continue - if accessible_set is not None and name not in accessible_set: + if accessible_set is not None and row_id not in accessible_set: # Forbidden-table loop above will have rejected the request # before we get here. Defensive skip. continue @@ -357,9 +367,12 @@ def _bq_guardrail_inputs( "registered name from `da catalog`." ), } - # Row exists. Per-name grant check (non-admin only). + # Row exists. Per-id grant check (non-admin only). + # `accessible_set` is keyed by registry id (resource_grants + # resource_id), so use `row["id"]` here, not display name. + # Devin Review iter #3. if not is_admin: - if accessible_set is None or row["name"] not in accessible_set: + if accessible_set is None or row["id"] not in accessible_set: return [], { "reason": "bq_path_access_denied", "path": f'bq."{bucket_raw}"."{source_table_raw}"', diff --git a/tests/test_admin_server_config.py b/tests/test_admin_server_config.py index b1a1eed..b339aa1 100644 --- a/tests/test_admin_server_config.py +++ b/tests/test_admin_server_config.py @@ -838,7 +838,13 @@ class TestServerConfigBigQueryFields: resp = c.get("/api/admin/server-config", headers=_auth(token)) assert resp.status_code == 200, resp.text bq = resp.json()["sections"]["data_source"]["bigquery"] - assert "billing_project" in bq, f"billing_project missing from GET: {bq}" + # `billing_project` intentionally NOT auto-seeded into GET payload + # โ€” that would inject empty string and break the placeholder_from + # render path. The field still appears in `known_fields` (so the + # UI knows about it) and renders via the unset path with the + # `(defaults to )` placeholder. Devin Review iter #3. + assert "billing_project" not in bq, \ + f"billing_project should not be auto-seeded in sections; got: {bq}" assert "max_bytes_per_materialize" in bq, \ f"max_bytes_per_materialize missing from GET: {bq}" # legacy_wrap_views was removed in #160; UI must NOT surface it any