fix(query): #168 review iter 3 — RBAC name-vs-id, placeholder dead code
Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed. 🔴 RBAC check at app/api/query.py:362 used `row["name"]` against `accessible_set`, but `accessible_set` is keyed by registry IDs (`get_accessible_tables` returns `resource_grants.resource_id` — table IDs, not display names). Confirmed by `_table_blocks` projection at `app/resource_types.py:157-158`. When `id != name` (e.g. `id="bq.finance.ue", name="ue"`), non-admin users with valid grants got 403 `bq_path_access_denied`. Switch to `row["id"]`. 🚩 Bare-name pass at app/api/query.py:332 had the same name-vs-id mismatch (different impact): legitimate accessible rows were skipped from `dry_run_set`, so the cost guardrail under-counted scan bytes for non-admin users. Could let an over-cap query through and under-bill quota. Switch to `row_id` comparison. 🟡 `placeholder_from` for billing_project was dead code. `_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty string into every GET payload via `_ensure_bq_optional_fields`. JS `isUnset = (value === undefined)` evaluated False, so the `(defaults to <project>)` placeholder NEVER rendered. Drop the seed — field stays in `known_fields` (UI sees it) but routes through the unset rendering path on GET, where placeholder_from fires. Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped from "billing_project IS present" to "billing_project NOT auto-seeded" to lock in the new shape. 67 affected tests pass.
This commit is contained in:
parent
5eaa449fcc
commit
28aba4c1f9
3 changed files with 31 additions and 6 deletions
|
|
@ -803,7 +803,13 @@ class ServerConfigUpdateRequest(BaseModel):
|
|||
# - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'`
|
||||
# (default 10 GiB; 0 disables; null falls through to the default).
|
||||
_BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = {
|
||||
"billing_project": "",
|
||||
# `billing_project` intentionally NOT seeded here. The empty-string
|
||||
# default would inject `billing_project: ""` into every GET payload,
|
||||
# which makes the JS `isUnset = (value === undefined)` check evaluate
|
||||
# False — and the `(defaults to <project>)` placeholder feature
|
||||
# (#160 §4.7.5) would never render. Leaving it absent keeps the
|
||||
# field in the unset rendering path so placeholder_from fires.
|
||||
# Devin Review iter #3 on PR #168.
|
||||
"max_bytes_per_materialize": 10737418240,
|
||||
"bq_max_scan_bytes": 5368709120,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -318,6 +318,15 @@ def _bq_guardrail_inputs(
|
|||
# 1. Bare-name pass: look up registered remote-BQ names that appear in
|
||||
# the user SQL as word-boundary tokens. Reuses the same regex shape as
|
||||
# the existing forbidden-table loop above.
|
||||
#
|
||||
# `accessible_set` comes from `get_accessible_tables()` which returns
|
||||
# `resource_grants.resource_id` values — i.e. table registry IDs, NOT
|
||||
# display names. Devin Review iter #3 caught the mismatch: when
|
||||
# `id != name` (e.g. id="bq.finance.ue", name="ue"), legitimate
|
||||
# accessible rows were skipped, under-counting dry-run bytes for the
|
||||
# cost cap. The user SQL still references the display `name` (that's
|
||||
# what shows in `da catalog`), so the regex match below uses `name`,
|
||||
# but the access gate uses `id`.
|
||||
dry_run: list = []
|
||||
seen_paths: set = set()
|
||||
accessible_set = set(allowed) if allowed is not None else None
|
||||
|
|
@ -327,9 +336,10 @@ def _bq_guardrail_inputs(
|
|||
bucket = r.get("bucket")
|
||||
source_table = r.get("source_table")
|
||||
name = r.get("name")
|
||||
if not (bucket and source_table and name):
|
||||
row_id = r.get("id")
|
||||
if not (bucket and source_table and name and row_id):
|
||||
continue
|
||||
if accessible_set is not None and name not in accessible_set:
|
||||
if accessible_set is not None and row_id not in accessible_set:
|
||||
# Forbidden-table loop above will have rejected the request
|
||||
# before we get here. Defensive skip.
|
||||
continue
|
||||
|
|
@ -357,9 +367,12 @@ def _bq_guardrail_inputs(
|
|||
"registered name from `da catalog`."
|
||||
),
|
||||
}
|
||||
# Row exists. Per-name grant check (non-admin only).
|
||||
# Row exists. Per-id grant check (non-admin only).
|
||||
# `accessible_set` is keyed by registry id (resource_grants
|
||||
# resource_id), so use `row["id"]` here, not display name.
|
||||
# Devin Review iter #3.
|
||||
if not is_admin:
|
||||
if accessible_set is None or row["name"] not in accessible_set:
|
||||
if accessible_set is None or row["id"] not in accessible_set:
|
||||
return [], {
|
||||
"reason": "bq_path_access_denied",
|
||||
"path": f'bq."{bucket_raw}"."{source_table_raw}"',
|
||||
|
|
|
|||
|
|
@ -838,7 +838,13 @@ class TestServerConfigBigQueryFields:
|
|||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||||
assert resp.status_code == 200, resp.text
|
||||
bq = resp.json()["sections"]["data_source"]["bigquery"]
|
||||
assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
|
||||
# `billing_project` intentionally NOT auto-seeded into GET payload
|
||||
# — that would inject empty string and break the placeholder_from
|
||||
# render path. The field still appears in `known_fields` (so the
|
||||
# UI knows about it) and renders via the unset path with the
|
||||
# `(defaults to <project>)` placeholder. Devin Review iter #3.
|
||||
assert "billing_project" not in bq, \
|
||||
f"billing_project should not be auto-seeded in sections; got: {bq}"
|
||||
assert "max_bytes_per_materialize" in bq, \
|
||||
f"max_bytes_per_materialize missing from GET: {bq}"
|
||||
# legacy_wrap_views was removed in #160; UI must NOT surface it any
|
||||
|
|
|
|||
Loading…
Reference in a new issue