From 77cdb65f7670486c0e4b05665073837c1e8ca138 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Sun, 3 May 2026 19:35:14 +0200 Subject: [PATCH] sec(query): #160 BQ_PATH catches quoted "bq" catalog token (Phase 3 review) Phase 3 review identified an RBAC + cost-cap bypass: `SELECT * FROM "bq"."ds"."tbl"` (catalog token quoted as a DuckDB identifier) was NOT matched by the BQ_PATH regex, so direct quoted-form references skipped both the registry check and the cost-cap dry-run. DuckDB resolves `"bq"` to the same ATTACHed BQ catalog, so the bypass is real. Widen the catalog-token alternation: `(?:"bq"|bq)` matches both forms. Negative lookbehind `(?.` references in user -# SQL. Matches all 16 cases verified empirically (fully-quoted, unquoted, -# mixed quoting, case-insensitive, inside CTE bodies, multiple in one stmt). +# SQL. Catalog token accepts both `bq` (the unquoted DuckDB-style name) and +# `"bq"` (quoted identifier). DuckDB resolves both to the same ATTACHed +# catalog, so the security-boundary regex must accept both โ€” Phase 3 review +# caught the quoted variant as an RBAC + cost-cap bypass. # Lookahead `(?=\W|$)` works where `\b` doesn't (after a closing quote). -# Negative lookbehind `(? None: sys_conn.close() +def test_quoted_bq_catalog_token_rejected_403(seeded_app): + """Phase 3 review evasion: `SELECT * FROM "bq"."ds"."tbl"` (catalog + token quoted) must be caught by the same RBAC check as the unquoted + form. DuckDB resolves `"bq"` to the same ATTACHed BQ catalog, so the + quoted variant is a real bypass we have to close.""" + c = seeded_app["client"] + token = seeded_app["admin_token"] + r = c.post( + "/api/query", + json={"sql": 'SELECT * FROM "bq"."secret_ds"."secret_tbl"'}, + headers=_auth(token), + ) + assert r.status_code == 403, r.json() + detail = r.json().get("detail", {}) + if isinstance(detail, dict): + assert detail.get("reason") == "bq_path_not_registered", detail + + def test_unregistered_bq_path_rejected_403(seeded_app): """Direct reference to a `bq..` that no registry row points at: 403 with `bq_path_not_registered`. Caller has admin token (no per-name diff --git a/tests/test_query_bq_regex.py b/tests/test_query_bq_regex.py index 219f345..5e49f98 100644 --- a/tests/test_query_bq_regex.py +++ b/tests/test_query_bq_regex.py @@ -25,6 +25,9 @@ def regex(): ('SELECT * FROM bq.finance."ue"', ('finance', '"ue"')), # Case-insensitive ('select * from BQ.Finance.UE', ('Finance', 'UE')), + # Quoted catalog token "bq" โ€” Phase 3 review evasion: must be caught + ('SELECT * FROM "bq"."finance"."ue"', ('"finance"', '"ue"')), + ('SELECT * FROM "BQ"."ds"."tbl"', ('"ds"', '"tbl"')), # With WHERE ('SELECT a FROM bq.ds.tbl WHERE x=1', ('ds', 'tbl')), # Inside CTE body @@ -55,6 +58,7 @@ def test_regex_finds_multiple_paths_in_one_statement(regex): ('SELECT bq.col FROM tbl', '2-part bq.col is column qualifier, not catalog'), ('SELECT count(*) FROM unit_economics', 'aggregate on bare name'), ('SELECT * FROM other_bq.ds.tbl', 'prefix that contains bq'), + ('SELECT * FROM "my_bq".ds.tbl', 'quoted prefix that contains bq'), ('SELECT * FROM x.bq.ds.tbl', 'bq is middle component, not catalog'), ]) def test_regex_rejects_non_bq_paths(regex, sql, reason):