5 new test files for the upcoming /api/query pre-flight block (next
commit). All failing for the right reason on the current codebase:
tests/test_query_bq_regex.py (8 + 1 + 7 + 1 = 17 cases)
Pure unit test of `BQ_PATH` regex constant (not yet imported from
app.api.query). Verifies the 16-case matrix from spec §4.3.1:
positive matches for fully-quoted / unquoted / mixed quoting / case
variants / inside CTE bodies / multiple paths in one statement;
negative for bare registered names / 2-part bq.col / prefix that
contains bq / middle-component bq / quoted bare names; documented
string-literal false-positive accepted.
tests/test_query_bigquery_query_blocked.py (3 cases)
POST /api/query with bigquery_query() function call must hit the
canonical blocklist rejection ("Only single SELECT queries are
allowed"). Today the blocklist passes all 3 — confirmed RED via
detail-string assertion.
tests/test_api_query_rbac_bq_path.py (4 cases)
Direct bq."<ds>"."<tbl>" references must be registry-gated:
unregistered → 403 bq_path_not_registered; registered + admin →
bypass per-name grant; case-insensitive lookup; string-literal
containing bq.X.Y → 403 (strict-deny).
tests/test_api_query_guardrail.py (3 cases)
Cost guardrail: SQL referencing a registered remote BQ row invokes
_bq_dry_run_bytes (verified via call-counter side effect); over-cap
dry-run returns 400 remote_scan_too_large with bytes/tables/suggestion
in detail; non-BQ queries skip the dry-run entirely.
tests/test_api_query_quota.py (3 cases)
Daily-byte quota check_daily_budget pre-flight (over-cap → 429
before dry-run); record_bytes post-flight on the shared singleton
v2_quota tracker; non-BQ queries leave the counter alone.
RED breakdown: 16 ImportError (BQ_PATH not yet defined) + 7 assertion
failures = 23 fully-RED. 6 tests already pass on the current codebase:
they guard their assertions with `if r.status_code == 403:`, and the
current code returns 400 for unrelated reasons, so the guarded
assertions never run. They serve as anti-regression guards once the
implementation lands and remain green throughout — documented per
spec §6 Phase 1 RED-discipline notes.
68 lines
2.7 KiB
Python
68 lines
2.7 KiB
Python
"""POST /api/query must reject direct `bigquery_query()` function calls.
|
|
|
|
This is a pre-existing RBAC bypass: `bigquery_query('proj', 'SELECT * FROM
|
|
ds.tbl')` runs a BQ jobs API call against any reachable dataset, ignoring
|
|
the master-view forbidden-table check that gates registered names. Closes
|
|
that hole by adding `bigquery_query` to the SQL keyword blocklist.
|
|
|
|
Internal wrap views (created by the BQ extractor) use bigquery_query()
|
|
inside their CREATE VIEW body — those run via DuckDB's view resolution at
|
|
query time, NOT via user-submitted SQL, so the blocklist doesn't break
|
|
them. Closes part of #160.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
|
|
def _auth(token: str) -> dict:
|
|
return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
def test_bigquery_query_function_call_rejected(seeded_app):
    """Reject a plain ``SELECT * FROM bigquery_query(...)`` submission.

    The request must be blocked at the keyword-blocklist layer with the
    canonical "Only single SELECT queries are allowed" detail.

    Args:
        seeded_app: Fixture mapping; this test reads its ``"client"`` and
            ``"admin_token"`` entries.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    sql = "SELECT * FROM bigquery_query('proj', 'SELECT 1 AS x')"
    r = c.post(
        "/api/query",
        json={"sql": sql},
        headers=_auth(token),
    )
    assert r.status_code == 400, f"expected 400; got {r.status_code} body={r.json()}"
    detail = str(r.json().get("detail", ""))
    # The canonical blocklist message proves this was rejected by the
    # blocklist (not by some other path like master-view-forbidden).
    assert "single SELECT" in detail, (
        f"expected canonical blocklist message; got detail={detail!r}"
    )


def test_bigquery_query_mixed_case_rejected(seeded_app):
    """Reject a mixed-case ``BigQuery_Query(...)`` submission.

    Existing blocklist runs `sql.strip().lower()` first, so any case
    variant is blocked uniformly.

    Args:
        seeded_app: Fixture mapping; this test reads its ``"client"`` and
            ``"admin_token"`` entries.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.post(
        "/api/query",
        json={"sql": "SELECT * FROM BigQuery_Query('proj', 'SELECT 1')"},
        headers=_auth(token),
    )
    assert r.status_code == 400, r.json()
    detail = str(r.json().get("detail", ""))
    # Checking the detail text (not just the 400) pins the rejection to
    # the canonical blocklist message.
    assert "single SELECT" in detail, (
        f"expected canonical blocklist message; got detail={detail!r}"
    )


def test_bigquery_query_with_whitespace_before_paren_rejected(seeded_app):
    """Reject ``bigquery_query (...)`` written with a space before the paren.

    Substring match catches `bigquery_query (...)` with space too.

    Args:
        seeded_app: Fixture mapping; this test reads its ``"client"`` and
            ``"admin_token"`` entries.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.post(
        "/api/query",
        json={"sql": "SELECT * FROM bigquery_query ('proj', 'SELECT 1')"},
        headers=_auth(token),
    )
    assert r.status_code == 400, r.json()
    detail = str(r.json().get("detail", ""))
    # Checking the detail text (not just the 400) pins the rejection to
    # the canonical blocklist message.
    assert "single SELECT" in detail, (
        f"expected canonical blocklist message; got detail={detail!r}"
    )