agnes-the-ai-analyst/tests/test_bq_materialize_query_wrapping.py
ZdenekSrotyr d8a2299633 fix(bq-materialize): wrap admin SQL in bigquery_query() so views work
Pre-fix, materialize ran the admin source_query as 'COPY (sql) TO parquet'
through the DuckDB BQ extension session. The extension defaults to the
BQ Storage Read API for bq.<ds>.<tbl> references, which rejects views
('non-table entities cannot be read with the storage API'). The fix
always wraps admin SQL into bigquery_query('<billing>', '<inner>') so
COPY uses the BQ jobs API uniformly for tables and views.

Cost guardrail dry-run now operates on the inner SQL (BQ-native), so
the BQ Python client parses it and the cap engages — pre-fix the dry-run
hit 'Table-valued function not found: bigquery_query' and fail-opened.
2026-05-04 16:40:40 +02:00

54 lines
1.8 KiB
Python

"""materialize_query must always wrap admin source_query in
bigquery_query('<billing>', '<admin>') so the COPY uses BQ jobs API,
which works for base tables AND views — Storage Read API does not."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from connectors.bigquery.extractor import (
_wrap_admin_sql_for_jobs_api,
_escape_sql_string_literal,
)
def test_wrap_simple_select():
out = _wrap_admin_sql_for_jobs_api(
billing_project="prj-billing",
inner_sql="SELECT * FROM `ds.tbl`",
)
assert out == (
"SELECT * FROM bigquery_query('prj-billing', "
"'SELECT * FROM `ds.tbl`')"
)
def test_escape_single_quotes_in_inner_sql():
inner = "SELECT name FROM `ds.tbl` WHERE country = 'CZ'"
escaped = _escape_sql_string_literal(inner)
assert escaped == "SELECT name FROM `ds.tbl` WHERE country = ''CZ''"
def test_wrap_with_inner_quotes_round_trips():
inner = "SELECT * FROM `ds.tbl` WHERE col = 'foo''bar'"
out = _wrap_admin_sql_for_jobs_api("myproject", inner)
# Outer string-literal envelope must double the inner single quotes
# so DuckDB's parser sees a balanced literal.
assert out.count("'") % 2 == 0
# Round-trip: stripping the wrapper gives back the original inner exactly.
prefix = "SELECT * FROM bigquery_query('myproject', '"
assert out.startswith(prefix)
assert out.endswith("')")
middle = out[len(prefix):-2]
# DuckDB string literal escape: '' → '. Reverse it.
decoded = middle.replace("''", "'")
assert decoded == inner
def test_billing_project_validates_format():
with pytest.raises(ValueError, match="billing_project"):
_wrap_admin_sql_for_jobs_api(
billing_project="bad project'; DROP",
inner_sql="SELECT 1",
)