fix(query): wrap backtick-only BQ paths in bigquery_query() to avoid local DuckDB parser error
SQL using only a full backtick path (`<proj>.<dataset>.<table>`) as the table reference had neither bare name_lookups nor direct bq.ds.tbl matches, so _rewrite_user_sql_for_bigquery_query's Skip 1 returned the original SQL unchanged. DuckDB then rejected the backtick syntax locally with "syntax error at or near `"" before the query ever reached BigQuery. Detect _BACKTICK_FULL_PATH matches in the rewriter and include them in the Skip 1 guard so the SQL gets wrapped in bigquery_query(). No identifier rewrite is needed — backtick paths are already BQ-native and _rewrite_bq_table_refs_to_native preserves them verbatim via its backtick-split pass. Closes #363
This commit is contained in:
parent
b7872d1c16
commit
9f778251c3
3 changed files with 109 additions and 1 deletions
|
|
@ -15,6 +15,7 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
- `agnes query --remote`: SQL using only a full backtick BQ path (`` `<proj>.<dataset>.<table>` ``) no longer fails with `Parser Error: syntax error at or near "``"`. The rewriter now detects backtick-quoted paths and wraps them in `bigquery_query()` before passing to DuckDB, instead of sending the BQ-native backtick syntax to the local DuckDB parser. (#363)
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1086,7 +1086,17 @@ def _rewrite_user_sql_for_bigquery_query(
|
||||||
source_table_raw = m.group(2).strip('"')
|
source_table_raw = m.group(2).strip('"')
|
||||||
direct_paths.add((bucket_raw, source_table_raw))
|
direct_paths.add((bucket_raw, source_table_raw))
|
||||||
|
|
||||||
if not name_lookups and not direct_paths:
|
# Issue #363: full backtick BQ paths (`<project>.<dataset>.<table>`) are
|
||||||
|
# already BQ-native syntax — DuckDB can't parse backtick quoting locally.
|
||||||
|
# When the user SQL uses only backtick paths (no bare names, no bq.ds.tbl),
|
||||||
|
# both name_lookups and direct_paths stay empty and Skip 1 fires, sending
|
||||||
|
# the backtick SQL to analytics.execute() → "syntax error at or near `"`.
|
||||||
|
# Detect them here so the SQL still gets wrapped in bigquery_query().
|
||||||
|
# _rewrite_bq_table_refs_to_native already preserves backtick segments
|
||||||
|
# verbatim (backtick-split pass 1), so no additional rewrite is needed.
|
||||||
|
has_backtick_paths = bool(_BACKTICK_FULL_PATH.search(user_sql))
|
||||||
|
|
||||||
|
if not name_lookups and not direct_paths and not has_backtick_paths:
|
||||||
# Skip 1: no BQ tables referenced.
|
# Skip 1: no BQ tables referenced.
|
||||||
return user_sql, False
|
return user_sql, False
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -270,6 +270,57 @@ def test_local_name_inside_backtick_path_does_not_trip_cross_source(
|
||||||
assert "test-prj.dataset.orders" in rewritten
|
assert "test-prj.dataset.orders" in rewritten
|
||||||
|
|
||||||
|
|
||||||
|
def test_full_backtick_path_only_wraps_in_bigquery_query(seeded_registry, monkeypatch):
|
||||||
|
"""Issue #363: user SQL uses only a full backtick BQ path
|
||||||
|
(`<proj>.<dataset>.<table>`) — no bare catalog name, no bq.ds.tbl.
|
||||||
|
Pre-fix: name_lookups and direct_paths both empty → Skip 1 fires →
|
||||||
|
original backtick SQL reaches analytics.execute() → DuckDB parser error
|
||||||
|
"syntax error at or near `"".
|
||||||
|
Post-fix: has_backtick_paths detects the pattern → Skip 1 does NOT fire
|
||||||
|
→ SQL is wrapped in bigquery_query() and BQ executes it natively.
|
||||||
|
"""
|
||||||
|
from app.api.query import _rewrite_user_sql_for_bigquery_query
|
||||||
|
_register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
|
||||||
|
bucket="fin", source_table="ue")
|
||||||
|
_set_bq_project(monkeypatch, "test-prj")
|
||||||
|
|
||||||
|
user_sql = (
|
||||||
|
"SELECT COUNT(*) FROM `test-prj.fin.ue` WHERE event_date = DATE '2026-05-15'"
|
||||||
|
)
|
||||||
|
rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
|
||||||
|
user_sql, seeded_registry,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert did_rewrite is True
|
||||||
|
assert "bigquery_query(" in rewritten
|
||||||
|
# The backtick path is already BQ-native — preserved verbatim inside the wrap.
|
||||||
|
assert "`test-prj.fin.ue`" in rewritten
|
||||||
|
# Original backtick SQL must not be sent raw to DuckDB's local parser.
|
||||||
|
assert rewritten != user_sql
|
||||||
|
|
||||||
|
|
||||||
|
def test_full_backtick_path_only_unregistered_table_still_wraps(
|
||||||
|
seeded_registry, monkeypatch,
|
||||||
|
):
|
||||||
|
"""Backtick-only SQL whose path happens NOT to be registered still gets
|
||||||
|
wrapped — the guardrail upstream (_bq_guardrail_inputs pass 3) is the
|
||||||
|
RBAC gate; the rewriter's job is only to avoid sending backtick syntax
|
||||||
|
to DuckDB locally. An unregistered-path error from BQ is surfaced cleanly
|
||||||
|
rather than a local "syntax error at or near `"".
|
||||||
|
"""
|
||||||
|
from app.api.query import _rewrite_user_sql_for_bigquery_query
|
||||||
|
_set_bq_project(monkeypatch, "test-prj")
|
||||||
|
|
||||||
|
user_sql = "SELECT * FROM `test-prj.some_ds.some_tbl` LIMIT 5"
|
||||||
|
rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
|
||||||
|
user_sql, seeded_registry,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert did_rewrite is True
|
||||||
|
assert "bigquery_query(" in rewritten
|
||||||
|
assert "test-prj.some_ds.some_tbl" in rewritten
|
||||||
|
|
||||||
|
|
||||||
def test_no_bq_tables_passes_through(seeded_registry, monkeypatch):
|
def test_no_bq_tables_passes_through(seeded_registry, monkeypatch):
|
||||||
"""User SQL referencing only local-source tables → no rewrite,
|
"""User SQL referencing only local-source tables → no rewrite,
|
||||||
no log spam, original SQL returned."""
|
no log spam, original SQL returned."""
|
||||||
|
|
@ -737,3 +788,49 @@ def test_endpoint_does_not_fall_back_on_non_parse_errors(
|
||||||
# the runtime error message; we just need to confirm no fallback.
|
# the runtime error message; we just need to confirm no fallback.
|
||||||
assert r.status_code in (400, 500, 502)
|
assert r.status_code in (400, 500, 502)
|
||||||
assert len(calls["sqls"]) == 1, "must not retry on non-parse error"
|
assert len(calls["sqls"]) == 1, "must not retry on non-parse error"
|
||||||
|
|
||||||
|
|
||||||
|
def test_endpoint_backtick_only_path_wraps_not_local_parse(
|
||||||
|
seeded_app, stub_bq_for_endpoint, monkeypatch,
|
||||||
|
):
|
||||||
|
"""Issue #363 regression: SQL using only a full backtick BQ path must
|
||||||
|
be wrapped in bigquery_query() before reaching analytics.execute() —
|
||||||
|
NOT passed raw to the local DuckDB instance that would reject the
|
||||||
|
backtick syntax with "syntax error at or near `"".
|
||||||
|
"""
|
||||||
|
_register_bq_remote_row("ue", "fin", "ue")
|
||||||
|
|
||||||
|
captured = {"sql": None}
|
||||||
|
|
||||||
|
class _StubAnalytics:
|
||||||
|
description = [("cnt",)]
|
||||||
|
def execute(self, sql, *args, **kwargs):
|
||||||
|
captured["sql"] = sql
|
||||||
|
class _R:
|
||||||
|
def fetchmany(self, _n):
|
||||||
|
return [(42,)]
|
||||||
|
return _R()
|
||||||
|
def close(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.api.query.get_analytics_db_readonly",
|
||||||
|
lambda: _StubAnalytics(),
|
||||||
|
raising=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
c = seeded_app["client"]
|
||||||
|
token = seeded_app["admin_token"]
|
||||||
|
r = c.post(
|
||||||
|
"/api/query",
|
||||||
|
json={"sql": "SELECT COUNT(*) FROM `test-data-prj.fin.ue` WHERE x = 1"},
|
||||||
|
headers=_auth(token),
|
||||||
|
)
|
||||||
|
assert r.status_code == 200, r.json()
|
||||||
|
sent = captured["sql"]
|
||||||
|
assert sent is not None
|
||||||
|
assert "bigquery_query(" in sent, (
|
||||||
|
f"backtick-path SQL must be wrapped in bigquery_query(); got: {sent!r}"
|
||||||
|
)
|
||||||
|
# The backtick path survives verbatim inside the wrapper.
|
||||||
|
assert "test-data-prj.fin.ue" in sent
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue