From b2c1ff143cfd12c33f5c92328e01cce87752b0e6 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Wed, 6 May 2026 13:02:34 +0200
Subject: [PATCH] fix(query): rewrite BQ-backed user SQL via bigquery_query()
 to enable predicate pushdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User SQL hitting query_mode='remote' BigQuery rows was 50-100x slower
than the equivalent direct bigquery_query() call because DuckDB's master
view (CREATE VIEW … AS SELECT * FROM bigquery.<ds>.<tbl>) does not push
WHERE/SELECT/LIMIT into BQ in ATTACH-catalog mode. The BQ extension opens
a Storage Read API session over the entire upstream table; on >100M-row
sources this was 70-150s and frequently failed with 'Response too large
to return'.

Extract the existing dry-run rewriter's core (table-name → BQ-native
backtick path) into a shared helper. Add an execution-path rewriter
that wraps the whole user SQL in bigquery_query('<project>', '<inner>')
so the BQ planner sees the full query and engages partition pruning +
projection pushdown server-side.

Conservative fall-through: cross-source JOINs (BQ ↔ Keboola/Jira local),
queries already containing bigquery_query(, and unconfigured BQ project
all skip the rewrite and run the original SQL via ATTACH-catalog so
behavior degrades gracefully.
---
 CHANGELOG.md                       |  18 ++
 app/api/query.py                   | 209 +++++++++++++-
 tests/test_query_remote_rewrite.py | 444 +++++++++++++++++++++++++++++
 3 files changed, 662 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_query_remote_rewrite.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 56fd2cc..86e66d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,24 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
 
 ## [Unreleased]
 
+### Fixed
+- **`/api/query` (and `agnes query --remote`) now rewrites user SQL referencing
+  `query_mode='remote'` BigQuery rows into a single `bigquery_query()` call
+  before execute** (`app/api/query.py`). Pre-fix the master view
+  (`CREATE VIEW <name> AS SELECT * FROM bigquery.<bucket>.<source_table>`) did
+  not push WHERE / SELECT / LIMIT into BQ — the DuckDB BQ extension opened a
+  Storage Read API session over the entire upstream table, scanning the full
+  partitioned dataset before the local DuckDB filter ran. On 100M+ row
+  remote-mode tables this was 50-100× slower than the equivalent direct
+  `bigquery_query()` call (70-150 s vs 1.5 s) and frequently failed with
+  `Response too large to return`. The rewriter (shared core with the existing
+  dry-run helper) wraps the user's whole SQL in `bigquery_query('<project>',
+  '<inner-sql>')` so the BQ planner receives the full query and applies
+  partition pruning + projection pushdown server-side. Conservative
+  fall-through: cross-source JOINs (BQ ↔ Keboola/Jira local), queries already
+  containing `bigquery_query(`, and unconfigured BQ project all keep the
+  original ATTACH-catalog path so behavior degrades gracefully.
+
 ## [0.38.3] — 2026-05-06
 
 ### Changed
diff --git a/app/api/query.py b/app/api/query.py
index a758dc4..473bd39 100644
--- a/app/api/query.py
+++ b/app/api/query.py
@@ -192,8 +192,32 @@ def execute_query(
             else contextlib.nullcontext()
         )
         with guard:
+            # Performance fix: rewrite user SQL referencing BQ-remote tables
+            # to a single ``bigquery_query()`` call so WHERE / projection /
+            # LIMIT push into BQ via jobs.query (1-2 s) instead of falling
+            # through DuckDB's ATTACH-catalog Storage Read API session over
+            # the full table (often 70-150 s, fails with "Response too
+            # large to return" on >100M-row sources). Helper returns the
+            # original SQL unchanged when rewriting would be unsafe
+            # (cross-source JOIN, no BQ tables referenced, double-wrap).
+            execution_sql, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+                request.sql, conn,
+            )
+            if did_rewrite:
+                logger.info(
+                    "query_rewrite_to_bigquery_query: user_id=%s — wrapped "
+                    "SQL in bigquery_query() for BQ predicate pushdown",
+                    user_id,
+                )
+            else:
+                logger.debug(
+                    "query_rewrite_skipped: user_id=%s — running original "
+                    "SQL via ATTACH-catalog path",
+                    user_id,
+                )
+
             # Open in read-only mode for extra safety
-            result = analytics.execute(request.sql).fetchmany(request.limit + 1)
+            result = analytics.execute(execution_sql).fetchmany(request.limit + 1)
             columns = [desc[0] for desc in analytics.description] if analytics.description else []
             truncated = len(result) > request.limit
             rows = result[:request.limit]
@@ -432,12 +456,11 @@ def _bq_guardrail_inputs(
     return dry_run, name_lookups, None
 
 
-def _rewrite_user_sql_for_bq_dry_run(
+def _rewrite_bq_table_refs_to_native(
     sql: str, name_lookups: list, project: str,
 ) -> str:
-    """Rewrite user SQL from DuckDB-flavor to BQ-native so a single
-    `_bq_dry_run_bytes` call can estimate scan size for the EXACT query
-    the user submitted (issue #171).
+    """Core identifier rewrite: DuckDB-flavor table references → BQ-native
+    backtick form. Shared between dry-run and execution-path rewriters.
 
     Two transformations:
 
@@ -463,13 +486,15 @@ def _rewrite_user_sql_for_bq_dry_run(
     inside a string literal (e.g. an `IN (...)` value or a `LIKE` pattern)
     will also be rewritten. This is acceptable because (a) it's vanishingly
     rare to have a string literal exactly matching a registered table name,
-    and (b) when it does happen the dry-run errors out and the caller falls
-    back to the per-table SELECT * estimate (current behavior, no regression).
+    and (b) when it does happen the caller's error path covers the case
+    (dry-run falls back to per-table SELECT * estimate; execution falls
+    through to the ATTACH-catalog path).
 
     CTE shadowing: a `WITH unit_economics AS (...)` followed by `FROM
     unit_economics` would also rewrite the `FROM` reference. BQ then treats
-    the CTE as unreferenced (legal) and the dry-run estimates the rewritten
-    physical table — likely an over-estimate. Same fallback path covers this.
+    the CTE as unreferenced (legal) and the rewriter's caller deals with
+    the consequence — over-estimation for dry-run, fall-through-to-ATTACH
+    via BQ parse error for execution.
     """
     out = sql
 
@@ -509,6 +534,172 @@ def _rewrite_user_sql_for_bq_dry_run(
     return out
 
 
+def _rewrite_user_sql_for_bq_dry_run(
+    sql: str, name_lookups: list, project: str,
+) -> str:
+    """Rewrite user SQL from DuckDB-flavor to BQ-native so a single
+    `_bq_dry_run_bytes` call can estimate scan size for the EXACT query
+    the user submitted (issue #171). Thin wrapper around the shared
+    core; kept as a stable name for callers in /api/query's cap-guard.
+    """
+    return _rewrite_bq_table_refs_to_native(sql, name_lookups, project)
+
+
+def _rewrite_user_sql_for_bigquery_query(
+    user_sql: str, conn: duckdb.DuckDBPyConnection,
+) -> tuple[str, bool]:
+    """Rewrite user SQL so the entire query ships to BQ as a single
+    ``bigquery_query(<project>, <inner-sql>)`` call.
+
+    Returns ``(rewritten_sql, did_rewrite)``. When ``did_rewrite`` is
+    ``False``, the caller MUST execute the original ``user_sql`` via the
+    ATTACH-catalog path (slow but correct); the rewriter is conservative
+    on purpose — wrapping cross-source queries in ``bigquery_query()``
+    would silently lose the local-side data.
+
+    Why this matters
+    ----------------
+    The orchestrator's master view (``CREATE VIEW name AS SELECT * FROM
+    bigquery.<bucket>.<source_table>``) does not push WHERE / projections
+    into BQ when DuckDB resolves the query — the BQ extension opens a
+    Storage Read API session over the entire table, which on multi-100M-row
+    tables is 50-100× slower than letting BQ run the query server-side.
+    Wrapping the user's SQL in ``bigquery_query('<project>', '<inner>')``
+    makes the BQ extension issue a ``jobs.query`` instead, with full
+    predicate pushdown.
+
+    Skip rules (returns ``(user_sql, False)``)
+    ------------------------------------------
+    1. No registered ``query_mode='remote'`` BQ row referenced in the SQL.
+       Nothing to rewrite — original SQL passes through unchanged.
+    2. User SQL already contains ``bigquery_query(`` — never double-wrap.
+       (The /api/query keyword denylist also blocks this in production;
+       defensive guard for callers in other contexts.)
+    3. SQL also references a non-BQ master view (Keboola/Jira local-mode
+       table). Wrapping would lose those references — fall through to
+       ATTACH-catalog so the cross-source query still runs.
+    4. ``get_bq_access()`` returns the unconfigured sentinel
+       (``data == ''``). No project to fill into ``bigquery_query()``.
+
+    Edge cases preserved by design
+    ------------------------------
+    - CTEs / sub-queries referencing BQ tables: the table-name rewrite
+      happens at every match position, then the whole SQL is wrapped in
+      one ``bigquery_query()``. BQ supports CTEs, so this works.
+    - Multiple BQ tables, same project: combined into ONE wrap (single
+      jobs.query). DuckDB's BQ extension doesn't support multi-project
+      JOINs in a single ``bigquery_query()`` call today; if/when the
+      registry grows per-table source_project, this helper would need to
+      gate on cross-project mixing.
+    - ``bq."ds"."tbl"`` direct paths: rewritten to BQ-native backticks
+      via the same shared core as dry-run.
+    """
+    # Skip 2: don't double-wrap. Cheap pre-check before any registry I/O.
+    if "bigquery_query(" in user_sql.lower():
+        return user_sql, False
+
+    # Find all referenced BQ remote-mode rows (bare-name + direct bq.path).
+    # Mirrors the non-RBAC parts of `_bq_guardrail_inputs`.
+    sql_lower = user_sql.lower()
+    name_lookups: list = []
+    seen_paths: set = set()
+
+    try:
+        repo = TableRegistryRepository(conn)
+        bq_rows = repo.list_by_source("bigquery")
+        all_rows = repo.list_all()
+    except Exception:
+        # Registry read failure — let the original SQL run through the
+        # ATTACH-catalog path. The handler's generic error path will
+        # surface anything user-visible.
+        return user_sql, False
+
+    for r in bq_rows:
+        if (r.get("query_mode") or "") != "remote":
+            continue
+        bucket = r.get("bucket")
+        source_table = r.get("source_table")
+        name = r.get("name")
+        if not (bucket and source_table and name):
+            continue
+        pattern = r'\b' + re.escape(str(name).lower()) + r'\b'
+        if re.search(pattern, sql_lower):
+            key = (bucket.lower(), source_table.lower())
+            if key not in seen_paths:
+                seen_paths.add(key)
+            name_lookups.append((str(name), bucket, source_table))
+
+    # Direct bq."ds"."tbl" references — pull the registered (bucket,
+    # source_table) pair so the inner SQL receives a backticked BQ-native
+    # path. Mismatched / unregistered paths are caught upstream by the
+    # guardrail; here we just collect the mappings the rewriter needs.
+    direct_paths: set[tuple[str, str]] = set()
+    for m in BQ_PATH.finditer(user_sql):
+        bucket_raw = m.group(1).strip('"')
+        source_table_raw = m.group(2).strip('"')
+        direct_paths.add((bucket_raw, source_table_raw))
+
+    if not name_lookups and not direct_paths:
+        # Skip 1: no BQ tables referenced.
+        return user_sql, False
+
+    # Skip 3: cross-source query (BQ + local-mode). If user SQL also
+    # references a non-BQ master view, we can't push the whole thing to
+    # BQ — DuckDB needs to do the join.
+    bq_names_lc = {n.lower() for n, _, _ in name_lookups}
+    for r in all_rows:
+        st = (r.get("source_type") or "").lower()
+        qm = (r.get("query_mode") or "").lower()
+        if st == "bigquery" and qm == "remote":
+            continue  # already handled
+        name = r.get("name")
+        if not name:
+            continue
+        name_lc = str(name).lower()
+        if name_lc in bq_names_lc:
+            # Same name registered both BQ-remote and local? Pathological;
+            # skip as a safety measure.
+            return user_sql, False
+        if re.search(r'\b' + re.escape(name_lc) + r'\b', sql_lower):
+            logger.info(
+                "rewrite_skip_cross_source: user SQL references both "
+                "BQ-remote and local-mode tables; falling back to "
+                "ATTACH-catalog path",
+            )
+            return user_sql, False
+
+    # Skip 4: BQ project not configured.
+    try:
+        bq = get_bq_access()
+        project = bq.projects.data
+    except Exception:
+        return user_sql, False
+    if not project:
+        return user_sql, False
+
+    # Rewrite identifiers, then wrap the whole thing in bigquery_query().
+    # The DuckDB BQ extension's UDF expects (<billing-project-string>,
+    # <inner-sql-string>); we use the data project so the inner SQL's
+    # backticked paths resolve to the same project.
+    inner_sql = _rewrite_bq_table_refs_to_native(user_sql, name_lookups, project)
+
+    # SQL string literal escaping: BQ accepts standard SQL doubled-quote
+    # escaping inside single-quoted strings. We pass `inner_sql` as a
+    # parameter to DuckDB's prepared statement at execute-time
+    # (analytics.execute(sql, [project, inner_sql]) shape) — but the
+    # current handler runs ``analytics.execute(rewritten_sql)`` with no
+    # params, so we MUST embed the inner SQL safely. Use parameterised
+    # form: emit ``bigquery_query(?, ?)`` and signal the params via a
+    # sentinel? No — the handler treats SQL as opaque. Instead, embed
+    # using single-quote doubling (standard SQL escape; both DuckDB and
+    # BQ honor it).
+    escaped_inner = inner_sql.replace("'", "''")
+    rewritten = (
+        f"SELECT * FROM bigquery_query('{project}', '{escaped_inner}')"
+    )
+    return rewritten, True
+
+
 @contextlib.contextmanager
 def _bq_quota_and_cap_guard(
     *,
diff --git a/tests/test_query_remote_rewrite.py b/tests/test_query_remote_rewrite.py
new file mode 100644
index 0000000..7d78a3e
--- /dev/null
+++ b/tests/test_query_remote_rewrite.py
@@ -0,0 +1,444 @@
+"""Unit tests for ``_rewrite_user_sql_for_bigquery_query``.
+
+The helper rewrites user SQL referencing query_mode='remote' BigQuery
+tables so the entire query ships to BQ via the DuckDB BQ extension's
+``bigquery_query(<project>, <sql>)`` UDF — engaging WHERE / SELECT /
+LIMIT predicate pushdown instead of falling through to ATTACH-catalog
+mode (which opens a Storage Read API session over the whole table).
+
+These tests pin down each conservative-skip rule plus the happy-path
+rewrites. Edge cases (CTE shadowing, double-wrap, mixed-source JOIN)
+are intentionally explicit so a future refactor doesn't quietly
+loosen the guard.
+"""
+from __future__ import annotations
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Test infrastructure: an in-memory DuckDB seeded with table_registry rows
+# matching the shapes the production registry produces. Avoids the full app
+# bootstrap path; the rewriter only needs ``conn.execute("SELECT * FROM
+# table_registry ...")`` to resolve names.
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def seeded_registry(tmp_path, monkeypatch):
+    """Build a fresh ``system.duckdb`` in tmp_path with the schema migrated.
+
+    Returns the open connection so tests can pass it to the rewriter.
+    Cleanup is automatic via tmp_path teardown — but we close the
+    open singleton handle first so a different DATA_DIR in the next
+    test doesn't see the previous tmp's lock.
+    """
+    from src.db import get_system_db, close_system_db
+
+    monkeypatch.setenv("DATA_DIR", str(tmp_path))
+    (tmp_path / "state").mkdir(parents=True, exist_ok=True)
+    close_system_db()
+    conn = get_system_db()
+    yield conn
+    close_system_db()
+
+
+def _register_bq_remote(conn, *, table_id, name, bucket, source_table):
+    from src.repositories.table_registry import TableRegistryRepository
+    TableRegistryRepository(conn).register(
+        id=table_id,
+        name=name,
+        source_type="bigquery",
+        bucket=bucket,
+        source_table=source_table,
+        query_mode="remote",
+    )
+
+
+def _register_local(conn, *, table_id, name, source_type="keboola"):
+    from src.repositories.table_registry import TableRegistryRepository
+    TableRegistryRepository(conn).register(
+        id=table_id,
+        name=name,
+        source_type=source_type,
+        bucket="bkt",
+        source_table=name,
+        query_mode="local",
+    )
+
+
+def _set_bq_project(monkeypatch, project="test-prj"):
+    """Stub get_bq_access so the rewriter sees a real-looking project ID."""
+    from connectors.bigquery.access import BqAccess, BqProjects, get_bq_access
+    bq = BqAccess(
+        BqProjects(billing=project, data=project),
+        client_factory=lambda projects: object(),
+    )
+    monkeypatch.setattr(
+        "app.api.query.get_bq_access",
+        lambda: bq,
+        raising=False,
+    )
+    get_bq_access.cache_clear()
+
+
+# ---------------------------------------------------------------------------
+# Happy-path rewrites
+# ---------------------------------------------------------------------------
+
+
+def test_simple_select_where_against_one_bq_table_rewrites(seeded_registry, monkeypatch):
+    """Single-table SELECT-WHERE against a registered BQ remote row →
+    full SQL wrapped in ``bigquery_query('project', '<rewritten>')``.
+    The bare-name reference gets translated to BQ-native backtick form."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        "SELECT count(*) FROM ue WHERE event_date = '2026-01-01'",
+        seeded_registry,
+    )
+
+    assert did_rewrite is True
+    # Outer wrap must be a single bigquery_query() FROM-source.
+    assert "bigquery_query(" in rewritten
+    assert "test-prj" in rewritten
+    # Inner SQL: bare name rewritten to backticked BQ-native path.
+    assert "`test-prj.fin.ue`" in rewritten
+    # WHERE predicate is preserved (single-quote-doubled for embedding
+    # inside the outer string literal — standard SQL escaping that
+    # both DuckDB and BQ honor).
+    assert "event_date = ''2026-01-01''" in rewritten
+
+
+def test_direct_bq_path_rewrites(seeded_registry, monkeypatch):
+    """User wrote the direct ``bq."ds"."tbl"`` form. The rewriter must
+    still translate to BQ-native backtick form before wrapping."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        'SELECT * FROM bq."fin"."ue" LIMIT 10',
+        seeded_registry,
+    )
+
+    assert did_rewrite is True
+    assert "bigquery_query(" in rewritten
+    assert "`test-prj.fin.ue`" in rewritten
+    # Original duckdb-flavor path must NOT remain (it'd parse-fail under BQ).
+    assert 'bq."fin"."ue"' not in rewritten
+
+
+def test_cte_referencing_bq_table_rewrites_inside_cte(seeded_registry, monkeypatch):
+    """A WITH clause whose body references a BQ table must rewrite that
+    inner reference; the wrapping happens at the top level so BQ sees a
+    valid BQ-flavor CTE."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.orders", name="orders",
+                        bucket="fin", source_table="orders")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        "WITH x AS (SELECT id FROM orders WHERE total > 0) SELECT count(*) FROM x",
+        seeded_registry,
+    )
+    assert did_rewrite is True
+    # Inner reference is rewritten.
+    assert "`test-prj.fin.orders`" in rewritten
+    # The whole thing is wrapped — bigquery_query is the outermost FROM.
+    assert rewritten.lower().count("bigquery_query(") == 1
+
+
+def test_subquery_referencing_bq_table_rewrites(seeded_registry, monkeypatch):
+    """Subquery in FROM position — same handling as a CTE: rewrite the
+    inner table reference, wrap the whole at the top."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        "SELECT s.cnt FROM (SELECT count(*) AS cnt FROM ue) s",
+        seeded_registry,
+    )
+    assert did_rewrite is True
+    assert "`test-prj.fin.ue`" in rewritten
+    assert rewritten.lower().count("bigquery_query(") == 1
+
+
+def test_multiple_bq_tables_one_project_combine(seeded_registry, monkeypatch):
+    """Two registered BQ tables in the same project → single
+    ``bigquery_query()`` wraps the whole SQL with both refs rewritten
+    inline. No separate parallel calls."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.orders", name="orders",
+                        bucket="fin", source_table="orders")
+    _register_bq_remote(seeded_registry, table_id="bq.fin.users", name="users",
+                        bucket="fin", source_table="users")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        "SELECT u.id, count(o.id) "
+        "FROM users u JOIN orders o ON u.id = o.user_id "
+        "GROUP BY u.id",
+        seeded_registry,
+    )
+    assert did_rewrite is True
+    # Both rewritten.
+    assert "`test-prj.fin.users`" in rewritten
+    assert "`test-prj.fin.orders`" in rewritten
+    # Single wrap.
+    assert rewritten.lower().count("bigquery_query(") == 1
+
+
+# ---------------------------------------------------------------------------
+# Conservative-skip cases
+# ---------------------------------------------------------------------------
+
+
+def test_join_bq_to_local_skips_rewrite(seeded_registry, monkeypatch):
+    """A JOIN between a BQ table and a local-mode (Keboola/Jira) table
+    is a cross-source query — wrapping it in bigquery_query() would lose
+    the local table. The rewriter must fall through to the ATTACH-catalog
+    path (slow but correct).
+    """
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+    _register_local(seeded_registry, table_id="kbc.in.local_orders",
+                    name="local_orders")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    user_sql = (
+        "SELECT u.id, lo.total "
+        "FROM ue u JOIN local_orders lo ON u.id = lo.user_id"
+    )
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        user_sql, seeded_registry,
+    )
+    assert did_rewrite is False
+    assert rewritten == user_sql  # untouched
+
+
+def test_no_bq_tables_passes_through(seeded_registry, monkeypatch):
+    """User SQL referencing only local-source tables → no rewrite,
+    no log spam, original SQL returned."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_local(seeded_registry, table_id="kbc.in.orders", name="orders")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    user_sql = "SELECT * FROM orders WHERE id = 1"
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        user_sql, seeded_registry,
+    )
+    assert did_rewrite is False
+    assert rewritten == user_sql
+
+
+def test_already_contains_bigquery_query_passes_through(seeded_registry, monkeypatch):
+    """User SQL already calls bigquery_query() — never double-wrap.
+
+    Note: the /api/query endpoint blocks ``bigquery_query`` in user SQL
+    via the keyword denylist, so this scenario can't reach the rewriter
+    in production today. Defensive guard for callers from other paths.
+    """
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+    _set_bq_project(monkeypatch, "test-prj")
+
+    user_sql = (
+        "SELECT * FROM bigquery_query('test-prj', 'SELECT * FROM `test-prj.fin.ue`')"
+    )
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        user_sql, seeded_registry,
+    )
+    assert did_rewrite is False
+    assert rewritten == user_sql
+
+
+def test_unconfigured_bq_project_skips(seeded_registry, monkeypatch):
+    """If get_bq_access() is the not-configured sentinel (data=''),
+    don't rewrite — there's no project to fill into bigquery_query()."""
+    from app.api.query import _rewrite_user_sql_for_bigquery_query
+    _register_bq_remote(seeded_registry, table_id="bq.fin.ue", name="ue",
+                        bucket="fin", source_table="ue")
+
+    # Override to sentinel (empty data project).
+    from connectors.bigquery.access import BqAccess, BqProjects, get_bq_access
+    monkeypatch.setattr(
+        "app.api.query.get_bq_access",
+        lambda: BqAccess(BqProjects(billing="", data="")),
+        raising=False,
+    )
+    get_bq_access.cache_clear()
+
+    user_sql = "SELECT * FROM ue"
+    rewritten, did_rewrite = _rewrite_user_sql_for_bigquery_query(
+        user_sql, seeded_registry,
+    )
+    assert did_rewrite is False
+    assert rewritten == user_sql
+
+
+# ---------------------------------------------------------------------------
+# Backwards-compat: dry-run helper still available + behaves the same
+# ---------------------------------------------------------------------------
+
+
+def test_existing_dry_run_helper_still_callable():
+    """The original ``_rewrite_user_sql_for_bq_dry_run`` is now a thin
+    wrapper around the shared core rewriter (Pass 1 + Pass 2). Callers
+    that pass an explicit ``project`` argument keep working unchanged.
+    """
+    from app.api.query import _rewrite_user_sql_for_bq_dry_run
+
+    rewritten = _rewrite_user_sql_for_bq_dry_run(
+        sql="SELECT * FROM ue",
+        name_lookups=[("ue", "fin", "ue")],
+        project="some-prj",
+    )
+    assert "`some-prj.fin.ue`" in rewritten
+    # The dry-run helper does NOT add a bigquery_query() wrapper; that's
+    # only the new execution-path helper's job.
+    assert "bigquery_query(" not in rewritten
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: the /api/query handler must invoke the rewriter and execute
+# the rewritten SQL (not the original) when there's a BQ-remote table.
+# ---------------------------------------------------------------------------
+
+
+def _auth(token: str) -> dict:
+    return {"Authorization": f"Bearer {token}"}
+
+
+def _register_bq_remote_row(name: str, bucket: str, source_table: str) -> None:
+    from src.db import get_system_db
+    from src.repositories.table_registry import TableRegistryRepository
+    sys_conn = get_system_db()
+    try:
+        TableRegistryRepository(sys_conn).register(
+            id=f"bq.{bucket}.{source_table}",
+            name=name,
+            source_type="bigquery",
+            bucket=bucket,
+            source_table=source_table,
+            query_mode="remote",
+        )
+    finally:
+        sys_conn.close()
+
+
+@pytest.fixture
+def stub_bq_for_endpoint(monkeypatch):
+    """Stub _bq_dry_run_bytes + get_bq_access at the endpoint level so the
+    cap-guard sees a real-looking BQ project but doesn't issue real RPCs.
+    """
+    monkeypatch.setattr(
+        "app.api.query._bq_dry_run_bytes",
+        lambda *a, **k: 1024,  # tiny — pass cap
+        raising=False,
+    )
+
+    class _FakeProjects:
+        data = "test-data-prj"
+        billing = "test-billing-prj"
+
+    class _FakeBqAccess:
+        projects = _FakeProjects()
+
+    monkeypatch.setattr(
+        "app.api.query.get_bq_access",
+        lambda: _FakeBqAccess(),
+        raising=False,
+    )
+
+
+def test_endpoint_executes_rewritten_sql_against_analytics(
+    seeded_app, stub_bq_for_endpoint, monkeypatch,
+):
+    """The /api/query handler must call ``analytics.execute(rewritten_sql)``
+    — NOT the user's original SQL — when a BQ-remote table is referenced.
+    Capture what reaches DuckDB and assert the bigquery_query() wrap is
+    present.
+    """
+    _register_bq_remote_row("ue", "fin", "ue")
+
+    # Capture analytics.execute calls. The handler does
+    # `analytics = get_analytics_db_readonly(); analytics.execute(sql)`,
+    # so we patch the connection factory to return a stub.
+    captured = {"sql": None}
+
+    class _StubAnalytics:
+        description = [("c0",)]
+        def execute(self, sql, *args, **kwargs):
+            captured["sql"] = sql
+            class _R:
+                def fetchmany(self, _n):
+                    return []
+            return _R()
+        def close(self):
+            pass
+
+    monkeypatch.setattr(
+        "app.api.query.get_analytics_db_readonly",
+        lambda: _StubAnalytics(),
+        raising=False,
+    )
+
+    c = seeded_app["client"]
+    token = seeded_app["admin_token"]
+    r = c.post(
+        "/api/query",
+        json={"sql": "SELECT count(*) FROM ue WHERE country = 'CZ'"},
+        headers=_auth(token),
+    )
+    assert r.status_code == 200, r.json()
+    sent = captured["sql"]
+    assert sent is not None, "analytics.execute was never called"
+    assert "bigquery_query(" in sent, (
+        f"endpoint did not wrap user SQL in bigquery_query(); sent: {sent!r}"
+    )
+    assert "test-data-prj" in sent
+    assert "`test-data-prj.fin.ue`" in sent
+
+
+def test_endpoint_passes_original_sql_when_no_bq_table(
+    seeded_app, stub_bq_for_endpoint, monkeypatch,
+):
+    """For queries that don't touch any BQ-remote registered name, the
+    handler must pass the original SQL through unchanged — the
+    ATTACH-catalog path handles local-source tables natively and any
+    rewrite would be wasted work."""
+    captured = {"sql": None}
+
+    class _StubAnalytics:
+        description = [("c0",)]
+        def execute(self, sql, *args, **kwargs):
+            captured["sql"] = sql
+            class _R:
+                def fetchmany(self, _n):
+                    return []
+            return _R()
+        def close(self):
+            pass
+
+    monkeypatch.setattr(
+        "app.api.query.get_analytics_db_readonly",
+        lambda: _StubAnalytics(),
+        raising=False,
+    )
+
+    c = seeded_app["client"]
+    token = seeded_app["admin_token"]
+    user_sql = "SELECT 1 AS x"
+    r = c.post("/api/query", json={"sql": user_sql}, headers=_auth(token))
+    assert r.status_code == 200, r.json()
+    assert captured["sql"] == user_sql
+    assert "bigquery_query(" not in captured["sql"]