From 1ade1300c617bdd27476c9e0db92bec34d6f7797 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr <139972147+ZdenekSrotyr@users.noreply.github.com> Date: Tue, 12 May 2026 20:57:46 +0200 Subject: [PATCH] fix(bq-hint): drop literal backslash escapes from syntax-error hint string (#275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #274 (just merged) introduced `\`AS \\\`rows\\\`\`` in the syntax-error branch of _hint_for_bq_bad_request. Python doesn't recognize \\\` as an escape sequence, so the literal backslashes survived into the JSON `hint:` field. Analyst reading the CLI error saw: Backtick the alias (`AS \`rows\``) or rename it ... with visible backslashes — exactly the misleading shape this dispatcher exists to clean up. Self-review caught it; this PR replaces the problematic substring with plain prose ("rename the alias to a non-reserved word (AS row_count) or backtick-quote it BQ-style (AS `rows` with literal backticks around the identifier)") that needs no escape gymnastics. New regression test test_no_hint_branch_leaks_literal_backslashes pins every dispatch branch against `\\\`` and `\\\\` substrings — pytest now catches this class on the next regression instead of waiting for an analyst to spot it. CHANGELOG bullet rephrased to match (the same broken backslashes leaked into the [Unreleased] entry). Verified: 4162 tests pass; 26 in test_api_query_guardrail.py green; demo print of the syntax-error branch shows clean output. --- CHANGELOG.md | 2 +- app/api/query.py | 15 +++++++++++---- tests/test_api_query_guardrail.py | 27 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17edf92..62fbe8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ### Fixed -- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases (with the `AS \`rows\`` / `AS row_count` workaround); `Unrecognized name` / `not found inside` still points at `agnes schema `; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage. +- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases with both rename + BQ-style backtick-quote alternatives; `Unrecognized name` / `not found inside` still points at `agnes schema `; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage. ## [0.53.4] — 2026-05-12 diff --git a/app/api/query.py b/app/api/query.py index b4695c5..08586cf 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -88,13 +88,20 @@ def _hint_for_bq_bad_request(message: str) -> str: rather than always blaming columns.""" msg = message.lower() if "unexpected keyword" in msg or "syntax error" in msg: + # Plain text — this string is surfaced as JSON `hint:` and printed + # verbatim by the CLI. No markdown rendering, so avoid backtick + # quoting around BQ-style backtick identifiers (`\\\`` escape in + # a Python source literal renders the backslashes literally to + # the analyst — exactly the misleading shape this hint tries to + # fix). return ( "BigQuery rejected this on SQL syntax. Most often this is a " "reserved-keyword identifier used unquoted — e.g. " - "`SELECT COUNT(*) AS rows` fails because `rows` is reserved. " - "Backtick the alias (`AS \\`rows\\``) or rename it " - "(`AS row_count`). For other syntax errors, see the " - "`underlying` field below — it carries BigQuery's own " + "SELECT COUNT(*) AS rows fails because 'rows' is reserved. " + "Either rename the alias to a non-reserved word (AS row_count) " + "or backtick-quote it BQ-style (AS `rows` with literal " + "backticks around the identifier). For other syntax errors, " + "see the 'underlying' field below — it carries BigQuery's own " "diagnostic with the error position." ) if "unrecognized name" in msg or "not found inside" in msg or "field name" in msg: diff --git a/tests/test_api_query_guardrail.py b/tests/test_api_query_guardrail.py index 3361d9c..0e80702 100644 --- a/tests/test_api_query_guardrail.py +++ b/tests/test_api_query_guardrail.py @@ -855,6 +855,33 @@ class TestHintForBqBadRequest: # Must NOT lead with the misleading column-not-found hint assert "column referenced" not in hint.lower() + def test_no_hint_branch_leaks_literal_backslashes(self): + # Hints are surfaced as JSON `hint:` and printed verbatim by the + # CLI — no markdown rendering. A backslash-backtick in the Python + # source literal becomes a literal backslash followed by a + # backtick in the output, which is exactly the misleading shape + # this dispatcher exists to fix (see #274 follow-up). Pin every + # branch against the regression. + from app.api.query import _hint_for_bq_bad_request + + cases = [ + "Syntax error: Unexpected keyword ROWS at [1:20]", + "Unrecognized name: authorize_date at [1:88]", + "Field 'foo' not found inside record_type", + "Table not found: my-project.dataset.tbl", + "Some unfamiliar BQ diagnostic", # fallback + ] + for msg in cases: + hint = _hint_for_bq_bad_request(msg) + assert "\\`" not in hint, ( + f"hint for {msg!r} contains literal backslash-backtick: " + f"{hint!r}" + ) + assert "\\\\" not in hint, ( + f"hint for {msg!r} contains literal double-backslash: " + f"{hint!r}" + ) + def test_unrecognized_name_hint_points_at_agnes_schema(self): from app.api.query import _hint_for_bq_bad_request