diff --git a/CHANGELOG.md b/CHANGELOG.md index 17edf92..62fbe8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ### Fixed -- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases (with the `AS \`rows\`` / `AS row_count` workaround); `Unrecognized name` / `not found inside` still points at `agnes schema `; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage. +- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases with both rename + BQ-style backtick-quote alternatives; `Unrecognized name` / `not found inside` still points at `agnes schema `; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage. ## [0.53.4] — 2026-05-12 diff --git a/app/api/query.py b/app/api/query.py index b4695c5..08586cf 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -88,13 +88,20 @@ def _hint_for_bq_bad_request(message: str) -> str: rather than always blaming columns.""" msg = message.lower() if "unexpected keyword" in msg or "syntax error" in msg: + # Plain text — this string is surfaced as JSON `hint:` and printed + # verbatim by the CLI. No markdown rendering, so avoid backtick + # quoting around BQ-style backtick identifiers (`\\\`` escape in + # a Python source literal renders the backslashes literally to + # the analyst — exactly the misleading shape this hint tries to + # fix). return ( "BigQuery rejected this on SQL syntax. Most often this is a " "reserved-keyword identifier used unquoted — e.g. " - "`SELECT COUNT(*) AS rows` fails because `rows` is reserved. " - "Backtick the alias (`AS \\`rows\\``) or rename it " - "(`AS row_count`). For other syntax errors, see the " - "`underlying` field below — it carries BigQuery's own " + "SELECT COUNT(*) AS rows fails because 'rows' is reserved. " + "Either rename the alias to a non-reserved word (AS row_count) " + "or backtick-quote it BQ-style (AS `rows` with literal " + "backticks around the identifier). For other syntax errors, " + "see the 'underlying' field below — it carries BigQuery's own " "diagnostic with the error position." ) if "unrecognized name" in msg or "not found inside" in msg or "field name" in msg: diff --git a/tests/test_api_query_guardrail.py b/tests/test_api_query_guardrail.py index 3361d9c..0e80702 100644 --- a/tests/test_api_query_guardrail.py +++ b/tests/test_api_query_guardrail.py @@ -855,6 +855,33 @@ class TestHintForBqBadRequest: # Must NOT lead with the misleading column-not-found hint assert "column referenced" not in hint.lower() + def test_no_hint_branch_leaks_literal_backslashes(self): + # Hints are surfaced as JSON `hint:` and printed verbatim by the + # CLI — no markdown rendering. A backslash-backtick in the Python + # source literal becomes a literal backslash followed by a + # backtick in the output, which is exactly the misleading shape + # this dispatcher exists to fix (see #274 follow-up). Pin every + # branch against the regression. + from app.api.query import _hint_for_bq_bad_request + + cases = [ + "Syntax error: Unexpected keyword ROWS at [1:20]", + "Unrecognized name: authorize_date at [1:88]", + "Field 'foo' not found inside record_type", + "Table not found: my-project.dataset.tbl", + "Some unfamiliar BQ diagnostic", # fallback + ] + for msg in cases: + hint = _hint_for_bq_bad_request(msg) + assert "\\`" not in hint, ( + f"hint for {msg!r} contains literal backslash-backtick: " + f"{hint!r}" + ) + assert "\\\\" not in hint, ( + f"hint for {msg!r} contains literal double-backslash: " + f"{hint!r}" + ) + def test_unrecognized_name_hint_points_at_agnes_schema(self): from app.api.query import _hint_for_bq_bad_request