fix(bq-hint): drop literal backslash escapes from syntax-error hint string (#275)
PR #274 (just merged) introduced `\`AS \\\`rows\\\`\`` in the syntax-error branch of _hint_for_bq_bad_request. Python doesn't recognize \\\` as an escape sequence, so the literal backslashes survived into the JSON `hint:` field. Analyst reading the CLI error saw: Backtick the alias (`AS \`rows\``) or rename it ... with visible backslashes — exactly the misleading shape this dispatcher exists to clean up. Self-review caught it; this PR replaces the problematic substring with plain prose ("rename the alias to a non-reserved word (AS row_count) or backtick-quote it BQ-style (AS `rows` with literal backticks around the identifier)") that needs no escape gymnastics. New regression test test_no_hint_branch_leaks_literal_backslashes pins every dispatch branch against `\\\`` and `\\\\` substrings — pytest now catches this class on the next regression instead of waiting for an analyst to spot it. CHANGELOG bullet rephrased to match (the same broken backslashes leaked into the [Unreleased] entry). Verified: 4162 tests pass; 26 in test_api_query_guardrail.py green; demo print of the syntax-error branch shows clean output.
This commit is contained in:
parent
5458ccc41b
commit
1ade1300c6
3 changed files with 39 additions and 5 deletions
|
|
@ -16,7 +16,7 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
|||
|
||||
### Fixed
|
||||
|
||||
- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases (with the `AS \`rows\`` / `AS row_count` workaround); `Unrecognized name` / `not found inside` still points at `agnes schema <id>`; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage.
|
||||
- **`/api/query` `remote_estimate_failed` hint now branches on the BigQuery error class** instead of always claiming a column doesn't exist. The previous hardcoded "Most often this means a column referenced … doesn't exist" misled analysts whenever BigQuery actually rejected on syntax (e.g. `SELECT COUNT(*) AS rows` — `rows` is reserved, BQ returns `Syntax error: Unexpected keyword ROWS at [1:20]`, the previous hint pointed at non-existent columns). Branching: syntax errors get a hint about reserved-keyword aliases with both rename + BQ-style backtick-quote alternatives; `Unrecognized name` / `not found inside` still points at `agnes schema <id>`; `Table not found` points at `agnes catalog`; the fallback hint enumerates all three causes for the analyst to triage.
|
||||
|
||||
## [0.53.4] — 2026-05-12
|
||||
|
||||
|
|
|
|||
|
|
@ -88,13 +88,20 @@ def _hint_for_bq_bad_request(message: str) -> str:
|
|||
rather than always blaming columns."""
|
||||
msg = message.lower()
|
||||
if "unexpected keyword" in msg or "syntax error" in msg:
|
||||
# Plain text — this string is surfaced as JSON `hint:` and printed
|
||||
# verbatim by the CLI. No markdown rendering, so avoid backtick
|
||||
# quoting around BQ-style backtick identifiers (`\\\`` escape in
|
||||
# a Python source literal renders the backslashes literally to
|
||||
# the analyst — exactly the misleading shape this hint tries to
|
||||
# fix).
|
||||
return (
|
||||
"BigQuery rejected this on SQL syntax. Most often this is a "
|
||||
"reserved-keyword identifier used unquoted — e.g. "
|
||||
"`SELECT COUNT(*) AS rows` fails because `rows` is reserved. "
|
||||
"Backtick the alias (`AS \\`rows\\``) or rename it "
|
||||
"(`AS row_count`). For other syntax errors, see the "
|
||||
"`underlying` field below — it carries BigQuery's own "
|
||||
"SELECT COUNT(*) AS rows fails because 'rows' is reserved. "
|
||||
"Either rename the alias to a non-reserved word (AS row_count) "
|
||||
"or backtick-quote it BQ-style (AS `rows` with literal "
|
||||
"backticks around the identifier). For other syntax errors, "
|
||||
"see the 'underlying' field below — it carries BigQuery's own "
|
||||
"diagnostic with the error position."
|
||||
)
|
||||
if "unrecognized name" in msg or "not found inside" in msg or "field name" in msg:
|
||||
|
|
|
|||
|
|
@ -855,6 +855,33 @@ class TestHintForBqBadRequest:
|
|||
# Must NOT lead with the misleading column-not-found hint
|
||||
assert "column referenced" not in hint.lower()
|
||||
|
||||
def test_no_hint_branch_leaks_literal_backslashes(self):
|
||||
# Hints are surfaced as JSON `hint:` and printed verbatim by the
|
||||
# CLI — no markdown rendering. A backslash-backtick in the Python
|
||||
# source literal becomes a literal backslash followed by a
|
||||
# backtick in the output, which is exactly the misleading shape
|
||||
# this dispatcher exists to fix (see #274 follow-up). Pin every
|
||||
# branch against the regression.
|
||||
from app.api.query import _hint_for_bq_bad_request
|
||||
|
||||
cases = [
|
||||
"Syntax error: Unexpected keyword ROWS at [1:20]",
|
||||
"Unrecognized name: authorize_date at [1:88]",
|
||||
"Field 'foo' not found inside record_type",
|
||||
"Table not found: my-project.dataset.tbl",
|
||||
"Some unfamiliar BQ diagnostic", # fallback
|
||||
]
|
||||
for msg in cases:
|
||||
hint = _hint_for_bq_bad_request(msg)
|
||||
assert "\\`" not in hint, (
|
||||
f"hint for {msg!r} contains literal backslash-backtick: "
|
||||
f"{hint!r}"
|
||||
)
|
||||
assert "\\\\" not in hint, (
|
||||
f"hint for {msg!r} contains literal double-backslash: "
|
||||
f"{hint!r}"
|
||||
)
|
||||
|
||||
def test_unrecognized_name_hint_points_at_agnes_schema(self):
|
||||
from app.api.query import _hint_for_bq_bad_request
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue