From 14db85f506f34093e350f16cc6c55da9ef456e8a Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Wed, 6 May 2026 13:09:31 +0200 Subject: [PATCH] fix(bq): map 'Response too large' to its own error class instead of generic bad_request translate_bq_error previously mapped BQ's responseTooLarge failure mode to bq_bad_request (HTTP 400 with the raw upstream message). The user- facing implication ('your SQL has a syntax error') is wrong -- the root cause is query shape (BQ refused to return the result inline because it exceeded the response size limit), and the actionable remediation is 'narrow the WHERE clause, aggregate further, or use a materialized table'. Add bq_response_too_large as a first-class BqAccessError kind (also 400) with a canonical hint message; original BQ message preserved in details for operator debugging. Detection is substring-based on 'response too large' and fires before the generic BadRequest path so the dedicated mapping always wins. Affects every BQ-touching path since they all share translate_bq_error -- /api/query, /api/v2/{scan,sample,schema}, materialize. --- CHANGELOG.md | 11 ++++++ connectors/bigquery/access.py | 70 +++++++++++++++++++++++++++++++++-- tests/test_bq_access.py | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 913af0a..9a598e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,17 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C materialize, and the orchestrator's remote-attach. ### Fixed +- **BigQuery `responseTooLarge` no longer surfaces as a generic 400 / 502 with + the raw upstream message** (`connectors/bigquery/access.py`). The + `translate_bq_error` helper now classifies "Response too large to return" + errors via a dedicated `bq_response_too_large` kind (HTTP 400) with an + actionable hint pointing at the WHERE / aggregation / materialized-table + remediations. Pre-fix this failure mode fell through to the generic + `bq_bad_request` mapping, which implied the user's SQL had a syntax error + — wrong root cause. Affects every BQ-touching path (`/api/query`, + `/api/v2/scan`, `/api/v2/sample`, `/api/v2/schema`, materialize) since + they all share `translate_bq_error`. + - **`/api/query` (and `agnes query --remote`) now rewrites user SQL referencing `query_mode='remote'` BigQuery rows into a single `bigquery_query()` call before execute** (`app/api/query.py`). Pre-fix the master view diff --git a/connectors/bigquery/access.py b/connectors/bigquery/access.py index b29483a..5dee173 100644 --- a/connectors/bigquery/access.py +++ b/connectors/bigquery/access.py @@ -44,6 +44,12 @@ class BqAccessError(Exception): "bq_forbidden": 502, # other Forbidden from BQ "bq_bad_request": 400, # 400 from BQ when caller flagged it as client-derived "bq_upstream_error": 502, # all other upstream BQ failures + # `responseTooLarge` is a BQ refusal whose root cause is query shape + # (the user asked for too many rows back inline), not auth or syntax. + # 400 with a specific actionable hint instead of the generic + # bq_bad_request / bq_upstream_error mappings, which surfaced the + # raw BQ message and gave operators no path forward. + "bq_response_too_large": 400, } def __init__(self, kind: str, message: str, details: dict | None = None): @@ -53,6 +59,43 @@ class BqAccessError(Exception): super().__init__(message) +_RESPONSE_TOO_LARGE_HINT = ( + "BigQuery refused to return the result inline; the query exceeded BQ's " + "response size limit. Narrow the WHERE clause, aggregate further, " + "select fewer columns, or query a materialized table that's already " + "been bounded server-side." +) + + +def _classify_response_too_large(msg: str, projects: BqProjects) -> BqAccessError: + """Build the `bq_response_too_large` BqAccessError with the canonical + actionable hint and the original BQ message preserved in details for + operator debugging.""" + return BqAccessError( + "bq_response_too_large", + _RESPONSE_TOO_LARGE_HINT, + details={ + "original": msg, + "billing_project": projects.billing, + "data_project": projects.data, + }, + ) + + +def _is_response_too_large(msg: str) -> bool: + """Detect BQ's `responseTooLarge` failure mode by message substring. + + The reason code is stable across HTTP transports (gax.BadRequest from + google-cloud-bigquery, duckdb.IOException from the BQ extension's own + HTTP layer); both surface 'Response too large to return' verbatim in + the message body. Match case-insensitively + tolerate the slight + variant 'response too large' that some surfaces emit without the + 'to return' suffix. + """ + ml = msg.lower() + return "response too large" in ml + + def translate_bq_error( e: Exception, projects: BqProjects, @@ -69,12 +112,24 @@ def translate_bq_error( 2. Forbidden + 'serviceusage' in str(e).lower() -> cross_project_forbidden (with hint) 3. Forbidden -> bq_forbidden - 4. BadRequest, bad_request_status='client_error' + 4. 'response too large' in str(e).lower() + -> bq_response_too_large (HTTP 400, with + actionable hint pointing at WHERE / + aggregate / materialized remediations) + 5. BadRequest, bad_request_status='client_error' -> bq_bad_request (HTTP 400) - 5. BadRequest, bad_request_status='upstream_error' + 6. BadRequest, bad_request_status='upstream_error' -> bq_upstream_error (HTTP 502) - 6. GoogleAPICallError (other) -> bq_upstream_error - 7. Anything else -> RE-RAISED unchanged (don't swallow programmer errors) + 7. GoogleAPICallError (other) -> bq_upstream_error + 8. Anything else -> RE-RAISED unchanged (don't swallow programmer errors) + + The `responseTooLarge` mapping (4) sits ahead of the generic BadRequest + cases on purpose: BQ surfaces this failure mode as a 400 with a + specific reason, but the actionable remediation is "shape your query + differently" — not "your SQL has a syntax error" (the typical + bq_bad_request user-facing meaning) and not "BQ is broken" + (bq_upstream_error). Routing it via its own kind keeps the user-facing + message tight + correct. """ if isinstance(e, BqAccessError): return e @@ -108,6 +163,13 @@ def translate_bq_error( details={"billing_project": projects.billing, "data_project": projects.data}, ) + # Special-case: `responseTooLarge` arrives as gax.BadRequest (HTTP 400) + # but has a unique reason code with a specific, actionable remediation. + # Catch it BEFORE the generic BadRequest mapping below so it doesn't + # surface as a confusing "bad request" (which implies bad SQL). + if _is_response_too_large(msg): + return _classify_response_too_large(msg, projects) + if isinstance(e, gax.BadRequest): if bad_request_status == "client_error": return BqAccessError("bq_bad_request", msg) diff --git a/tests/test_bq_access.py b/tests/test_bq_access.py index ed69011..5f64918 100644 --- a/tests/test_bq_access.py +++ b/tests/test_bq_access.py @@ -37,6 +37,12 @@ class TestBqAccessError: "bq_forbidden": 502, "bq_bad_request": 400, "bq_upstream_error": 502, + # User-facing class for "Response too large to return" — an + # upstream BQ refusal, but caused by query shape (too many rows + # to fit in a single jobs.query response) rather than auth or + # syntax. 400 so the user sees an actionable error and not a + # 502 that suggests "BQ is broken". + "bq_response_too_large": 400, } assert BqAccessError.HTTP_STATUS == expected @@ -144,6 +150,70 @@ class TestTranslateBqError: translate_bq_error(ValueError("not a BQ error"), self.projects, bad_request_status="client_error") + def test_response_too_large_via_gax_bad_request(self): + """BQ ``responseTooLarge`` arrives as ``gax.BadRequest`` (HTTP 400 + with a specific `reason` field). Pre-fix this fell through to the + generic ``bq_bad_request`` mapping — surfacing as a 400 with the + raw upstream message and no actionable hint. Now it routes to a + dedicated ``bq_response_too_large`` kind whose message tells the + user exactly what to do (narrow WHERE / aggregate / use materialized). + """ + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Response too large to return. Consider setting allowLargeResults to true ...") + result = translate_bq_error( + e, self.projects, bad_request_status="client_error", + ) + assert result.kind == "bq_response_too_large", ( + f"got {result.kind!r}; expected dedicated mapping for " + "'Response too large' to avoid the generic bq_bad_request 400 " + "with no actionable hint" + ) + # User-facing message must point at the actionable remediations, + # not just echo the raw BQ string. + assert "exceeded" in result.message.lower() or "too large" in result.message.lower() + assert "where" in result.message.lower() or "aggregate" in result.message.lower() or "materialized" in result.message.lower() + # Original upstream text preserved in details for operator debugging. + assert "original" in result.details + assert "Response too large" in result.details["original"] + + def test_response_too_large_via_duckdb_native_string(self): + """DuckDB-native exceptions (the BQ extension's C++ HTTP path) + carry the same 'Response too large' marker in plain ``Exception`` + messages — must classify the same way as the gax.BadRequest case.""" + from connectors.bigquery.access import translate_bq_error + e = Exception("HTTP 400: Response too large to return.") + result = translate_bq_error( + e, self.projects, bad_request_status="upstream_error", + ) + assert result.kind == "bq_response_too_large" + + def test_response_too_large_classification_is_status_independent(self): + """The mapping must fire regardless of ``bad_request_status`` + (some callers route via 'upstream_error', others via 'client_error'). + It's the BQ error shape that matters, not who's calling.""" + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Response too large to return") + for status in ("client_error", "upstream_error"): + result = translate_bq_error(e, self.projects, bad_request_status=status) + assert result.kind == "bq_response_too_large", ( + f"bad_request_status={status!r} routed to {result.kind!r}; " + "expected bq_response_too_large for both" + ) + + def test_response_too_large_does_not_trigger_on_unrelated_bad_request(self): + """Other BadRequests (syntax errors, malformed identifiers, …) + must keep going through the generic bq_bad_request mapping — only + the 'Response too large' substring triggers the dedicated kind.""" + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Syntax error at [1:23] near unexpected token") + result = translate_bq_error( + e, self.projects, bad_request_status="client_error", + ) + assert result.kind == "bq_bad_request" + class TestDefaultClientFactory: def test_constructs_client_with_billing_project_as_quota(self, monkeypatch):