diff --git a/CHANGELOG.md b/CHANGELOG.md index 913af0a..9a598e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,17 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C materialize, and the orchestrator's remote-attach. ### Fixed +- **BigQuery `responseTooLarge` no longer surfaces as a generic 400 / 502 with + the raw upstream message** (`connectors/bigquery/access.py`). The + `translate_bq_error` helper now classifies "Response too large to return" + errors via a dedicated `bq_response_too_large` kind (HTTP 400) with an + actionable hint pointing at the WHERE / aggregation / materialized-table + remediations. Pre-fix this failure mode fell through to the generic + `bq_bad_request` mapping, which implied the user's SQL had a syntax error + — wrong root cause. Affects every BQ-touching path (`/api/query`, + `/api/v2/scan`, `/api/v2/sample`, `/api/v2/schema`, materialize) since + they all share `translate_bq_error`. + - **`/api/query` (and `agnes query --remote`) now rewrites user SQL referencing `query_mode='remote'` BigQuery rows into a single `bigquery_query()` call before execute** (`app/api/query.py`). Pre-fix the master view diff --git a/connectors/bigquery/access.py b/connectors/bigquery/access.py index b29483a..5dee173 100644 --- a/connectors/bigquery/access.py +++ b/connectors/bigquery/access.py @@ -44,6 +44,12 @@ class BqAccessError(Exception): "bq_forbidden": 502, # other Forbidden from BQ "bq_bad_request": 400, # 400 from BQ when caller flagged it as client-derived "bq_upstream_error": 502, # all other upstream BQ failures + # `responseTooLarge` is a BQ refusal whose root cause is query shape + # (the user asked for too many rows back inline), not auth or syntax. + # 400 with a specific actionable hint instead of the generic + # bq_bad_request / bq_upstream_error mappings, which surfaced the + # raw BQ message and gave operators no path forward. + "bq_response_too_large": 400, } def __init__(self, kind: str, message: str, details: dict | None = None): @@ -53,6 +59,43 @@ class BqAccessError(Exception): super().__init__(message) +_RESPONSE_TOO_LARGE_HINT = ( + "BigQuery refused to return the result inline; the query exceeded BQ's " + "response size limit. Narrow the WHERE clause, aggregate further, " + "select fewer columns, or query a materialized table that's already " + "been bounded server-side." +) + + +def _classify_response_too_large(msg: str, projects: BqProjects) -> BqAccessError: + """Build the `bq_response_too_large` BqAccessError with the canonical + actionable hint and the original BQ message preserved in details for + operator debugging.""" + return BqAccessError( + "bq_response_too_large", + _RESPONSE_TOO_LARGE_HINT, + details={ + "original": msg, + "billing_project": projects.billing, + "data_project": projects.data, + }, + ) + + +def _is_response_too_large(msg: str) -> bool: + """Detect BQ's `responseTooLarge` failure mode by message substring. + + The reason code is stable across HTTP transports (gax.BadRequest from + google-cloud-bigquery, duckdb.IOException from the BQ extension's own + HTTP layer); both surface 'Response too large to return' verbatim in + the message body. Match case-insensitively + tolerate the slight + variant 'response too large' that some surfaces emit without the + 'to return' suffix. + """ + ml = msg.lower() + return "response too large" in ml + + def translate_bq_error( e: Exception, projects: BqProjects, @@ -69,12 +112,24 @@ def translate_bq_error( 2. Forbidden + 'serviceusage' in str(e).lower() -> cross_project_forbidden (with hint) 3. Forbidden -> bq_forbidden - 4. BadRequest, bad_request_status='client_error' + 4. 'response too large' in str(e).lower() + -> bq_response_too_large (HTTP 400, with + actionable hint pointing at WHERE / + aggregate / materialized remediations) + 5. BadRequest, bad_request_status='client_error' -> bq_bad_request (HTTP 400) - 5. BadRequest, bad_request_status='upstream_error' + 6. BadRequest, bad_request_status='upstream_error' -> bq_upstream_error (HTTP 502) - 6. GoogleAPICallError (other) -> bq_upstream_error - 7. Anything else -> RE-RAISED unchanged (don't swallow programmer errors) + 7. GoogleAPICallError (other) -> bq_upstream_error + 8. Anything else -> RE-RAISED unchanged (don't swallow programmer errors) + + The `responseTooLarge` mapping (4) sits ahead of the generic BadRequest + cases on purpose: BQ surfaces this failure mode as a 400 with a + specific reason, but the actionable remediation is "shape your query + differently" — not "your SQL has a syntax error" (the typical + bq_bad_request user-facing meaning) and not "BQ is broken" + (bq_upstream_error). Routing it via its own kind keeps the user-facing + message tight + correct. """ if isinstance(e, BqAccessError): return e @@ -108,6 +163,13 @@ def translate_bq_error( details={"billing_project": projects.billing, "data_project": projects.data}, ) + # Special-case: `responseTooLarge` arrives as gax.BadRequest (HTTP 400) + # but has a unique reason code with a specific, actionable remediation. + # Catch it BEFORE the generic BadRequest mapping below so it doesn't + # surface as a confusing "bad request" (which implies bad SQL). + if _is_response_too_large(msg): + return _classify_response_too_large(msg, projects) + if isinstance(e, gax.BadRequest): if bad_request_status == "client_error": return BqAccessError("bq_bad_request", msg) diff --git a/tests/test_bq_access.py b/tests/test_bq_access.py index ed69011..5f64918 100644 --- a/tests/test_bq_access.py +++ b/tests/test_bq_access.py @@ -37,6 +37,12 @@ class TestBqAccessError: "bq_forbidden": 502, "bq_bad_request": 400, "bq_upstream_error": 502, + # User-facing class for "Response too large to return" — an + # upstream BQ refusal, but caused by query shape (too many rows + # to fit in a single jobs.query response) rather than auth or + # syntax. 400 so the user sees an actionable error and not a + # 502 that suggests "BQ is broken". + "bq_response_too_large": 400, } assert BqAccessError.HTTP_STATUS == expected @@ -144,6 +150,70 @@ class TestTranslateBqError: translate_bq_error(ValueError("not a BQ error"), self.projects, bad_request_status="client_error") + def test_response_too_large_via_gax_bad_request(self): + """BQ ``responseTooLarge`` arrives as ``gax.BadRequest`` (HTTP 400 + with a specific `reason` field). Pre-fix this fell through to the + generic ``bq_bad_request`` mapping — surfacing as a 400 with the + raw upstream message and no actionable hint. Now it routes to a + dedicated ``bq_response_too_large`` kind whose message tells the + user exactly what to do (narrow WHERE / aggregate / use materialized). + """ + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Response too large to return. Consider setting allowLargeResults to true ...") + result = translate_bq_error( + e, self.projects, bad_request_status="client_error", + ) + assert result.kind == "bq_response_too_large", ( + f"got {result.kind!r}; expected dedicated mapping for " + "'Response too large' to avoid the generic bq_bad_request 400 " + "with no actionable hint" + ) + # User-facing message must point at the actionable remediations, + # not just echo the raw BQ string. + assert "exceeded" in result.message.lower() or "too large" in result.message.lower() + assert "where" in result.message.lower() or "aggregate" in result.message.lower() or "materialized" in result.message.lower() + # Original upstream text preserved in details for operator debugging. + assert "original" in result.details + assert "Response too large" in result.details["original"] + + def test_response_too_large_via_duckdb_native_string(self): + """DuckDB-native exceptions (the BQ extension's C++ HTTP path) + carry the same 'Response too large' marker in plain ``Exception`` + messages — must classify the same way as the gax.BadRequest case.""" + from connectors.bigquery.access import translate_bq_error + e = Exception("HTTP 400: Response too large to return.") + result = translate_bq_error( + e, self.projects, bad_request_status="upstream_error", + ) + assert result.kind == "bq_response_too_large" + + def test_response_too_large_classification_is_status_independent(self): + """The mapping must fire regardless of ``bad_request_status`` + (some callers route via 'upstream_error', others via 'client_error'). + It's the BQ error shape that matters, not who's calling.""" + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Response too large to return") + for status in ("client_error", "upstream_error"): + result = translate_bq_error(e, self.projects, bad_request_status=status) + assert result.kind == "bq_response_too_large", ( + f"bad_request_status={status!r} routed to {result.kind!r}; " + "expected bq_response_too_large for both" + ) + + def test_response_too_large_does_not_trigger_on_unrelated_bad_request(self): + """Other BadRequests (syntax errors, malformed identifiers, …) + must keep going through the generic bq_bad_request mapping — only + the 'Response too large' substring triggers the dedicated kind.""" + from google.api_core.exceptions import BadRequest + from connectors.bigquery.access import translate_bq_error + e = BadRequest("Syntax error at [1:23] near unexpected token") + result = translate_bq_error( + e, self.projects, bad_request_status="client_error", + ) + assert result.kind == "bq_bad_request" + class TestDefaultClientFactory: def test_constructs_client_with_billing_project_as_quota(self, monkeypatch):