From 6423888d0299b065f3b6bff50365e82951709d33 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 12:46:38 +0200 Subject: [PATCH] fix(query): #160 move bq_max_scan_bytes to data_source.bigquery (UI editable) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E test on dev VM revealed: spec said "configurable via /admin/server-config" for the cost guardrail cap, but the underlying read path was `api.query.bq_max_scan_bytes` and `api` is NOT in `_EDITABLE_SECTIONS`. POST to /admin/server-config rejected `{"sections":{"api":...}}` as "unknown section(s): api" — the cap was only adjustable via direct YAML edit. Move to `data_source.bigquery.bq_max_scan_bytes`: - `_default_remote_query_cap_bytes()` reads from the new path. - Add to `_KNOWN_FIELDS["data_source"]["bigquery"]["fields"]` with the same shape as `max_bytes_per_materialize` (kind=int, default, hint); the new default is 5 GiB (5368709120 bytes), versus 10 GiB for `max_bytes_per_materialize`. - Add to `_BQ_OPTIONAL_FIELD_DEFAULTS` so it surfaces in the GET payload even when YAML omits it. Convention now mirrors `max_bytes_per_materialize` — both BQ cost guardrails live under `data_source.bigquery`, both editable in the UI. --- app/api/admin.py | 11 +++++++++++ app/api/query.py | 5 +++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/app/api/admin.py b/app/api/admin.py index 0514f51..947a712 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -242,6 +242,16 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = { "or sync rejected. 0 disables the gate. Default 10737418240 = 10 GiB." ), }, + "bq_max_scan_bytes": { + "kind": "int", + "default": 5368709120, + "hint": ( + "Cost guardrail for `da query --remote` against query_mode='remote' " + "BQ rows (dry-run check on the underlying SELECT before execute). " + "Bytes processed; exceeds → 400 remote_scan_too_large with a " + "`da fetch` suggestion. 0 disables the gate. Default 5368709120 = 5 GiB." 
+ ), + }, }, }, "keboola": { @@ -795,6 +805,7 @@ class ServerConfigUpdateRequest(BaseModel): _BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = { "billing_project": "", "max_bytes_per_materialize": 10737418240, + "bq_max_scan_bytes": 5368709120, } diff --git a/app/api/query.py b/app/api/query.py index 7c56016..ce80abd 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -47,9 +47,10 @@ BQ_PATH = re.compile( def _default_remote_query_cap_bytes() -> int: """5 GiB default cap on /api/query BQ-touching scans. Configurable via - `api.query.bq_max_scan_bytes` in /admin/server-config. + `data_source.bigquery.bq_max_scan_bytes` in /admin/server-config — + sits next to `max_bytes_per_materialize` for visual symmetry. """ - raw = get_value("api", "query", "bq_max_scan_bytes", default=5_368_709_120) + raw = get_value("data_source", "bigquery", "bq_max_scan_bytes", default=5_368_709_120) try: return int(raw) if raw is not None else 5_368_709_120 except (TypeError, ValueError):