From 39bdc1ff45ee1f82d914d15f744b210fbdd33390 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Sun, 3 May 2026 19:45:35 +0200 Subject: [PATCH] feat(admin): #160 BQ test-connection endpoint + billing_project placeholder UI Closes the operator-side half of the reporter's loop. The CLI fix in the previous commit makes USER_PROJECT_DENIED errors readable to analysts; this commit lets admins verify reachability proactively from /admin/server-config without waiting for analyst reports. New endpoint POST /api/admin/bigquery/test-connection (app/api/admin_bigquery_test.py, ~110 LOC): - Depends(require_admin); registered in app/main.py. - Builds BqAccess via existing get_bq_access(), runs `SELECT 1 AS ok` with a 10s polling timeout. - 200 with {ok, billing_project, data_project, elapsed_ms} on success. - 400 for `BqAccessError(not_configured)` (operator config issue). - 502 for any other typed BqAccessError or unknown upstream exception. - 504 for concurrent.futures.TimeoutError; best-effort cancel_job invoked (BQ-side cancel may still run; documented caveat). Server-config placeholder (app/api/admin.py + admin_server_config.html): - `data_source.bigquery.billing_project` field-spec gains `placeholder_from: ["data_source", "bigquery", "project"]`. - renderLeafInput's text branch reads `opts.spec.placeholder_from`, walks the loaded `original` config dict, injects `placeholder="(defaults to )"` into the input HTML at construction time. Admin sees the access.py:339-340 fallback rule visible directly in the UI without reading source. UI button: - "Test BigQuery connection" button next to data_source's Save button. - onTestBigQuery() POSTs to the endpoint, renders structured result inline (green check + elapsed_ms on success; red kind + hint on failure). Tests: 6 endpoint cases + 1 placeholder payload test = 7 GREEN. 62 total across the affected admin server-config test files. --- app/api/admin.py | 7 ++ app/api/admin_bigquery_test.py | 124 +++++++++++++++++++++ app/main.py | 2 + app/web/templates/admin_server_config.html | 59 +++++++++- 4 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 app/api/admin_bigquery_test.py diff --git a/app/api/admin.py b/app/api/admin.py index d6dad71..0514f51 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -225,6 +225,13 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = { "data project). Defaults to data_source.bigquery.project. " "Mismatch → 403 USER_PROJECT_DENIED on every BQ call." ), + # Issue #160 §4.7.5: when this field is empty in the + # admin form, the JS template shows "(defaults to )" + # as placeholder text — surfacing the access.py:339-340 + # fallback rule directly in the UI without the operator + # having to read source. Path is walked against the + # `original` config payload from GET /api/admin/server-config. + "placeholder_from": ["data_source", "bigquery", "project"], }, "max_bytes_per_materialize": { "kind": "int", diff --git a/app/api/admin_bigquery_test.py b/app/api/admin_bigquery_test.py new file mode 100644 index 0000000..7074160 --- /dev/null +++ b/app/api/admin_bigquery_test.py @@ -0,0 +1,124 @@ +"""POST /api/admin/bigquery/test-connection — admin-only health probe. + +Closes the operator-side half of #160. The reporter saw +USER_PROJECT_DENIED raw in the analyst CLI (now fixed via the structured +renderer in cli/error_render.py); this endpoint lets an admin verify the +saved BQ config from /admin/server-config WITHOUT having to wait for an +analyst to hit a query failure first. + +Implementation runs a minimal `SELECT 1` via the existing BqAccess +plumbing with a 10s polling timeout. On `concurrent.futures.TimeoutError` +the BQ job is best-effort cancelled (job continues running on BQ side +until BQ-side timeout if the cancel itself fails — documented caveat). +""" +from __future__ import annotations + +import concurrent.futures +import logging +import time + +from fastapi import APIRouter, Depends, HTTPException + +from app.auth.access import require_admin +from connectors.bigquery.access import get_bq_access, BqAccessError + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/admin/bigquery", tags=["admin"]) + +_QUERY_TIMEOUT_SECONDS = 10.0 + + +@router.post("/test-connection") +async def test_connection(_user: dict = Depends(require_admin)): + """Run `SELECT 1 AS ok` against BigQuery via the configured BqAccess. + + Returns 200 with `{ok, billing_project, data_project, elapsed_ms}` on + success. Maps known failure modes: + + - `BqAccessError(not_configured)` → 400 with the typed detail + - `BqAccessError` (other kinds) → 502 with the typed detail + - `concurrent.futures.TimeoutError` → 504 with `kind="timeout"` and + best-effort `cancel_job` invoked + """ + try: + bq = get_bq_access() + except BqAccessError as exc: + # not_configured is a 400 (operator config issue, not server fault). + status = 400 if exc.kind == "not_configured" else 502 + raise HTTPException(status_code=status, detail={ + "kind": exc.kind, + "message": exc.message, + **(exc.details or {}), + }) + + try: + client = bq.client() + except BqAccessError as exc: + status = 400 if exc.kind == "not_configured" else 502 + raise HTTPException(status_code=status, detail={ + "kind": exc.kind, + "message": exc.message, + **(exc.details or {}), + }) + + started = time.monotonic() + try: + job = client.query("SELECT 1 AS ok") + except BqAccessError as exc: + status = 400 if exc.kind == "not_configured" else 502 + raise HTTPException(status_code=status, detail={ + "kind": exc.kind, + "message": exc.message, + **(exc.details or {}), + }) + except Exception as exc: + # Fall through to upstream error — covers unexpected exception + # types from the BQ client library. + raise HTTPException(status_code=502, detail={ + "kind": "bq_upstream_error", + "message": str(exc), + }) + + try: + job.result(timeout=_QUERY_TIMEOUT_SECONDS) + except concurrent.futures.TimeoutError: + # Best-effort cancel — the BQ job keeps running on BQ side until + # it sees the cancel or hits BQ's own timeout. Swallow any cancel + # failure (we already failed; layering a cancel error is noise). + try: + client.cancel_job( + job.job_id, + location=getattr(job, "location", None), + ) + except Exception: + logger.warning("BQ cancel_job failed for job_id=%s", job.job_id) + raise HTTPException(status_code=504, detail={ + "kind": "timeout", + "elapsed_ms": int(_QUERY_TIMEOUT_SECONDS * 1000), + "hint": ( + "BigQuery did not respond in 10s. Check network and SA " + "permissions. The job was best-effort cancelled." + ), + }) + except BqAccessError as exc: + # Rare: BqAccessError surfacing from the polling loop (e.g. + # auth_failed mid-flight). + raise HTTPException(status_code=502, detail={ + "kind": exc.kind, + "message": exc.message, + **(exc.details or {}), + }) + except Exception as exc: + raise HTTPException(status_code=502, detail={ + "kind": "bq_upstream_error", + "message": str(exc), + }) + + elapsed_ms = int((time.monotonic() - started) * 1000) + return { + "ok": True, + "billing_project": bq.projects.billing, + "data_project": bq.projects.data, + "elapsed_ms": elapsed_ms, + } diff --git a/app/main.py b/app/main.py index b858749..612c0ea 100644 --- a/app/main.py +++ b/app/main.py @@ -110,6 +110,7 @@ from app.api.telegram import router as telegram_router from app.api.access import router as access_router, me_router as me_access_router from app.api.me_debug import router as me_debug_router from app.api.admin import router as admin_router +from app.api.admin_bigquery_test import router as admin_bigquery_test_router from app.api.jira_webhooks import router as jira_webhooks_router from app.api.metrics import router as metrics_router from app.api.metadata import router as metadata_router @@ -514,6 +515,7 @@ def create_app() -> FastAPI: app.include_router(catalog_router) app.include_router(telegram_router) app.include_router(admin_router) + app.include_router(admin_bigquery_test_router) app.include_router(access_router) app.include_router(me_access_router) app.include_router(me_debug_router) diff --git a/app/web/templates/admin_server_config.html b/app/web/templates/admin_server_config.html index 32a0db9..1dbf1c4 100644 --- a/app/web/templates/admin_server_config.html +++ b/app/web/templates/admin_server_config.html @@ -316,7 +316,21 @@ function renderLeafInput(fieldId, section, pathSegments, kind, value, opts, isUn const v = isUnset ? (opts && opts.spec && opts.spec.default != null ? String(opts.spec.default) : "") : (value == null ? "" : value); - return ``; + // Issue #160 §4.7.5: `placeholder_from: ["a","b","c"]` walks the loaded + // `original` config dict and shows "(defaults to )" greyed in + // the empty input. Used by data_source.bigquery.billing_project to + // surface the access.py:339-340 billing→data fallback in the UI. + let placeholderAttr = ""; + if (isUnset && opts && opts.spec && Array.isArray(opts.spec.placeholder_from)) { + const resolved = opts.spec.placeholder_from.reduce( + (cur, k) => (cur && typeof cur === "object" ? cur[k] : undefined), + original, + ); + if (resolved !== undefined && resolved !== null && resolved !== "") { + placeholderAttr = ` placeholder="(defaults to ${escHtml(String(resolved))})"`; + } + } + return ``; } // Cast a string raw value to the JS type implied by an item_kind / value_kind. @@ -680,6 +694,10 @@ function renderSection(section, payload, knownForSection) {
+ ${section === "data_source" ? ` + + + ` : ""}
`; } @@ -695,6 +713,12 @@ function renderAll(data) { wrap.querySelectorAll('[data-action="save-section"]').forEach(btn => btn.addEventListener("click", () => onSaveSection(btn.dataset.section))); + // Issue #160 §4.9: Test BigQuery connection — admin probe to verify the + // saved data_source.bigquery config is reachable WITHOUT having to + // ssh to the VM or wait for an analyst's failed query. + wrap.querySelectorAll('[data-action="test-bigquery"]').forEach(btn => + btn.addEventListener("click", () => onTestBigQuery(btn))); + // Wire array-of-scalars + map-of-scalars add/remove buttons via event // delegation on the wrapper. Re-attaching after every renderAll() is // fine because we replace innerHTML wholesale on each load. @@ -974,6 +998,39 @@ function collectSection(section) { return patch; } +// ── BigQuery test connection (#160 §4.9) ─────────────────────────────── +async function onTestBigQuery(btn) { + const resultEl = btn.parentElement.querySelector(".bq-test-result"); + resultEl.hidden = false; + resultEl.textContent = "Testing…"; + resultEl.style.color = ""; + btn.disabled = true; + try { + const r = await fetch("/api/admin/bigquery/test-connection", { + method: "POST", + credentials: "include", + }); + if (r.ok) { + const body = await r.json(); + resultEl.textContent = `✓ ok (${body.elapsed_ms} ms; billing=${body.billing_project}, data=${body.data_project})`; + resultEl.style.color = "#2a8c4a"; + } else { + let body; + try { body = await r.json(); } catch (_) { body = await r.text(); } + const detail = body && typeof body === "object" ? body.detail : body; + const kind = detail && typeof detail === "object" ? (detail.kind || "error") : "error"; + const hint = detail && typeof detail === "object" ? (detail.hint || detail.message || "") : String(detail); + resultEl.textContent = `✗ ${kind}${hint ? " — " + hint : ""}`; + resultEl.style.color = "#c0392b"; + } + } catch (e) { + resultEl.textContent = `✗ network error — ${e.message}`; + resultEl.style.color = "#c0392b"; + } finally { + btn.disabled = false; + } +} + // ── Save flow ───────────────────────────────────────────────────────── async function onSaveSection(section) { hideBanner();