Closes the operator-side half of the reporter's loop. The CLI fix in
the previous commit makes USER_PROJECT_DENIED errors readable to
analysts; this commit lets admins verify reachability proactively
from /admin/server-config without waiting for analyst reports.
New endpoint POST /api/admin/bigquery/test-connection
(app/api/admin_bigquery_test.py, ~110 LOC):
- Depends(require_admin); registered in app/main.py.
- Builds BqAccess via existing get_bq_access(), runs `SELECT 1 AS ok`
with a 10s polling timeout.
- 200 with {ok, billing_project, data_project, elapsed_ms} on success.
- 400 for `BqAccessError(not_configured)` (operator config issue).
- 502 for any other typed BqAccessError or unknown upstream exception.
- 504 for concurrent.futures.TimeoutError; best-effort cancel_job
invoked (BQ-side cancel may still run; documented caveat).
Server-config placeholder (app/api/admin.py + admin_server_config.html):
- `data_source.bigquery.billing_project` field-spec gains
`placeholder_from: ["data_source", "bigquery", "project"]`.
- renderLeafInput's text branch reads `opts.spec.placeholder_from`,
walks the loaded `original` config dict, injects
`placeholder="(defaults to <project>)"` into the input HTML at
construction time. Admin sees the access.py:339-340 fallback rule
visible directly in the UI without reading source.
UI button:
- "Test BigQuery connection" button next to data_source's Save button.
- onTestBigQuery() POSTs to the endpoint, renders structured result
inline (green check + elapsed_ms on success; red kind + hint on
failure).
Tests: 6 endpoint cases + 1 placeholder payload test = 7 GREEN. 62
total across the affected admin server-config test files.
124 lines
4.4 KiB
Python
124 lines
4.4 KiB
Python
"""POST /api/admin/bigquery/test-connection — admin-only health probe.
|
|
|
|
Closes the operator-side half of #160. The reporter saw
|
|
USER_PROJECT_DENIED raw in the analyst CLI (now fixed via the structured
|
|
renderer in cli/error_render.py); this endpoint lets an admin verify the
|
|
saved BQ config from /admin/server-config WITHOUT having to wait for an
|
|
analyst to hit a query failure first.
|
|
|
|
Implementation runs a minimal `SELECT 1` via the existing BqAccess
|
|
plumbing with a 10s polling timeout. On `concurrent.futures.TimeoutError`
|
|
the BQ job is best-effort cancelled (job continues running on BQ side
|
|
until BQ-side timeout if the cancel itself fails — documented caveat).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import concurrent.futures
|
|
import logging
|
|
import time
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
|
|
from app.auth.access import require_admin
|
|
from connectors.bigquery.access import get_bq_access, BqAccessError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(prefix="/api/admin/bigquery", tags=["admin"])
|
|
|
|
_QUERY_TIMEOUT_SECONDS = 10.0
|
|
|
|
|
|
@router.post("/test-connection")
|
|
async def test_connection(_user: dict = Depends(require_admin)):
|
|
"""Run `SELECT 1 AS ok` against BigQuery via the configured BqAccess.
|
|
|
|
Returns 200 with `{ok, billing_project, data_project, elapsed_ms}` on
|
|
success. Maps known failure modes:
|
|
|
|
- `BqAccessError(not_configured)` → 400 with the typed detail
|
|
- `BqAccessError` (other kinds) → 502 with the typed detail
|
|
- `concurrent.futures.TimeoutError` → 504 with `kind="timeout"` and
|
|
best-effort `cancel_job` invoked
|
|
"""
|
|
try:
|
|
bq = get_bq_access()
|
|
except BqAccessError as exc:
|
|
# not_configured is a 400 (operator config issue, not server fault).
|
|
status = 400 if exc.kind == "not_configured" else 502
|
|
raise HTTPException(status_code=status, detail={
|
|
"kind": exc.kind,
|
|
"message": exc.message,
|
|
**(exc.details or {}),
|
|
})
|
|
|
|
try:
|
|
client = bq.client()
|
|
except BqAccessError as exc:
|
|
status = 400 if exc.kind == "not_configured" else 502
|
|
raise HTTPException(status_code=status, detail={
|
|
"kind": exc.kind,
|
|
"message": exc.message,
|
|
**(exc.details or {}),
|
|
})
|
|
|
|
started = time.monotonic()
|
|
try:
|
|
job = client.query("SELECT 1 AS ok")
|
|
except BqAccessError as exc:
|
|
status = 400 if exc.kind == "not_configured" else 502
|
|
raise HTTPException(status_code=status, detail={
|
|
"kind": exc.kind,
|
|
"message": exc.message,
|
|
**(exc.details or {}),
|
|
})
|
|
except Exception as exc:
|
|
# Fall through to upstream error — covers unexpected exception
|
|
# types from the BQ client library.
|
|
raise HTTPException(status_code=502, detail={
|
|
"kind": "bq_upstream_error",
|
|
"message": str(exc),
|
|
})
|
|
|
|
try:
|
|
job.result(timeout=_QUERY_TIMEOUT_SECONDS)
|
|
except concurrent.futures.TimeoutError:
|
|
# Best-effort cancel — the BQ job keeps running on BQ side until
|
|
# it sees the cancel or hits BQ's own timeout. Swallow any cancel
|
|
# failure (we already failed; layering a cancel error is noise).
|
|
try:
|
|
client.cancel_job(
|
|
job.job_id,
|
|
location=getattr(job, "location", None),
|
|
)
|
|
except Exception:
|
|
logger.warning("BQ cancel_job failed for job_id=%s", job.job_id)
|
|
raise HTTPException(status_code=504, detail={
|
|
"kind": "timeout",
|
|
"elapsed_ms": int(_QUERY_TIMEOUT_SECONDS * 1000),
|
|
"hint": (
|
|
"BigQuery did not respond in 10s. Check network and SA "
|
|
"permissions. The job was best-effort cancelled."
|
|
),
|
|
})
|
|
except BqAccessError as exc:
|
|
# Rare: BqAccessError surfacing from the polling loop (e.g.
|
|
# auth_failed mid-flight).
|
|
raise HTTPException(status_code=502, detail={
|
|
"kind": exc.kind,
|
|
"message": exc.message,
|
|
**(exc.details or {}),
|
|
})
|
|
except Exception as exc:
|
|
raise HTTPException(status_code=502, detail={
|
|
"kind": "bq_upstream_error",
|
|
"message": str(exc),
|
|
})
|
|
|
|
elapsed_ms = int((time.monotonic() - started) * 1000)
|
|
return {
|
|
"ok": True,
|
|
"billing_project": bq.projects.billing,
|
|
"data_project": bq.projects.data,
|
|
"elapsed_ms": elapsed_ms,
|
|
}
|