agnes-the-ai-analyst/app/api/admin_bigquery_test.py
ZdenekSrotyr 39bdc1ff45 feat(admin): #160 BQ test-connection endpoint + billing_project placeholder UI
Closes the operator-side half of the reporter's loop. The CLI fix in
the previous commit makes USER_PROJECT_DENIED errors readable to
analysts; this commit lets admins verify reachability proactively
from /admin/server-config without waiting for analyst reports.

New endpoint POST /api/admin/bigquery/test-connection
(app/api/admin_bigquery_test.py, ~110 LOC):
- Depends(require_admin); registered in app/main.py.
- Builds BqAccess via existing get_bq_access(), runs `SELECT 1 AS ok`
  with a 10s polling timeout.
- 200 with {ok, billing_project, data_project, elapsed_ms} on success.
- 400 for `BqAccessError(not_configured)` (operator config issue).
- 502 for any other typed BqAccessError or unknown upstream exception.
- 504 for concurrent.futures.TimeoutError; best-effort cancel_job
  invoked (BQ-side cancel may still run; documented caveat).

Server-config placeholder (app/api/admin.py + admin_server_config.html):
- `data_source.bigquery.billing_project` field-spec gains
  `placeholder_from: ["data_source", "bigquery", "project"]`.
- renderLeafInput's text branch reads `opts.spec.placeholder_from`,
  walks the loaded `original` config dict, injects
  `placeholder="(defaults to <project>)"` into the input HTML at
  construction time. Admin sees the access.py:339-340 fallback rule
  visible directly in the UI without reading source.

UI button:
- "Test BigQuery connection" button next to data_source's Save button.
- onTestBigQuery() POSTs to the endpoint, renders structured result
  inline (green check + elapsed_ms on success; red kind + hint on
  failure).

Tests: 6 endpoint cases + 1 placeholder payload test = 7 GREEN. 62
total across the affected admin server-config test files.
2026-05-04 10:31:35 +02:00

124 lines
4.4 KiB
Python

"""POST /api/admin/bigquery/test-connection — admin-only health probe.
Closes the operator-side half of #160. The reporter saw
USER_PROJECT_DENIED raw in the analyst CLI (now fixed via the structured
renderer in cli/error_render.py); this endpoint lets an admin verify the
saved BQ config from /admin/server-config WITHOUT having to wait for an
analyst to hit a query failure first.
Implementation runs a minimal `SELECT 1` via the existing BqAccess
plumbing with a 10s polling timeout. On `concurrent.futures.TimeoutError`
the BQ job is best-effort cancelled (job continues running on BQ side
until BQ-side timeout if the cancel itself fails — documented caveat).
"""
from __future__ import annotations

import concurrent.futures
import logging
import time

from fastapi import APIRouter, Depends, HTTPException
from fastapi.concurrency import run_in_threadpool

from app.auth.access import require_admin
from connectors.bigquery.access import get_bq_access, BqAccessError
# Module-level logger; used below to record a failed best-effort job cancel.
logger = logging.getLogger(__name__)
# Routes declared on this router are served under /api/admin/bigquery and
# carry the "admin" tag.
router = APIRouter(prefix="/api/admin/bigquery", tags=["admin"])
# Seconds passed as `timeout` when waiting on the probe job's result; hitting
# it is reported to the caller as HTTP 504.
_QUERY_TIMEOUT_SECONDS = 10.0
def _access_error_to_http(
    exc: BqAccessError, *, config_errors_as_400: bool = True
) -> HTTPException:
    """Convert a typed ``BqAccessError`` into the ``HTTPException`` to raise.

    The detail carries ``kind`` and ``message`` followed by whatever extra
    keys ``exc.details`` provides (if any).

    Args:
        exc: The typed access error to translate.
        config_errors_as_400: When true, ``kind == "not_configured"`` maps to
            400 (operator config issue, not a server fault) and every other
            kind maps to 502. When false the status is always 502.

    Returns:
        The exception instance; the caller is responsible for raising it.
    """
    is_config_issue = config_errors_as_400 and exc.kind == "not_configured"
    return HTTPException(
        status_code=400 if is_config_issue else 502,
        detail={
            "kind": exc.kind,
            "message": exc.message,
            **(exc.details or {}),
        },
    )


def _upstream_error_to_http(exc: Exception) -> HTTPException:
    """Wrap an unexpected BQ client exception as a 502 ``bq_upstream_error``."""
    return HTTPException(
        status_code=502,
        detail={
            "kind": "bq_upstream_error",
            "message": str(exc),
        },
    )


def _cancel_job_best_effort(client, job) -> None:
    """Ask BigQuery to cancel ``job``; log and swallow any failure.

    The caller has already failed with a timeout, so a cancel error is only
    logged (with its traceback) instead of being layered on top.
    """
    try:
        client.cancel_job(
            job.job_id,
            location=getattr(job, "location", None),
        )
    except Exception:
        logger.warning(
            "BQ cancel_job failed for job_id=%s", job.job_id, exc_info=True
        )


def _run_connection_probe() -> dict:
    """Run the blocking ``SELECT 1 AS ok`` probe and build the success payload.

    Every call in here is synchronous, so this must run off the event loop.

    Returns:
        ``{ok, billing_project, data_project, elapsed_ms}`` on success.

    Raises:
        HTTPException: 400/502 for a ``BqAccessError`` while building the
            access object, creating the client or submitting the query;
            502 for any other exception from submitting or polling the job
            (including a ``BqAccessError`` surfacing while polling);
            504 when waiting for the result times out.
    """
    try:
        bq = get_bq_access()
        client = bq.client()
    except BqAccessError as exc:
        raise _access_error_to_http(exc) from exc

    started = time.monotonic()
    try:
        job = client.query("SELECT 1 AS ok")
    except BqAccessError as exc:
        raise _access_error_to_http(exc) from exc
    except Exception as exc:
        # Covers unexpected exception types from the BQ client library.
        raise _upstream_error_to_http(exc) from exc

    try:
        # The timeout only bounds this wait, not the submission above.
        job.result(timeout=_QUERY_TIMEOUT_SECONDS)
    except concurrent.futures.TimeoutError as exc:
        # The BQ job keeps running on the BQ side until it sees the cancel
        # or hits BQ's own timeout.
        _cancel_job_best_effort(client, job)
        raise HTTPException(
            status_code=504,
            detail={
                "kind": "timeout",
                "elapsed_ms": int(_QUERY_TIMEOUT_SECONDS * 1000),
                "hint": (
                    f"BigQuery did not respond in {_QUERY_TIMEOUT_SECONDS:g}s. "
                    "Check network and SA permissions. The job was "
                    "best-effort cancelled."
                ),
            },
        ) from exc
    except BqAccessError as exc:
        # Rare: BqAccessError surfacing from the polling loop (e.g.
        # auth_failed mid-flight). Always reported as an upstream failure.
        raise _access_error_to_http(exc, config_errors_as_400=False) from exc
    except Exception as exc:
        raise _upstream_error_to_http(exc) from exc

    elapsed_ms = int((time.monotonic() - started) * 1000)
    return {
        "ok": True,
        "billing_project": bq.projects.billing,
        "data_project": bq.projects.data,
        "elapsed_ms": elapsed_ms,
    }


@router.post("/test-connection")
async def test_connection(_user: dict = Depends(require_admin)):
    """Run `SELECT 1 AS ok` against BigQuery via the configured BqAccess.

    Returns 200 with `{ok, billing_project, data_project, elapsed_ms}` on
    success. Maps known failure modes:

    - `BqAccessError(not_configured)` → 400 with the typed detail
    - `BqAccessError` (other kinds) → 502 with the typed detail
    - any other exception from submitting or polling the job → 502 with
      `kind="bq_upstream_error"`
    - `concurrent.futures.TimeoutError` → 504 with `kind="timeout"` and
      best-effort `cancel_job` invoked

    `_user` is unused in the body; it exists so the `require_admin`
    dependency runs for this route.
    """
    # The probe makes synchronous client calls that can wait up to the
    # query timeout; run it in the threadpool so the event loop keeps
    # serving other requests meanwhile.
    return await run_in_threadpool(_run_connection_probe)