feat(admin): #160 BQ test-connection endpoint + billing_project placeholder UI
Closes the operator-side half of the reporter's loop. The CLI fix in
the previous commit makes USER_PROJECT_DENIED errors readable to
analysts; this commit lets admins verify reachability proactively
from /admin/server-config without waiting for analyst reports.
New endpoint POST /api/admin/bigquery/test-connection
(app/api/admin_bigquery_test.py, ~110 LOC):
- Depends(require_admin); registered in app/main.py.
- Builds BqAccess via existing get_bq_access(), runs `SELECT 1 AS ok`
with a 10s polling timeout.
- 200 with {ok, billing_project, data_project, elapsed_ms} on success.
- 400 for `BqAccessError(not_configured)` (operator config issue).
- 502 for any other typed BqAccessError or unknown upstream exception.
- 504 for concurrent.futures.TimeoutError; best-effort cancel_job
invoked (BQ-side cancel may still run; documented caveat).
Server-config placeholder (app/api/admin.py + admin_server_config.html):
- `data_source.bigquery.billing_project` field-spec gains
`placeholder_from: ["data_source", "bigquery", "project"]`.
- renderLeafInput's text branch reads `opts.spec.placeholder_from`,
walks the loaded `original` config dict, injects
`placeholder="(defaults to <project>)"` into the input HTML at
construction time. Admin sees the access.py:339-340 fallback rule
visible directly in the UI without reading source.
UI button:
- "Test BigQuery connection" button next to data_source's Save button.
- onTestBigQuery() POSTs to the endpoint, renders structured result
inline (green check + elapsed_ms on success; red kind + hint on
failure).
Tests: 6 endpoint cases + 1 placeholder payload test = 7 GREEN. 62
total across the affected admin server-config test files.
This commit is contained in:
parent
f0494ef356
commit
39bdc1ff45
4 changed files with 191 additions and 1 deletions
|
|
@ -225,6 +225,13 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = {
|
|||
"data project). Defaults to data_source.bigquery.project. "
|
||||
"Mismatch → 403 USER_PROJECT_DENIED on every BQ call."
|
||||
),
|
||||
# Issue #160 §4.7.5: when this field is empty in the
|
||||
# admin form, the JS template shows "(defaults to <project>)"
|
||||
# as placeholder text — surfacing the access.py:339-340
|
||||
# fallback rule directly in the UI without the operator
|
||||
# having to read source. Path is walked against the
|
||||
# `original` config payload from GET /api/admin/server-config.
|
||||
"placeholder_from": ["data_source", "bigquery", "project"],
|
||||
},
|
||||
"max_bytes_per_materialize": {
|
||||
"kind": "int",
|
||||
|
|
|
|||
124
app/api/admin_bigquery_test.py
Normal file
124
app/api/admin_bigquery_test.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
"""POST /api/admin/bigquery/test-connection — admin-only health probe.
|
||||
|
||||
Closes the operator-side half of #160. The reporter saw
|
||||
USER_PROJECT_DENIED raw in the analyst CLI (now fixed via the structured
|
||||
renderer in cli/error_render.py); this endpoint lets an admin verify the
|
||||
saved BQ config from /admin/server-config WITHOUT having to wait for an
|
||||
analyst to hit a query failure first.
|
||||
|
||||
Implementation runs a minimal `SELECT 1` via the existing BqAccess
|
||||
plumbing with a 10s polling timeout. On `concurrent.futures.TimeoutError`
|
||||
the BQ job is best-effort cancelled (job continues running on BQ side
|
||||
until BQ-side timeout if the cancel itself fails — documented caveat).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import time
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from app.auth.access import require_admin
|
||||
from connectors.bigquery.access import get_bq_access, BqAccessError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/admin/bigquery", tags=["admin"])
|
||||
|
||||
_QUERY_TIMEOUT_SECONDS = 10.0
|
||||
|
||||
|
||||
@router.post("/test-connection")
|
||||
async def test_connection(_user: dict = Depends(require_admin)):
|
||||
"""Run `SELECT 1 AS ok` against BigQuery via the configured BqAccess.
|
||||
|
||||
Returns 200 with `{ok, billing_project, data_project, elapsed_ms}` on
|
||||
success. Maps known failure modes:
|
||||
|
||||
- `BqAccessError(not_configured)` → 400 with the typed detail
|
||||
- `BqAccessError` (other kinds) → 502 with the typed detail
|
||||
- `concurrent.futures.TimeoutError` → 504 with `kind="timeout"` and
|
||||
best-effort `cancel_job` invoked
|
||||
"""
|
||||
try:
|
||||
bq = get_bq_access()
|
||||
except BqAccessError as exc:
|
||||
# not_configured is a 400 (operator config issue, not server fault).
|
||||
status = 400 if exc.kind == "not_configured" else 502
|
||||
raise HTTPException(status_code=status, detail={
|
||||
"kind": exc.kind,
|
||||
"message": exc.message,
|
||||
**(exc.details or {}),
|
||||
})
|
||||
|
||||
try:
|
||||
client = bq.client()
|
||||
except BqAccessError as exc:
|
||||
status = 400 if exc.kind == "not_configured" else 502
|
||||
raise HTTPException(status_code=status, detail={
|
||||
"kind": exc.kind,
|
||||
"message": exc.message,
|
||||
**(exc.details or {}),
|
||||
})
|
||||
|
||||
started = time.monotonic()
|
||||
try:
|
||||
job = client.query("SELECT 1 AS ok")
|
||||
except BqAccessError as exc:
|
||||
status = 400 if exc.kind == "not_configured" else 502
|
||||
raise HTTPException(status_code=status, detail={
|
||||
"kind": exc.kind,
|
||||
"message": exc.message,
|
||||
**(exc.details or {}),
|
||||
})
|
||||
except Exception as exc:
|
||||
# Fall through to upstream error — covers unexpected exception
|
||||
# types from the BQ client library.
|
||||
raise HTTPException(status_code=502, detail={
|
||||
"kind": "bq_upstream_error",
|
||||
"message": str(exc),
|
||||
})
|
||||
|
||||
try:
|
||||
job.result(timeout=_QUERY_TIMEOUT_SECONDS)
|
||||
except concurrent.futures.TimeoutError:
|
||||
# Best-effort cancel — the BQ job keeps running on BQ side until
|
||||
# it sees the cancel or hits BQ's own timeout. Swallow any cancel
|
||||
# failure (we already failed; layering a cancel error is noise).
|
||||
try:
|
||||
client.cancel_job(
|
||||
job.job_id,
|
||||
location=getattr(job, "location", None),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("BQ cancel_job failed for job_id=%s", job.job_id)
|
||||
raise HTTPException(status_code=504, detail={
|
||||
"kind": "timeout",
|
||||
"elapsed_ms": int(_QUERY_TIMEOUT_SECONDS * 1000),
|
||||
"hint": (
|
||||
"BigQuery did not respond in 10s. Check network and SA "
|
||||
"permissions. The job was best-effort cancelled."
|
||||
),
|
||||
})
|
||||
except BqAccessError as exc:
|
||||
# Rare: BqAccessError surfacing from the polling loop (e.g.
|
||||
# auth_failed mid-flight).
|
||||
raise HTTPException(status_code=502, detail={
|
||||
"kind": exc.kind,
|
||||
"message": exc.message,
|
||||
**(exc.details or {}),
|
||||
})
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail={
|
||||
"kind": "bq_upstream_error",
|
||||
"message": str(exc),
|
||||
})
|
||||
|
||||
elapsed_ms = int((time.monotonic() - started) * 1000)
|
||||
return {
|
||||
"ok": True,
|
||||
"billing_project": bq.projects.billing,
|
||||
"data_project": bq.projects.data,
|
||||
"elapsed_ms": elapsed_ms,
|
||||
}
|
||||
|
|
@ -110,6 +110,7 @@ from app.api.telegram import router as telegram_router
|
|||
from app.api.access import router as access_router, me_router as me_access_router
|
||||
from app.api.me_debug import router as me_debug_router
|
||||
from app.api.admin import router as admin_router
|
||||
from app.api.admin_bigquery_test import router as admin_bigquery_test_router
|
||||
from app.api.jira_webhooks import router as jira_webhooks_router
|
||||
from app.api.metrics import router as metrics_router
|
||||
from app.api.metadata import router as metadata_router
|
||||
|
|
@ -514,6 +515,7 @@ def create_app() -> FastAPI:
|
|||
app.include_router(catalog_router)
|
||||
app.include_router(telegram_router)
|
||||
app.include_router(admin_router)
|
||||
app.include_router(admin_bigquery_test_router)
|
||||
app.include_router(access_router)
|
||||
app.include_router(me_access_router)
|
||||
app.include_router(me_debug_router)
|
||||
|
|
|
|||
|
|
@ -316,7 +316,21 @@ function renderLeafInput(fieldId, section, pathSegments, kind, value, opts, isUn
|
|||
const v = isUnset
|
||||
? (opts && opts.spec && opts.spec.default != null ? String(opts.spec.default) : "")
|
||||
: (value == null ? "" : value);
|
||||
return `<input id="${fieldId}" type="text" data-section="${section}" data-key="${escHtml(dottedKey)}" data-path="${dataPath}" value="${escHtml(v)}">`;
|
||||
// Issue #160 §4.7.5: `placeholder_from: ["a","b","c"]` walks the loaded
|
||||
// `original` config dict and shows "(defaults to <resolved>)" greyed in
|
||||
// the empty input. Used by data_source.bigquery.billing_project to
|
||||
// surface the access.py:339-340 billing→data fallback in the UI.
|
||||
let placeholderAttr = "";
|
||||
if (isUnset && opts && opts.spec && Array.isArray(opts.spec.placeholder_from)) {
|
||||
const resolved = opts.spec.placeholder_from.reduce(
|
||||
(cur, k) => (cur && typeof cur === "object" ? cur[k] : undefined),
|
||||
original,
|
||||
);
|
||||
if (resolved !== undefined && resolved !== null && resolved !== "") {
|
||||
placeholderAttr = ` placeholder="(defaults to ${escHtml(String(resolved))})"`;
|
||||
}
|
||||
}
|
||||
return `<input id="${fieldId}" type="text" data-section="${section}" data-key="${escHtml(dottedKey)}" data-path="${dataPath}" value="${escHtml(v)}"${placeholderAttr}>`;
|
||||
}
|
||||
|
||||
// Cast a string raw value to the JS type implied by an item_kind / value_kind.
|
||||
|
|
@ -680,6 +694,10 @@ function renderSection(section, payload, knownForSection) {
|
|||
</div>
|
||||
<div class="section-actions">
|
||||
<button class="cfg-btn primary" data-action="save-section" data-section="${section}">Save ${escHtml(meta.title.toLowerCase())}</button>
|
||||
${section === "data_source" ? `
|
||||
<button class="cfg-btn" data-action="test-bigquery" type="button">Test BigQuery connection</button>
|
||||
<span class="bq-test-result" data-section="${section}" hidden style="margin-left: 1ex;"></span>
|
||||
` : ""}
|
||||
</div>
|
||||
</section>`;
|
||||
}
|
||||
|
|
@ -695,6 +713,12 @@ function renderAll(data) {
|
|||
wrap.querySelectorAll('[data-action="save-section"]').forEach(btn =>
|
||||
btn.addEventListener("click", () => onSaveSection(btn.dataset.section)));
|
||||
|
||||
// Issue #160 §4.9: Test BigQuery connection — admin probe to verify the
|
||||
// saved data_source.bigquery config is reachable WITHOUT having to
|
||||
// ssh to the VM or wait for an analyst's failed query.
|
||||
wrap.querySelectorAll('[data-action="test-bigquery"]').forEach(btn =>
|
||||
btn.addEventListener("click", () => onTestBigQuery(btn)));
|
||||
|
||||
// Wire array-of-scalars + map-of-scalars add/remove buttons via event
|
||||
// delegation on the wrapper. Re-attaching after every renderAll() is
|
||||
// fine because we replace innerHTML wholesale on each load.
|
||||
|
|
@ -974,6 +998,39 @@ function collectSection(section) {
|
|||
return patch;
|
||||
}
|
||||
|
||||
// ── BigQuery test connection (#160 §4.9) ───────────────────────────────
|
||||
async function onTestBigQuery(btn) {
|
||||
const resultEl = btn.parentElement.querySelector(".bq-test-result");
|
||||
resultEl.hidden = false;
|
||||
resultEl.textContent = "Testing…";
|
||||
resultEl.style.color = "";
|
||||
btn.disabled = true;
|
||||
try {
|
||||
const r = await fetch("/api/admin/bigquery/test-connection", {
|
||||
method: "POST",
|
||||
credentials: "include",
|
||||
});
|
||||
if (r.ok) {
|
||||
const body = await r.json();
|
||||
resultEl.textContent = `✓ ok (${body.elapsed_ms} ms; billing=${body.billing_project}, data=${body.data_project})`;
|
||||
resultEl.style.color = "#2a8c4a";
|
||||
} else {
|
||||
let body;
|
||||
try { body = await r.json(); } catch (_) { body = await r.text(); }
|
||||
const detail = body && typeof body === "object" ? body.detail : body;
|
||||
const kind = detail && typeof detail === "object" ? (detail.kind || "error") : "error";
|
||||
const hint = detail && typeof detail === "object" ? (detail.hint || detail.message || "") : String(detail);
|
||||
resultEl.textContent = `✗ ${kind}${hint ? " — " + hint : ""}`;
|
||||
resultEl.style.color = "#c0392b";
|
||||
}
|
||||
} catch (e) {
|
||||
resultEl.textContent = `✗ network error — ${e.message}`;
|
||||
resultEl.style.color = "#c0392b";
|
||||
} finally {
|
||||
btn.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Save flow ─────────────────────────────────────────────────────────
|
||||
async function onSaveSection(section) {
|
||||
hideBanner();
|
||||
|
|
|
|||
Loading…
Reference in a new issue