fix(admin+diagnose): address 2 additional Devin Review findings on PR #152
Devin's second review pass on commit 16938ae7 surfaced 2 more issues:
BUG_pr-review-job-58ae3148_0001 — non-BQ materialized via PUT bypasses source_query check
app/api/admin.py update_table only enforces 'query_mode=materialized
requires source_query' for source_type='bigquery' rows (via the
synthetic RegisterTableRequest at line 2129+). Non-BQ source types
(Keboola) skip the check — admin could PUT {query_mode: materialized}
on a Keboola local row without source_query, persist successfully,
then crash at the next sync tick when kb_materialize_query received
sql=None and DuckDB rejected COPY (None) TO '...'.
Fix: generic coherence guard before the BQ-specific block — for ALL
source types, query_mode='materialized' requires non-empty source_query
in the merged record. Returns 422 with a hint about reverting via
query_mode='local'/'remote'.
ANALYSIS_pr-review-job-642ff90f_0007 — diagnose returns 'ok' on BQ resolution failure
app/api/health.py:_check_bq_billing_project caught get_bq_access()
exceptions and returned status='ok' with a 'could not resolve' detail.
Automated alerting keyed on status != 'ok' would silently miss missing
google-cloud-bigquery, auth failures, or malformed config. Fix: return
status='unknown' on resolution failure — surfaces it on operator
dashboards without promoting the overall health to 'degraded' (which
'warning' does, intentionally for the billing==project case).
Tests:
- test_update_keboola_to_materialized_without_source_query_rejected:
PUT {query_mode: materialized} on a Keboola local row returns 422
with 'source_query' in the detail
- test_diagnose_returns_unknown_status_when_bq_resolution_fails:
when get_bq_access raises, the bq_config service entry surfaces
status='unknown' (not 'ok')
Full sweep: 2507 passed, 25 skipped, 0 failed (+2 from previous sweep
because of the 2 new regression tests; 8 pre-existing internal_roles
schema-migration failures still ignored per task brief).
This commit is contained in:
parent
16938ae7cb
commit
a4339ce679
4 changed files with 116 additions and 1 deletions
|
|
@ -2126,6 +2126,28 @@ async def update_table(
|
||||||
if merged.get("query_mode") != "materialized":
|
if merged.get("query_mode") != "materialized":
|
||||||
merged["source_query"] = None
|
merged["source_query"] = None
|
||||||
|
|
||||||
|
# Cross-source coherence: query_mode='materialized' requires a
|
||||||
|
# non-empty source_query for ALL source types, not just BigQuery.
|
||||||
|
# Pre-fix, only the BQ-specific synthetic-RegisterTableRequest below
|
||||||
|
# caught this — Keboola materialized rows could be PUT without
|
||||||
|
# source_query and persisted with source_query=None, then crash at
|
||||||
|
# the next sync tick when kb_materialize_query received `sql=None`
|
||||||
|
# and DuckDB rejected `COPY (None) TO ...`. Devin finding 2026-05-01:
|
||||||
|
# BUG_pr-review-job-58ae3148_0001.
|
||||||
|
if merged.get("query_mode") == "materialized":
|
||||||
|
sq = merged.get("source_query")
|
||||||
|
if not sq or not str(sq).strip():
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=422,
|
||||||
|
detail=(
|
||||||
|
"query_mode='materialized' requires a non-empty "
|
||||||
|
"source_query. To revert to a non-materialized mode, "
|
||||||
|
"PATCH query_mode='local' (Keboola) or 'remote' "
|
||||||
|
"(BigQuery) and the stale source_query is cleared "
|
||||||
|
"automatically."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
if merged.get("source_type") == "bigquery":
|
if merged.get("source_type") == "bigquery":
|
||||||
# Reuse the register-time validator. It mutates the request to
|
# Reuse the register-time validator. It mutates the request to
|
||||||
# force query_mode='remote' / profile_after_sync=False (or to
|
# force query_mode='remote' / profile_after_sync=False (or to
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,17 @@ def _check_bq_billing_project() -> dict | None:
|
||||||
billing = bq.projects.billing
|
billing = bq.projects.billing
|
||||||
data = bq.projects.data
|
data = bq.projects.data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "ok", "detail": f"could not resolve BQ projects: {e}"}
|
# Resolution failure (missing google-cloud-bigquery, auth error,
|
||||||
|
# malformed config) is itself a problem worth surfacing. Returning
|
||||||
|
# status='ok' would mask the failure from automated alerting that
|
||||||
|
# keys on `status != 'ok'`. Use 'unknown' so the entry shows as
|
||||||
|
# non-green in operator dashboards but doesn't promote the overall
|
||||||
|
# check to 'degraded' (which 'warning' does). Devin finding
|
||||||
|
# 2026-05-01: ANALYSIS_pr-review-job-642ff90f_0007.
|
||||||
|
return {
|
||||||
|
"status": "unknown",
|
||||||
|
"detail": f"could not resolve BQ projects: {e}",
|
||||||
|
}
|
||||||
|
|
||||||
if not data:
|
if not data:
|
||||||
# not_configured sentinel — surfaced elsewhere; nothing to warn about here.
|
# not_configured sentinel — surfaced elsewhere; nothing to warn about here.
|
||||||
|
|
|
||||||
|
|
@ -93,3 +93,41 @@ def test_update_keboola_materialized_clears_stale_source_query_on_mode_switch(se
|
||||||
rows = r.json()["tables"]
|
rows = r.json()["tables"]
|
||||||
row = next(t for t in rows if t["id"] == "x")
|
row = next(t for t in rows if t["id"] == "x")
|
||||||
assert row.get("source_query") in (None, "")
|
assert row.get("source_query") in (None, "")
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_keboola_to_materialized_without_source_query_rejected(seeded_app):
|
||||||
|
"""Devin finding 2026-05-01 (BUG_pr-review-job-58ae3148_0001):
|
||||||
|
PUT cannot persist a non-BQ materialized row without source_query.
|
||||||
|
Pre-fix, the validation only fired for source_type='bigquery' via the
|
||||||
|
synthetic RegisterTableRequest; Keboola rows could be flipped to
|
||||||
|
materialized with source_query=None and crash at the next sync tick."""
|
||||||
|
c = seeded_app["client"]
|
||||||
|
token = seeded_app["admin_token"]
|
||||||
|
auth = {"Authorization": f"Bearer {token}"}
|
||||||
|
|
||||||
|
# Register a Keboola local row (source_query intentionally absent).
|
||||||
|
r = c.post(
|
||||||
|
"/api/admin/register-table",
|
||||||
|
headers=auth,
|
||||||
|
json={
|
||||||
|
"name": "kb_local",
|
||||||
|
"source_type": "keboola",
|
||||||
|
"bucket": "in.c-foo",
|
||||||
|
"source_table": "events",
|
||||||
|
"query_mode": "local",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert r.status_code == 201, r.text
|
||||||
|
|
||||||
|
# Try to flip to materialized WITHOUT shipping source_query.
|
||||||
|
r = c.put(
|
||||||
|
"/api/admin/registry/kb_local",
|
||||||
|
headers=auth,
|
||||||
|
json={"query_mode": "materialized"},
|
||||||
|
)
|
||||||
|
assert r.status_code == 422, r.text
|
||||||
|
body = r.json()
|
||||||
|
detail = body.get("detail", "")
|
||||||
|
if isinstance(detail, list):
|
||||||
|
detail = " ".join(str(d) for d in detail)
|
||||||
|
assert "source_query" in detail.lower(), body
|
||||||
|
|
|
||||||
|
|
@ -109,3 +109,48 @@ def test_diagnose_no_warning_on_keboola_instance(seeded_app, monkeypatch):
|
||||||
bq_cfg = body.get("services", {}).get("bq_config")
|
bq_cfg = body.get("services", {}).get("bq_config")
|
||||||
if bq_cfg is not None:
|
if bq_cfg is not None:
|
||||||
assert bq_cfg.get("status") != "warning", bq_cfg
|
assert bq_cfg.get("status") != "warning", bq_cfg
|
||||||
|
|
||||||
|
|
||||||
|
def test_diagnose_returns_unknown_status_when_bq_resolution_fails(seeded_app, monkeypatch):
|
||||||
|
"""Devin finding 2026-05-01 (ANALYSIS_pr-review-job-642ff90f_0007):
|
||||||
|
if get_bq_access() raises (missing google-cloud-bigquery, auth error,
|
||||||
|
malformed config), the bq_config check must NOT report status='ok' —
|
||||||
|
automated alerting keyed on `status != 'ok'` would silently miss the
|
||||||
|
failure. Use 'unknown' so dashboards surface it without promoting the
|
||||||
|
overall check to 'degraded' (which 'warning' would do)."""
|
||||||
|
fake_cfg = {
|
||||||
|
"data_source": {
|
||||||
|
"type": "bigquery",
|
||||||
|
"bigquery": {"project": "p"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.instance_config.load_instance_config",
|
||||||
|
lambda: fake_cfg, raising=False,
|
||||||
|
)
|
||||||
|
from app.instance_config import reset_cache
|
||||||
|
reset_cache()
|
||||||
|
|
||||||
|
# Force get_bq_access to raise.
|
||||||
|
def _raise(*a, **kw):
|
||||||
|
raise RuntimeError("simulated bq lib missing")
|
||||||
|
|
||||||
|
import connectors.bigquery.access as bq_access_mod
|
||||||
|
monkeypatch.setattr(bq_access_mod, "get_bq_access", _raise)
|
||||||
|
|
||||||
|
try:
|
||||||
|
c = seeded_app["client"]
|
||||||
|
token = seeded_app["admin_token"]
|
||||||
|
r = c.get(
|
||||||
|
"/api/health/detailed",
|
||||||
|
headers={"Authorization": f"Bearer {token}"},
|
||||||
|
)
|
||||||
|
assert r.status_code == 200, r.text
|
||||||
|
body = r.json()
|
||||||
|
bq_check = body.get("services", {}).get("bq_config")
|
||||||
|
assert bq_check is not None, body
|
||||||
|
# Must NOT be 'ok' — that would mask the failure from alerting.
|
||||||
|
assert bq_check.get("status") == "unknown", bq_check
|
||||||
|
assert "could not resolve" in bq_check.get("detail", "").lower()
|
||||||
|
finally:
|
||||||
|
reset_cache()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue