diff --git a/app/api/admin.py b/app/api/admin.py index 813120c..19f447b 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2126,6 +2126,28 @@ async def update_table( if merged.get("query_mode") != "materialized": merged["source_query"] = None + # Cross-source coherence: query_mode='materialized' requires a + # non-empty source_query for ALL source types, not just BigQuery. + # Pre-fix, only the BQ-specific synthetic-RegisterTableRequest below + # caught this — Keboola materialized rows could be PUT without + # source_query and persisted with source_query=None, then crash at + # the next sync tick when kb_materialize_query received `sql=None` + # and DuckDB rejected `COPY (None) TO ...`. Devin finding 2026-05-01: + # BUG_pr-review-job-58ae3148_0001. + if merged.get("query_mode") == "materialized": + sq = merged.get("source_query") + if not sq or not str(sq).strip(): + raise HTTPException( + status_code=422, + detail=( + "query_mode='materialized' requires a non-empty " + "source_query. To revert to a non-materialized mode, " + "PATCH query_mode='local' (Keboola) or 'remote' " + "(BigQuery) and the stale source_query is cleared " + "automatically." + ), + ) + if merged.get("source_type") == "bigquery": # Reuse the register-time validator. It mutates the request to # force query_mode='remote' / profile_after_sync=False (or to diff --git a/app/api/health.py b/app/api/health.py index 13a811c..736542b 100644 --- a/app/api/health.py +++ b/app/api/health.py @@ -48,7 +48,17 @@ def _check_bq_billing_project() -> dict | None: billing = bq.projects.billing data = bq.projects.data except Exception as e: - return {"status": "ok", "detail": f"could not resolve BQ projects: {e}"} + # Resolution failure (missing google-cloud-bigquery, auth error, + # malformed config) is itself a problem worth surfacing. Returning + # status='ok' would mask the failure from automated alerting that + # keys on `status != 'ok'`. Use 'unknown' so the entry shows as + # non-green in operator dashboards but doesn't promote the overall + # check to 'degraded' (which 'warning' does). Devin finding + # 2026-05-01: ANALYSIS_pr-review-job-642ff90f_0007. + return { + "status": "unknown", + "detail": f"could not resolve BQ projects: {e}", + } if not data: # not_configured sentinel — surfaced elsewhere; nothing to warn about here. diff --git a/tests/test_admin_keboola_materialized.py b/tests/test_admin_keboola_materialized.py index 1a08776..62fab26 100644 --- a/tests/test_admin_keboola_materialized.py +++ b/tests/test_admin_keboola_materialized.py @@ -93,3 +93,41 @@ def test_update_keboola_materialized_clears_stale_source_query_on_mode_switch(se rows = r.json()["tables"] row = next(t for t in rows if t["id"] == "x") assert row.get("source_query") in (None, "") + + +def test_update_keboola_to_materialized_without_source_query_rejected(seeded_app): + """Devin finding 2026-05-01 (BUG_pr-review-job-58ae3148_0001): + PUT cannot persist a non-BQ materialized row without source_query. + Pre-fix, the validation only fired for source_type='bigquery' via the + synthetic RegisterTableRequest; Keboola rows could be flipped to + materialized with source_query=None and crash at the next sync tick.""" + c = seeded_app["client"] + token = seeded_app["admin_token"] + auth = {"Authorization": f"Bearer {token}"} + + # Register a Keboola local row (source_query intentionally absent). + r = c.post( + "/api/admin/register-table", + headers=auth, + json={ + "name": "kb_local", + "source_type": "keboola", + "bucket": "in.c-foo", + "source_table": "events", + "query_mode": "local", + }, + ) + assert r.status_code == 201, r.text + + # Try to flip to materialized WITHOUT shipping source_query. + r = c.put( + "/api/admin/registry/kb_local", + headers=auth, + json={"query_mode": "materialized"}, + ) + assert r.status_code == 422, r.text + body = r.json() + detail = body.get("detail", "") + if isinstance(detail, list): + detail = " ".join(str(d) for d in detail) + assert "source_query" in detail.lower(), body diff --git a/tests/test_diagnose_billing.py b/tests/test_diagnose_billing.py index 40ea6e1..0ccb1d5 100644 --- a/tests/test_diagnose_billing.py +++ b/tests/test_diagnose_billing.py @@ -109,3 +109,48 @@ def test_diagnose_no_warning_on_keboola_instance(seeded_app, monkeypatch): bq_cfg = body.get("services", {}).get("bq_config") if bq_cfg is not None: assert bq_cfg.get("status") != "warning", bq_cfg + + +def test_diagnose_returns_unknown_status_when_bq_resolution_fails(seeded_app, monkeypatch): + """Devin finding 2026-05-01 (ANALYSIS_pr-review-job-642ff90f_0007): + if get_bq_access() raises (missing google-cloud-bigquery, auth error, + malformed config), the bq_config check must NOT report status='ok' — + automated alerting keyed on `status != 'ok'` would silently miss the + failure. Use 'unknown' so dashboards surface it without promoting the + overall check to 'degraded' (which 'warning' would do).""" + fake_cfg = { + "data_source": { + "type": "bigquery", + "bigquery": {"project": "p"}, + }, + } + monkeypatch.setattr( + "app.instance_config.load_instance_config", + lambda: fake_cfg, raising=False, + ) + from app.instance_config import reset_cache + reset_cache() + + # Force get_bq_access to raise. + def _raise(*a, **kw): + raise RuntimeError("simulated bq lib missing") + + import connectors.bigquery.access as bq_access_mod + monkeypatch.setattr(bq_access_mod, "get_bq_access", _raise) + + try: + c = seeded_app["client"] + token = seeded_app["admin_token"] + r = c.get( + "/api/health/detailed", + headers={"Authorization": f"Bearer {token}"}, + ) + assert r.status_code == 200, r.text + body = r.json() + bq_check = body.get("services", {}).get("bq_config") + assert bq_check is not None, body + # Must NOT be 'ok' — that would mask the failure from alerting. + assert bq_check.get("status") == "unknown", bq_check + assert "could not resolve" in bq_check.get("detail", "").lower() + finally: + reset_cache()