Devin's second review pass on commit 16938ae7 surfaced 2 more issues:
BUG_pr-review-job-58ae3148_0001 — non-BQ materialized via PUT bypasses source_query check
app/api/admin.py update_table only enforces 'query_mode=materialized
requires source_query' for source_type='bigquery' rows (via the
synthetic RegisterTableRequest at line 2129+). Non-BQ source types
(Keboola) skip the check — admin could PUT {query_mode: materialized}
on a Keboola local row without source_query, persist successfully,
then crash at the next sync tick when kb_materialize_query received
sql=None and DuckDB rejected COPY (None) TO '...'.
Fix: generic coherence guard before the BQ-specific block — for ALL
source types, query_mode='materialized' requires non-empty source_query
in the merged record. Returns 422 with a hint about reverting via
query_mode='local'/'remote'.
ANALYSIS_pr-review-job-642ff90f_0007 — diagnose returns 'ok' on BQ resolution failure
app/api/health.py:_check_bq_billing_project caught get_bq_access()
exceptions and returned status='ok' with a 'could not resolve' detail.
Automated alerting keyed on status != 'ok' would silently miss missing
google-cloud-bigquery, auth failures, or malformed config. Fix: return
status='unknown' on resolution failure — surfaces it on operator
dashboards without promoting the overall health to 'degraded' (which
'warning' does, intentionally for the billing==project case).
Tests:
- test_update_keboola_to_materialized_without_source_query_rejected:
PUT {query_mode: materialized} on a Keboola local row returns 422
with 'source_query' in the detail
- test_diagnose_returns_unknown_status_when_bq_resolution_fails:
when get_bq_access raises, the bq_config service entry surfaces
status='unknown' (not 'ok')
Full sweep: 2507 passed, 25 skipped, 0 failed (+2 from previous sweep
because of the 2 new regression tests; 8 pre-existing internal_roles
schema-migration failures still ignored per task brief).
133 lines
4.2 KiB
Python
133 lines
4.2 KiB
Python
"""Tests for Keboola materialized registration."""
|
|
import pytest
|
|
|
|
|
|
def test_register_keboola_materialized_accepts_source_query(seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
auth = {"Authorization": f"Bearer {token}"}
|
|
r = c.post(
|
|
"/api/admin/register-table",
|
|
headers=auth,
|
|
json={
|
|
"name": "orders_recent",
|
|
"source_type": "keboola",
|
|
"query_mode": "materialized",
|
|
"source_query": "SELECT * FROM kbc.\"in.c-sales\".\"orders\" WHERE date > '2026-01-01'",
|
|
"sync_schedule": "daily 03:00",
|
|
},
|
|
)
|
|
assert r.status_code == 201, r.text
|
|
|
|
|
|
def test_register_keboola_materialized_rejects_missing_source_query(seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
auth = {"Authorization": f"Bearer {token}"}
|
|
r = c.post(
|
|
"/api/admin/register-table",
|
|
headers=auth,
|
|
json={
|
|
"name": "orders_recent",
|
|
"source_type": "keboola",
|
|
"query_mode": "materialized",
|
|
# source_query missing
|
|
},
|
|
)
|
|
assert r.status_code == 422
|
|
assert "source_query" in r.text
|
|
|
|
|
|
def test_register_keboola_materialized_skips_bucket_check(seeded_app):
|
|
"""Materialized rows don't need bucket/source_table — the SELECT inlines
|
|
the references. Mirror of BQ materialized validator behavior."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
auth = {"Authorization": f"Bearer {token}"}
|
|
r = c.post(
|
|
"/api/admin/register-table",
|
|
headers=auth,
|
|
json={
|
|
"name": "x",
|
|
"source_type": "keboola",
|
|
"query_mode": "materialized",
|
|
"source_query": "SELECT 1",
|
|
# No bucket / source_table — must still succeed.
|
|
},
|
|
)
|
|
assert r.status_code == 201, r.text
|
|
|
|
|
|
def test_update_keboola_materialized_clears_stale_source_query_on_mode_switch(seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
auth = {"Authorization": f"Bearer {token}"}
|
|
|
|
# Register materialized.
|
|
r = c.post(
|
|
"/api/admin/register-table",
|
|
headers=auth,
|
|
json={
|
|
"name": "x",
|
|
"source_type": "keboola",
|
|
"query_mode": "materialized",
|
|
"source_query": "SELECT 1",
|
|
},
|
|
)
|
|
assert r.status_code == 201
|
|
|
|
# PUT to switch back to local — source_query must clear.
|
|
r = c.put(
|
|
"/api/admin/registry/x",
|
|
headers=auth,
|
|
json={
|
|
"source_type": "keboola",
|
|
"query_mode": "local",
|
|
"bucket": "in.c-foo",
|
|
"source_table": "y",
|
|
},
|
|
)
|
|
assert r.status_code == 200
|
|
|
|
r = c.get("/api/admin/registry", headers=auth)
|
|
rows = r.json()["tables"]
|
|
row = next(t for t in rows if t["id"] == "x")
|
|
assert row.get("source_query") in (None, "")
|
|
|
|
|
|
def test_update_keboola_to_materialized_without_source_query_rejected(seeded_app):
|
|
"""Devin finding 2026-05-01 (BUG_pr-review-job-58ae3148_0001):
|
|
PUT cannot persist a non-BQ materialized row without source_query.
|
|
Pre-fix, the validation only fired for source_type='bigquery' via the
|
|
synthetic RegisterTableRequest; Keboola rows could be flipped to
|
|
materialized with source_query=None and crash at the next sync tick."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
auth = {"Authorization": f"Bearer {token}"}
|
|
|
|
# Register a Keboola local row (source_query intentionally absent).
|
|
r = c.post(
|
|
"/api/admin/register-table",
|
|
headers=auth,
|
|
json={
|
|
"name": "kb_local",
|
|
"source_type": "keboola",
|
|
"bucket": "in.c-foo",
|
|
"source_table": "events",
|
|
"query_mode": "local",
|
|
},
|
|
)
|
|
assert r.status_code == 201, r.text
|
|
|
|
# Try to flip to materialized WITHOUT shipping source_query.
|
|
r = c.put(
|
|
"/api/admin/registry/kb_local",
|
|
headers=auth,
|
|
json={"query_mode": "materialized"},
|
|
)
|
|
assert r.status_code == 422, r.text
|
|
body = r.json()
|
|
detail = body.get("detail", "")
|
|
if isinstance(detail, list):
|
|
detail = " ".join(str(d) for d in detail)
|
|
assert "source_query" in detail.lower(), body
|