E2E sub-agent finding: on an instance configured with
`data_source.type='bigquery'` and no `data_source.keboola.*` block, an admin
can POST `{source_type: 'keboola'}` to /api/admin/register-table and receive
a 201. The row lands in the registry but never syncs, because the scheduler
has no Keboola URL/token to ATTACH against. The operator only notices the
gap when `da catalog` keeps showing nothing.
The new `_validate_source_type_configured` helper runs immediately after
the id/view-name collision checks in `register_table`. A source_type is
considered configured when:
- it matches `get_data_source_type()` (the instance's primary), OR
- a non-empty `data_source.<source_type>` block exists in the effective
`instance.yaml` (multi-source instance), OR
- it's in `_SOURCE_TYPES_INDEPENDENT_OF_DATA_SOURCE` (Jira / local — both
get data through paths that don't involve `data_source.*`).
When none of these hold, the request is rejected with a 422 whose message
names the configured primary source and points at `/admin/server-config`
for enabling a secondary one. A `None` or empty source_type is still
tolerated for backward compatibility with legacy CLI scripts that don't set
the field — the route resolves it later.
5 new tests cover: keboola-on-bq rejected, bq-on-keboola rejected,
matching source_type still works, jira allowed regardless, omitted
source_type passes through.
Existing tests that registered Keboola rows on the unconfigured default
test instance now opt into a `keboola_instance` fixture to satisfy the
new validator. This affects tests/test_admin_bq_register.py and the sibling
keboola_materialized and unregister_cleanup test modules; in addition, the
multi-source PUT test in test_admin_bq_register adds a `keboola` block to
its synthetic config.
The pre-existing failure of test_missing_project_returns_error in
TestRebuildFromRegistry is unrelated to this change: it is caused by
config-cache leakage from a previous test in the same class, and was
confirmed to fail on the prior commit by reproducing it with `git stash`.
159 lines
5.1 KiB
Python
159 lines
5.1 KiB
Python
"""Tests for Keboola materialized registration."""
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _keboola_instance(monkeypatch):
    """Run every test in this module against a Keboola-configured instance.

    The register-table route validates that the requested ``source_type`` is
    available on the instance, so the ``source_type='keboola'`` payloads used
    below need a config whose ``data_source`` declares Keboola. This fixture
    patches ``app.instance_config.load_instance_config`` to return such a
    config and calls ``reset_cache()`` both before and after the test.
    """
    keboola_settings = {
        "stack_url": "https://connection.keboola.com",
        "project_id": "1234",
        "token_env": "KEBOOLA_STORAGE_TOKEN",
    }
    instance_cfg = {
        "data_source": {
            "type": "keboola",
            "keboola": keboola_settings,
        },
    }

    def _load_fake_config():
        # Stand-in for the real loader: always hands back the Keboola config.
        return instance_cfg

    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        _load_fake_config,
        raising=False,
    )

    from app.instance_config import reset_cache

    # Reset once so the patched loader takes effect, and once more on
    # teardown so nothing loaded during the test leaks into later tests.
    reset_cache()
    yield
    reset_cache()
|
|
|
|
|
|
def test_register_keboola_materialized_accepts_source_query(seeded_app):
    """A materialized Keboola registration that ships a source_query returns 201."""
    client = seeded_app["client"]
    token = seeded_app["admin_token"]

    payload = {
        "name": "orders_recent",
        "source_type": "keboola",
        "query_mode": "materialized",
        "source_query": "SELECT * FROM kbc.\"in.c-sales\".\"orders\" WHERE date > '2026-01-01'",
        "sync_schedule": "daily 03:00",
    }
    response = client.post(
        "/api/admin/register-table",
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
    )

    # Attach the response body so a rejection explains itself in the report.
    assert response.status_code == 201, response.text
|
|
|
|
|
|
def test_register_keboola_materialized_rejects_missing_source_query(seeded_app):
    """A materialized Keboola registration without source_query is rejected.

    Posts a ``query_mode='materialized'`` payload that omits ``source_query``
    and expects a 422 whose body mentions the missing field.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    auth = {"Authorization": f"Bearer {token}"}
    r = c.post(
        "/api/admin/register-table",
        headers=auth,
        json={
            "name": "orders_recent",
            "source_type": "keboola",
            "query_mode": "materialized",
            # source_query missing
        },
    )
    # Include the response body on failure, matching the other tests in this
    # module, so an unexpected status is diagnosable from the report alone.
    assert r.status_code == 422, r.text
    assert "source_query" in r.text, r.text
|
|
|
|
|
|
def test_register_keboola_materialized_skips_bucket_check(seeded_app):
    """A materialized row registers without bucket or source_table.

    The SELECT in ``source_query`` carries the table references itself, so
    the payload deliberately omits both fields. This mirrors the BigQuery
    materialized validator behavior.
    """
    client = seeded_app["client"]
    token = seeded_app["admin_token"]
    headers = {"Authorization": f"Bearer {token}"}

    # Intentionally no bucket / source_table — registration must still succeed.
    payload = {
        "name": "x",
        "source_type": "keboola",
        "query_mode": "materialized",
        "source_query": "SELECT 1",
    }
    response = client.post("/api/admin/register-table", headers=headers, json=payload)

    assert response.status_code == 201, response.text
|
|
|
|
|
|
def test_update_keboola_materialized_clears_stale_source_query_on_mode_switch(seeded_app):
    """Switching a materialized Keboola row to local mode clears source_query.

    Registers row ``x`` as materialized with a ``source_query``, PUTs it to
    ``query_mode='local'`` with a bucket and source_table, then reads the
    registry listing and checks that the row no longer carries a
    ``source_query`` (either ``None`` or an empty string is accepted).
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    auth = {"Authorization": f"Bearer {token}"}

    # Register materialized.
    r = c.post(
        "/api/admin/register-table",
        headers=auth,
        json={
            "name": "x",
            "source_type": "keboola",
            "query_mode": "materialized",
            "source_query": "SELECT 1",
        },
    )
    assert r.status_code == 201, r.text

    # PUT to switch the row to local mode — source_query must clear.
    r = c.put(
        "/api/admin/registry/x",
        headers=auth,
        json={
            "source_type": "keboola",
            "query_mode": "local",
            "bucket": "in.c-foo",
            "source_table": "y",
        },
    )
    assert r.status_code == 200, r.text

    r = c.get("/api/admin/registry", headers=auth)
    # Check the listing call itself so an error response is reported as such
    # instead of surfacing as a KeyError on "tables".
    assert r.status_code == 200, r.text
    rows = r.json()["tables"]
    # A default keeps a missing row from escaping as a bare StopIteration.
    row = next((t for t in rows if t["id"] == "x"), None)
    assert row is not None, f"table 'x' not found in registry listing: {rows}"
    assert row.get("source_query") in (None, ""), row
|
|
|
|
|
|
def test_update_keboola_to_materialized_without_source_query_rejected(seeded_app):
    """Regression for Devin finding 2026-05-01 (BUG_pr-review-job-58ae3148_0001).

    A PUT must not persist a non-BQ materialized row that has no
    source_query. Before the fix, validation only fired for
    source_type='bigquery' via the synthetic RegisterTableRequest, so a
    Keboola row could be flipped to materialized with source_query=None and
    crash at the next sync tick.
    """
    client = seeded_app["client"]
    token = seeded_app["admin_token"]
    headers = {"Authorization": f"Bearer {token}"}

    # Start from a Keboola row in local mode; it has no source_query on purpose.
    local_row = {
        "name": "kb_local",
        "source_type": "keboola",
        "bucket": "in.c-foo",
        "source_table": "events",
        "query_mode": "local",
    }
    created = client.post("/api/admin/register-table", headers=headers, json=local_row)
    assert created.status_code == 201, created.text

    # Flip the row to materialized without supplying a source_query.
    rejected = client.put(
        "/api/admin/registry/kb_local",
        headers=headers,
        json={"query_mode": "materialized"},
    )
    assert rejected.status_code == 422, rejected.text

    body = rejected.json()
    detail = body.get("detail", "")
    # The detail may be a list of entries; flatten it to one searchable string.
    if isinstance(detail, list):
        detail_text = " ".join(map(str, detail))
    else:
        detail_text = detail
    assert "source_query" in detail_text.lower(), body
|