Closes the 'admin pre-stages a curated table/view for analysts' use case end-to-end across both supported source connectors.

Backend (BigQuery + Keboola, schema v20):
- schema v20 adds source_query TEXT to table_registry (renumbered from v19 after main's #150 RBAC migration also bumped to v19)
- connectors/bigquery/extractor.py adds materialize_query(table_id, sql, *, bq, output_dir, max_bytes=...) — BqAccess session, dry-run cost guardrail (default 10 GiB, configurable via data_source.bigquery.max_bytes_per_materialize), idempotent ATTACH, rows/bytes/md5 metadata for sync_state
- connectors/keboola/access.py — new KeboolaAccess facade (parallel of BqAccess) wrapping ATTACH 'keboola://...' AS kbc
- connectors/keboola/extractor.py adds materialize_query — same shape, no dry-run analog (Keboola Storage API has a different cost model); legacy bucket-download path skips query_mode='materialized' rows
- app/api/sync.py:_run_materialized_pass dispatches by source_type to the right materialize_query
- app/api/admin.py: RegisterTableRequest accepts source_query; model_validator enforces coherence of mode↔source_query↔bucket; PUT preserves omitted fields; deprecation marks (Field(deprecated=True)) on sync_strategy + profile_after_sync (no extractor reads them; profile_after_sync becomes inert — bug from earlier work where /api/sync/trigger never honored the flag); _BQ_OPTIONAL_FIELD_DEFAULTS injects defaults into GET /server-config payload

Operator + CLI surface:
- da admin register-table --query / --query-mode materialized
- scripts/smoke-test-materialized-bq.sh — end-to-end smoke for operators

Tests (incl. spike + integration + regression):
- test_db_migration_v20, test_table_registry_source_query
- test_bq_materialize, test_bq_cost_guardrail, test_bq_init_extract_skips
- test_keboola_access, test_keboola_extension_query_passthrough (lock-in for the DuckDB extension capability), test_keboola_materialize, test_keboola_init_extract_skips, test_keboola_materialized_e2e (skipped without KBC_TEST_* creds)
- test_sync_trigger_materialized, test_sync_trigger_keboola_materialized
- test_api_admin_materialized, test_cli_admin_materialized
- test_admin_bq_register, test_admin_discover_bigquery, test_admin_keboola_materialized, test_admin_phase_c_deprecation, test_admin_put_preservation, test_materialized_e2e

Cost: BQ uses bigquery_query() (jobs API, view-aware) — works on tables, views, materialized views uniformly. Keboola uses ATTACH+COPY parquet through the DuckDB extension.
70 lines
2.6 KiB
Python
70 lines
2.6 KiB
Python
"""Verify Phase C deprecation marks + profile_after_sync becomes inert."""
|
|
import pytest
|
|
from app.api.admin import RegisterTableRequest, UpdateTableRequest
|
|
|
|
|
|
def test_register_request_marks_sync_strategy_deprecated():
    """RegisterTableRequest's JSON schema flags ``sync_strategy`` as deprecated."""
    properties = RegisterTableRequest.model_json_schema()["properties"]
    sync_strategy_schema = properties["sync_strategy"]
    # Must be the literal True, not merely truthy and not absent.
    assert sync_strategy_schema.get("deprecated") is True
|
|
|
|
|
|
def test_register_request_marks_profile_after_sync_deprecated():
    """RegisterTableRequest's JSON schema flags ``profile_after_sync`` as deprecated."""
    properties = RegisterTableRequest.model_json_schema()["properties"]
    profile_after_sync_schema = properties["profile_after_sync"]
    # Must be the literal True, not merely truthy and not absent.
    assert profile_after_sync_schema.get("deprecated") is True
|
|
|
|
|
|
def test_register_endpoint_accepts_profile_after_sync_for_backcompat(seeded_app):
    """A register request that still carries profile_after_sync gets no error.

    Legacy external clients may keep sending the deprecated field; the
    endpoint must answer 201 instead of rejecting the payload.
    """
    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]
    headers = {"Authorization": f"Bearer {admin_token}"}

    payload = {
        "name": "x",
        "source_type": "keboola",
        "bucket": "in.c-foo",
        "source_table": "y",
        "query_mode": "local",
        # Deprecated field that a legacy client may still send.
        "profile_after_sync": True,
    }

    response = client.post(
        "/api/admin/register-table", headers=headers, json=payload
    )

    assert response.status_code == 201
|
|
|
|
|
|
def test_register_endpoint_does_not_persist_profile_after_sync(seeded_app):
    """The persisted row does not carry a request-driven profile_after_sync.

    The column may still exist in the DB for back-compat, but the admin
    path never writes a non-default value. The test registers a table
    with ``profile_after_sync`` set, reads the registry back, and checks
    that any value reported for the field is the default (True) or None.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    auth = {"Authorization": f"Bearer {token}"}

    r = c.post(
        "/api/admin/register-table",
        headers=auth,
        json={
            "name": "y",
            "source_type": "keboola",
            "bucket": "in.c-foo",
            "source_table": "y",
            "query_mode": "local",
            "profile_after_sync": True,
        },
    )
    assert r.status_code == 201

    r = c.get("/api/admin/registry", headers=auth)
    rows = r.json()["tables"]

    # Use a default so a missing row fails with a readable assertion
    # instead of a bare StopIteration from next().
    row = next((t for t in rows if t["id"] == "y"), None)
    assert row is not None, (
        "registered table 'y' not found in /api/admin/registry response"
    )

    # The field's value in the registry response is whatever the DB
    # default is (True per current schema); the request value must NOT be
    # echoed back. If the field is present at all (legacy back-compat in
    # the GET serializer) it is the schema default; if it is absent
    # (deprecated and stripped), that is also fine.
    # NOTE(review): the request above sends True, which matches the stated
    # schema default, so this check may not distinguish "echoed" from
    # "default" -- consider sending False once the default is confirmed.
    if "profile_after_sync" in row:
        # Identity checks on purpose: 1 or "true" must not pass as True.
        assert row["profile_after_sync"] is True or row["profile_after_sync"] is None
|