"""v20 adds source_query column to table_registry. Backs query_mode='materialized' for BigQuery: admin registers a SQL body that the scheduler runs through the DuckDB BQ extension and writes as a parquet to /data/extracts/bigquery/data/.parquet. The v19 step (#150) drops dataset_permissions, access_requests tables and users.role, table_registry.is_public columns; v20 then ALTERs the post-v19 table_registry to add the source_query column. """ import duckdb from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version def test_schema_version_is_36(): # v27 → v28: explicit-install (Model B) for curated marketplace plugins. # user_plugin_optouts row presence flips meaning from "excluded" to # "subscribed"; migration wipes existing rows so the inverted reading # starts from a clean baseline. Also adds marketplace_plugins.created_at # (per-plugin "newest first" sort on /marketplace), backfilled from # parent marketplace_registry.registered_at. # v28 → v29: /home page rollout — instance_templates singleton # consolidation (welcome_template + claude_md_template merged) + new # users.onboarded column. See tests/test_v29_home_migration.py for # the exhaustive coverage of that step. # v29 → v30: news_template — single versioned table for the /home # news perex + /news permalink page. See # tests/test_news_template_repository.py. # v30 → v31: session-pipeline framework — session_processor_state # replaces session_extraction_state with composite PK. # v31 → v32 (this PR): flea-market upload guardrails — adds # store_entities.visibility_status + creates store_submissions. # v32 → v33 (this PR): forensic columns on store_submissions — # file_size, bundle_sha256, bundle_purged_at. Underpins the # persist-blocked-bundle behavior so admins can Rescan / # Override / Download; 30-day TTL purge clears bytes while # keeping the row + sha intact. See docs/STORE_GUARDRAILS.md. # v33 → v34: drop store_submissions.retry_count — counter mixed LLM # error count + admin rescan count, redundant with audit_log. # v34 → v35 (this PR): store_entities gains 'archived' visibility # state + archived_at + archived_by audit columns. Owner # soft-delete writes 'archived'; existing user_store_installs # keep serving the bundle through marketplace.zip / .git. # Hard delete (DELETE ?hard=true) remains admin-only. # v35 → v36 (PR #233 follow-up): re-apply NOT NULL + DEFAULT 'pending' # on store_entities.visibility_status. Lost in the v34→v35 # column rebuild. Without this, an INSERT that omits the # column lands NULL → repo reads None → undefined behavior # in the visibility gates. Value-list invariant remains # enforced application-side (DuckDB ADD CHECK on existing # column not supported). assert SCHEMA_VERSION == 36 def test_v20_adds_source_query(tmp_path): db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) _ensure_schema(conn) cols = { r[0] for r in conn.execute( "SELECT column_name FROM information_schema.columns " "WHERE table_name = 'table_registry'" ).fetchall() } assert "source_query" in cols, f"source_query missing from {cols}" assert get_schema_version(conn) == SCHEMA_VERSION conn.close() def test_claude_md_template_seeded_in_instance_templates(tmp_path): """v23 introduced claude_md_template as a singleton table; v28 consolidates it into instance_templates keyed 'claude_md'. Post-v28 the legacy table is dropped — the canonical lookup is `instance_templates WHERE key='claude_md'`. See tests/test_v28_migration.py for the migration path coverage. This test just verifies the seeded row is present on a fresh install. """ db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) _ensure_schema(conn) tables = { r[0] for r in conn.execute( "SELECT table_name FROM information_schema.tables " "WHERE table_schema = 'main'" ).fetchall() } assert "instance_templates" in tables assert "claude_md_template" not in tables, ( "claude_md_template should be consolidated away post-v28" ) row = conn.execute( "SELECT key, content FROM instance_templates WHERE key = 'claude_md'" ).fetchone() assert row is not None assert row[0] == "claude_md" assert row[1] is None # default = no override conn.close() def test_v19_db_migrates_to_v20(tmp_path): """Pre-existing v19 DB (post-RBAC-drop) without source_query upgrades cleanly without losing data.""" db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) # Simulate a v19 DB at minimal but realistic shape: schema_version row + # a table_registry row in the post-v19 column shape (no is_public column, # since v19 finalize dropped it via the table-rebuild idiom). conn.execute( "CREATE TABLE schema_version (version INTEGER, " "applied_at TIMESTAMP DEFAULT current_timestamp)" ) conn.execute("INSERT INTO schema_version (version) VALUES (19)") conn.execute("""CREATE TABLE table_registry ( id VARCHAR PRIMARY KEY, name VARCHAR NOT NULL, source_type VARCHAR, bucket VARCHAR, source_table VARCHAR, sync_strategy VARCHAR DEFAULT 'full_refresh', query_mode VARCHAR DEFAULT 'local', sync_schedule VARCHAR, profile_after_sync BOOLEAN DEFAULT true, primary_key VARCHAR, folder VARCHAR, description TEXT, registered_by VARCHAR, registered_at TIMESTAMP DEFAULT current_timestamp )""") conn.execute("INSERT INTO table_registry (id, name) VALUES ('foo', 'foo')") _ensure_schema(conn) assert get_schema_version(conn) == SCHEMA_VERSION # bumped 19→28 forward cols = { r[0] for r in conn.execute( "SELECT column_name FROM information_schema.columns " "WHERE table_name = 'table_registry'" ).fetchall() } assert "source_query" in cols # Existing row preserved, new column NULL row = conn.execute( "SELECT id, source_query FROM table_registry WHERE id='foo'" ).fetchone() assert row == ("foo", None) conn.close() def test_v35_to_v36_reapplies_visibility_constraints(tmp_path): """v34→v35 dropped NOT NULL + DEFAULT when rebuilding the column to drop the legacy CHECK; v35→v36 re-applies them. Verifies that on a freshly migrated DB, an INSERT omitting visibility_status either inherits the default 'pending' or fails — never lands NULL. """ db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) _ensure_schema(conn) assert get_schema_version(conn) == SCHEMA_VERSION cols = conn.execute( "SELECT column_name, is_nullable, column_default " "FROM information_schema.columns " "WHERE table_name = 'store_entities' " " AND column_name = 'visibility_status'" ).fetchall() assert cols, "visibility_status column missing from store_entities" name, is_nullable, default_expr = cols[0] assert is_nullable == "NO", ( f"visibility_status must be NOT NULL after v36; got is_nullable={is_nullable!r}" ) # DuckDB renders the default as a quoted literal — match either form. assert default_expr is not None, "visibility_status DEFAULT must be set" assert "pending" in str(default_expr).lower(), ( f"visibility_status DEFAULT must be 'pending'; got {default_expr!r}" ) conn.close()