"""v20 adds source_query column to table_registry. Backs query_mode='materialized' for BigQuery: admin registers a SQL body that the scheduler runs through the DuckDB BQ extension and writes as a parquet to /data/extracts/bigquery/data/.parquet. The v19 step (#150) drops dataset_permissions, access_requests tables and users.role, table_registry.is_public columns; v20 then ALTERs the post-v19 table_registry to add the source_query column. """ import duckdb from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version def test_schema_version_is_31(): # v27 → v28: explicit-install (Model B) for curated marketplace plugins. # user_plugin_optouts row presence flips meaning from "excluded" to # "subscribed"; migration wipes existing rows so the inverted reading # starts from a clean baseline. Also adds marketplace_plugins.created_at # (per-plugin "newest first" sort on /marketplace), backfilled from # parent marketplace_registry.registered_at. # v28 → v29: /home page rollout — instance_templates singleton # consolidation (welcome_template + claude_md_template merged) + new # users.onboarded column. See tests/test_v29_home_migration.py for # the exhaustive coverage of that step. # v29 → v30: news_template — single versioned table for the /home # news perex + /news permalink page. See # tests/test_news_template_repository.py. # v30 → v31: session-pipeline framework. Renames session_extraction_state # → session_processor_state with composite PK (processor_name, # session_file) so multiple processors can track their own # processed-set independently. Existing rows are copied across with # processor_name='verification'; the old table is dropped. assert SCHEMA_VERSION == 31 def test_v20_adds_source_query(tmp_path): db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) _ensure_schema(conn) cols = { r[0] for r in conn.execute( "SELECT column_name FROM information_schema.columns " "WHERE table_name = 'table_registry'" ).fetchall() } assert "source_query" in cols, f"source_query missing from {cols}" assert get_schema_version(conn) == SCHEMA_VERSION conn.close() def test_claude_md_template_seeded_in_instance_templates(tmp_path): """v23 introduced claude_md_template as a singleton table; v28 consolidates it into instance_templates keyed 'claude_md'. Post-v28 the legacy table is dropped — the canonical lookup is `instance_templates WHERE key='claude_md'`. See tests/test_v28_migration.py for the migration path coverage. This test just verifies the seeded row is present on a fresh install. """ db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) _ensure_schema(conn) tables = { r[0] for r in conn.execute( "SELECT table_name FROM information_schema.tables " "WHERE table_schema = 'main'" ).fetchall() } assert "instance_templates" in tables assert "claude_md_template" not in tables, ( "claude_md_template should be consolidated away post-v28" ) row = conn.execute( "SELECT key, content FROM instance_templates WHERE key = 'claude_md'" ).fetchone() assert row is not None assert row[0] == "claude_md" assert row[1] is None # default = no override conn.close() def test_v19_db_migrates_to_v20(tmp_path): """Pre-existing v19 DB (post-RBAC-drop) without source_query upgrades cleanly without losing data.""" db_path = tmp_path / "system.duckdb" conn = duckdb.connect(str(db_path)) # Simulate a v19 DB at minimal but realistic shape: schema_version row + # a table_registry row in the post-v19 column shape (no is_public column, # since v19 finalize dropped it via the table-rebuild idiom). conn.execute( "CREATE TABLE schema_version (version INTEGER, " "applied_at TIMESTAMP DEFAULT current_timestamp)" ) conn.execute("INSERT INTO schema_version (version) VALUES (19)") conn.execute("""CREATE TABLE table_registry ( id VARCHAR PRIMARY KEY, name VARCHAR NOT NULL, source_type VARCHAR, bucket VARCHAR, source_table VARCHAR, sync_strategy VARCHAR DEFAULT 'full_refresh', query_mode VARCHAR DEFAULT 'local', sync_schedule VARCHAR, profile_after_sync BOOLEAN DEFAULT true, primary_key VARCHAR, folder VARCHAR, description TEXT, registered_by VARCHAR, registered_at TIMESTAMP DEFAULT current_timestamp )""") conn.execute("INSERT INTO table_registry (id, name) VALUES ('foo', 'foo')") _ensure_schema(conn) assert get_schema_version(conn) == SCHEMA_VERSION # bumped 19→28 forward cols = { r[0] for r in conn.execute( "SELECT column_name FROM information_schema.columns " "WHERE table_name = 'table_registry'" ).fetchall() } assert "source_query" in cols # Existing row preserved, new column NULL row = conn.execute( "SELECT id, source_query FROM table_registry WHERE id='foo'" ).fetchone() assert row == ("foo", None) conn.close()