diff --git a/CHANGELOG.md b/CHANGELOG.md index 22e6dcd..689b41c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup` page now produces a - `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`. - Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`). - `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces. +- **Schema v24 migration retry path is no longer dead** (Devin Review on `db.py:1757`, escalated from advisory to critical on rescan). Pre-fix: when `_v23_to_v24_finalize` had materialized BQ rows to migrate but `data_source.bigquery.project` was not configured, it logged a warning per row and returned normally. The schema_version then bumped to 24 unconditionally, the `if current < 24:` gate in `_ensure_schema` skipped the function on every subsequent startup, and the affected rows kept their DuckDB-flavor `bq."ds"."tbl"` source_query forever — which the new `_wrap_admin_sql_for_jobs_api` wrapping path rejects as unparseable BQ SQL with no automatic recovery. The "set the project and restart to retry" log hint pointed at a code path that no longer ran. Fix: the migration now raises `RuntimeError` BEFORE the schema_version bump when it has rows to migrate but no project_id, blocking startup with a clear actionable error pointing at `data_source.bigquery.project`. Operator configures the project, restarts, and the migration completes (schema_version is still at 23, so the `if current < 24:` gate fires). Side effect: a BQ-using deployment that hasn't set the project blocks startup until they do — that's the right call for a config error that would otherwise silently break materialized tables. Two regression tests in `test_schema_v24_source_query_rewrite.py`: `test_v24_raises_when_project_not_configured_and_rows_need_migration` (raise + version-stays-at-23) and `test_v24_skips_clean_when_no_rows_match_even_without_project` (no-rows-no-block invariant). - **`agnes admin register-table` UX**: three real-world feedback items addressed. - **`--query-mode materialized` now requires `--bucket`** (client-side validation; exits with a clear error before hitting the server). The previous help docstring claimed `--bucket` was *ignored* for materialized rows, but the value is actually load-bearing — `agnes schema ` builds the BQ identifier as `bq..`, so an empty bucket registered the row but broke subsequent schema/describe with HTTP 400 "unsafe BQ identifier in registry". Docstring rewritten to reflect reality. - **Post-success hints**: after a successful registration the CLI now points operators at the two follow-ups they routinely miss: (a) `agnes setup first-sync` to materialize the parquet (registration alone doesn't trigger a build; `agnes pull` reports "Updated 0 tables" until the scheduler tick), and (b) `agnes admin grant create table ` to make the row visible in `agnes catalog` for non-admin users (catalog is RBAC-filtered). diff --git a/src/db.py b/src/db.py index 57390aa..27617d7 100644 --- a/src/db.py +++ b/src/db.py @@ -1729,19 +1729,37 @@ def _v23_to_v24_finalize(conn: duckdb.DuckDBPyConnection) -> None: if not rows: return # Nothing to migrate; skip the transaction. + # If we have rows to migrate AND project_id isn't configured, we cannot + # rewrite their source_query. Raise BEFORE the schema_version bump so + # the migration re-runs on the NEXT startup (after the operator + # configures the project). Pre-fix the function logged a warning per + # row and returned normally — the schema_version then bumped to 24 + # unconditionally, the `if current < 24:` gate skipped this function + # forever after, and rows stayed in DuckDB-flavor SQL. The new + # `_wrap_admin_sql_for_jobs_api` wrapping path then rejected those + # rows at materialize time as unparseable BQ SQL with no automatic + # recovery (Devin Review on db.py:1757). Side effect: a BQ-using + # deployment that hasn't set the project blocks startup until they + # do — that's the right call for a config error that would otherwise + # silently break materialized tables. + if not project_id: + raise RuntimeError( + f"v24 migration cannot complete: {len(rows)} materialized " + f"BigQuery row(s) need their source_query rewritten from " + f"DuckDB-flavor `bq.\"ds\".\"tbl\"` to BQ-native " + f"`.ds.tbl`, but `data_source.bigquery.project` is " + f"not configured. Set it via /admin/server-config (or " + f"`instance.yaml: data_source.bigquery.project`) and restart " + f"the app to retry the migration. The schema version is NOT " + f"bumped to 24 until this completes; pre-migration DB " + f"snapshot is at `{{DATA_DIR}}/state/system.duckdb.pre-migrate`." + ) + conn.execute("BEGIN TRANSACTION") try: for row_id, sq in rows: if sq is None: continue - if not project_id: - logger.warning( - "v24 migration: skipping rewrite of source_query for row %r — " - "data_source.bigquery.project is not configured. Set it via " - "/admin/server-config and restart the app to retry the " - "migration.", row_id, - ) - continue new_sq = pattern.sub(_replace_for_v24(project_id), sq) if new_sq != sq: conn.execute( diff --git a/tests/test_schema_v24_source_query_rewrite.py b/tests/test_schema_v24_source_query_rewrite.py index 2f0f947..84f1237 100644 --- a/tests/test_schema_v24_source_query_rewrite.py +++ b/tests/test_schema_v24_source_query_rewrite.py @@ -95,7 +95,24 @@ def test_v24_idempotent_when_already_bq_native(monkeypatch): conn.close() -def test_v24_logs_warning_when_project_not_configured(monkeypatch, caplog): +def test_v24_raises_when_project_not_configured_and_rows_need_migration(monkeypatch): + """Regression for Devin Review on db.py:1757. + + Pre-fix: when v24 migration found rows to migrate but + `data_source.bigquery.project` was empty, it logged a warning per + row and returned normally. The schema_version then bumped to 24 + unconditionally → next start's `if current < 24:` gate skipped + `_v23_to_v24_finalize` forever, leaving rows in DuckDB-flavor SQL + that the new `_wrap_admin_sql_for_jobs_api` rejects as unparseable. + The "set the project and restart to retry" log hint pointed at a + code path that no longer ran. + + Post-fix: the migration raises a RuntimeError BEFORE the + schema_version bump. The exception propagates out of `_ensure_schema`, + blocking app startup with a clear actionable error. Operator + configures the project, restarts, and the migration completes + because schema_version is still 23. + """ with tempfile.TemporaryDirectory() as tmp: monkeypatch.setenv("DATA_DIR", tmp) monkeypatch.setattr( @@ -112,20 +129,64 @@ def test_v24_logs_warning_when_project_not_configured(monkeypatch, caplog): ["t1", "t1", "bigquery", "materialized", "ds", "tbl", 'SELECT * FROM bq."ds"."tbl"'], ) - with caplog.at_level("WARNING"): + + # Migration must REFUSE to bump schema_version when it can't + # complete the row-rewrite. The error message must point the + # operator at the right knob (`data_source.bigquery.project`). + with pytest.raises(RuntimeError) as exc: _ensure_schema(conn) + msg = str(exc.value) + assert "data_source.bigquery.project" in msg + assert "restart" in msg.lower() + + # Row stays in DuckDB-flavor (we couldn't rewrite it). row = conn.execute( "SELECT source_query FROM table_registry WHERE id='t1'" ).fetchone() assert row[0] == 'SELECT * FROM bq."ds"."tbl"' - assert any( - "v24" in r.message.lower() or "project" in r.message.lower() - for r in caplog.records + + # Critical: schema_version stays at 23 so the migration retries + # on the next startup once the operator configures the project. + from src.db import get_schema_version + assert get_schema_version(conn) == 23, ( + "schema_version must NOT bump to 24 when v24 migration is " + "deferred — otherwise the documented retry path is dead" ) finally: conn.close() +def test_v24_skips_clean_when_no_rows_match_even_without_project(monkeypatch): + """Counterpart to the raise-on-deferred test: a deployment with NO + materialized BQ rows (typical Keboola-only or remote-only install) + must NOT block startup just because `data_source.bigquery.project` + isn't configured. The `if not rows: return` early-out at the top of + `_v23_to_v24_finalize` handles this — the raise only fires when + there's actual work to defer.""" + import pytest as _pytest # noqa: F401 (referenced if test extends later) + + with tempfile.TemporaryDirectory() as tmp: + monkeypatch.setenv("DATA_DIR", tmp) + monkeypatch.setattr( + "app.instance_config.get_value", + lambda *args, **kw: kw.get("default", ""), + ) + Path(tmp, "state").mkdir(parents=True, exist_ok=True) + db_path = Path(tmp, "state", "system.duckdb") + conn = duckdb.connect(str(db_path)) + try: + _seed_v23(conn) + # No materialized BQ rows seeded. + + # Must NOT raise — there's nothing to migrate. + _ensure_schema(conn) + + from src.db import get_schema_version, SCHEMA_VERSION + assert get_schema_version(conn) == SCHEMA_VERSION + finally: + conn.close() + + def test_v24_keboola_materialized_row_not_rewritten(monkeypatch): """Materialized rows with source_type != 'bigquery' must not be touched by v24. Keboola materialized has no notion of bq."ds"."tbl" syntax;