From cb9c566d0783a45276606a76448e65b5ee8205aa Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Thu, 9 Apr 2026 06:48:25 +0200 Subject: [PATCH] fix: rebuild_source delegates to full rebuild to preserve all source views MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _do_rebuild_source was creating a fresh temp DB with only one source, then atomically replacing analytics.duckdb — wiping views from every other source. Now it delegates to _do_rebuild so all extract dirs are re-attached in a single pass. Adds test_rebuild_source_preserves_other_sources to guard the regression. --- src/orchestrator.py | 27 ++++++++------------------- tests/test_orchestrator.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/orchestrator.py b/src/orchestrator.py index 26df041..b4e97ed 100644 --- a/src/orchestrator.py +++ b/src/orchestrator.py @@ -114,31 +114,20 @@ class SyncOrchestrator: return result def _do_rebuild_source(self, source_name: str) -> List[str]: + """Rebuild views for a single source by doing a full rebuild. + + A full rebuild is necessary because the analytics DB is created fresh + each time (temp file + atomic swap). Rebuilding only one source would + destroy views from all other sources. + """ extracts_dir = _get_extracts_dir() db_file = extracts_dir / source_name / "extract.duckdb" if not db_file.exists(): logger.warning("No extract.duckdb for source %s", source_name) return [] - tmp_path = self._db_path + ".tmp" - if Path(tmp_path).exists(): - Path(tmp_path).unlink() - conn = duckdb.connect(tmp_path) - try: - # Detach if already attached - try: - conn.execute(f"DETACH {source_name}") - except Exception: - pass - tables = self._attach_and_create_views(conn, source_name, str(db_file)) - finally: - conn.close() - - import shutil - if Path(tmp_path).exists(): - shutil.move(tmp_path, self._db_path) - - return tables + result = self._do_rebuild() + return result.get(source_name, []) def _attach_and_create_views( self, conn: duckdb.DuckDBPyConnection, source_name: str, db_path: str diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 429d3dc..cefdb9a 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -248,6 +248,37 @@ class TestSyncOrchestrator: assert "keboola" in result assert "orders" in result["keboola"] + def test_rebuild_source_preserves_other_sources(self, setup_env): + """rebuild_source('jira') must not destroy views from keboola or other sources.""" + from src.orchestrator import SyncOrchestrator + + _create_mock_extract( + setup_env["extracts_dir"], + "keboola", + [{"name": "orders", "data": [{"id": "1", "total": "100"}]}], + ) + _create_mock_extract( + setup_env["extracts_dir"], + "jira", + [{"name": "issues", "data": [{"key": "PROJ-1"}]}], + ) + + orch = SyncOrchestrator(analytics_db_path=setup_env["analytics_db"]) + + # Full rebuild — both sources visible + result = orch.rebuild() + assert "keboola" in result + assert "jira" in result + + # Jira webhook triggers rebuild_source("jira") + tables = orch.rebuild_source("jira") + assert "issues" in tables + + # Full rebuild again (simulates next scheduled run) — keboola must still be there + result2 = orch.rebuild() + assert "keboola" in result2, "keboola must survive after rebuild_source('jira')" + assert "jira" in result2 + def test_rebuild_idempotent(self, setup_env): from src.orchestrator import SyncOrchestrator