fix: rebuild_source delegates to full rebuild to preserve all source views

_do_rebuild_source was creating a fresh temp DB with only one source,
then atomically replacing analytics.duckdb — wiping views from every
other source. Now it delegates to _do_rebuild so all extract dirs are
re-attached in a single pass.

Adds test_rebuild_source_preserves_other_sources to guard the regression.
This commit is contained in:
ZdenekSrotyr 2026-04-09 06:48:25 +02:00
parent 94c6b0f839
commit cb9c566d07
2 changed files with 39 additions and 19 deletions

View file

@ -114,31 +114,20 @@ class SyncOrchestrator:
return result return result
def _do_rebuild_source(self, source_name: str) -> List[str]: def _do_rebuild_source(self, source_name: str) -> List[str]:
"""Rebuild views for a single source by doing a full rebuild.
A full rebuild is necessary because the analytics DB is created fresh
each time (temp file + atomic swap). Rebuilding only one source would
destroy views from all other sources.
"""
extracts_dir = _get_extracts_dir() extracts_dir = _get_extracts_dir()
db_file = extracts_dir / source_name / "extract.duckdb" db_file = extracts_dir / source_name / "extract.duckdb"
if not db_file.exists(): if not db_file.exists():
logger.warning("No extract.duckdb for source %s", source_name) logger.warning("No extract.duckdb for source %s", source_name)
return [] return []
tmp_path = self._db_path + ".tmp" result = self._do_rebuild()
if Path(tmp_path).exists(): return result.get(source_name, [])
Path(tmp_path).unlink()
conn = duckdb.connect(tmp_path)
try:
# Detach if already attached
try:
conn.execute(f"DETACH {source_name}")
except Exception:
pass
tables = self._attach_and_create_views(conn, source_name, str(db_file))
finally:
conn.close()
import shutil
if Path(tmp_path).exists():
shutil.move(tmp_path, self._db_path)
return tables
def _attach_and_create_views( def _attach_and_create_views(
self, conn: duckdb.DuckDBPyConnection, source_name: str, db_path: str self, conn: duckdb.DuckDBPyConnection, source_name: str, db_path: str

View file

@ -248,6 +248,37 @@ class TestSyncOrchestrator:
assert "keboola" in result assert "keboola" in result
assert "orders" in result["keboola"] assert "orders" in result["keboola"]
def test_rebuild_source_preserves_other_sources(self, setup_env):
    """rebuild_source('jira') must not destroy views from keboola or other sources.

    The analytics DB is inspected directly, immediately after
    ``rebuild_source("jira")``. Checking only the result of a later full
    ``rebuild()`` is not enough: a full rebuild recreates every source's
    views, so it would hide a single-source rebuild that wiped them.
    """
    # Local import so this test does not depend on the module's import block.
    import duckdb

    from src.orchestrator import SyncOrchestrator

    def _user_views():
        # Snapshot the names of all non-internal views stored in the
        # analytics DB. Read-only so the check cannot alter the file.
        # NOTE(review): assumes the orchestrator holds no open connection to
        # the analytics DB between calls -- confirm.
        conn = duckdb.connect(str(setup_env["analytics_db"]), read_only=True)
        try:
            rows = conn.execute(
                "SELECT view_name FROM duckdb_views() WHERE NOT internal"
            ).fetchall()
        finally:
            conn.close()
        return {view_name for (view_name,) in rows}

    _create_mock_extract(
        setup_env["extracts_dir"],
        "keboola",
        [{"name": "orders", "data": [{"id": "1", "total": "100"}]}],
    )
    _create_mock_extract(
        setup_env["extracts_dir"],
        "jira",
        [{"name": "issues", "data": [{"key": "PROJ-1"}]}],
    )

    orch = SyncOrchestrator(analytics_db_path=setup_env["analytics_db"])

    # Full rebuild -- both sources visible
    result = orch.rebuild()
    assert "keboola" in result
    assert "jira" in result

    views_before = _user_views()
    # Guard against a vacuous pass: the comparison below is meaningless if
    # the full rebuild produced no views at all.
    assert views_before, "full rebuild should create views in the analytics DB"

    # Jira webhook triggers rebuild_source("jira")
    tables = orch.rebuild_source("jira")
    assert "issues" in tables

    # The regression check: every view that existed after the full rebuild
    # must still exist right after the single-source rebuild.
    lost_views = views_before - _user_views()
    assert not lost_views, (
        f"rebuild_source('jira') dropped views: {sorted(lost_views)}"
    )

    # Full rebuild again (simulates next scheduled run) -- keboola must still be there
    result2 = orch.rebuild()
    assert "keboola" in result2, "keboola must survive after rebuild_source('jira')"
    assert "jira" in result2
def test_rebuild_idempotent(self, setup_env): def test_rebuild_idempotent(self, setup_env):
from src.orchestrator import SyncOrchestrator from src.orchestrator import SyncOrchestrator