fix: extractor writes to temp file to avoid lock with orchestrator
Writes to extract.duckdb.tmp and then atomically renames it to extract.duckdb, avoiding a DuckDB lock conflict when the orchestrator holds a read connection on extract.duckdb.
This commit is contained in:
parent
675a29c1c7
commit
10d9280ab5
1 changed file with 11 additions and 1 deletion
|
|
@ -52,8 +52,13 @@ def run(output_dir: str, table_configs: List[Dict[str, Any]], keboola_url: str,
|
||||||
data_dir = output_path / "data"
|
data_dir = output_path / "data"
|
||||||
data_dir.mkdir(parents=True, exist_ok=True)
|
data_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Write to temp file then rename — avoids lock conflict with orchestrator
|
||||||
|
# which may hold a read lock on the existing extract.duckdb
|
||||||
db_path = output_path / "extract.duckdb"
|
db_path = output_path / "extract.duckdb"
|
||||||
conn = duckdb.connect(str(db_path))
|
tmp_db_path = output_path / "extract.duckdb.tmp"
|
||||||
|
if tmp_db_path.exists():
|
||||||
|
tmp_db_path.unlink()
|
||||||
|
conn = duckdb.connect(str(tmp_db_path))
|
||||||
|
|
||||||
stats = {"tables_extracted": 0, "tables_failed": 0, "errors": []}
|
stats = {"tables_extracted": 0, "tables_failed": 0, "errors": []}
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
|
|
@ -115,6 +120,11 @@ def run(output_dir: str, table_configs: List[Dict[str, Any]], keboola_url: str,
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
# Atomic replace: swap temp DB into place
|
||||||
|
import shutil
|
||||||
|
if tmp_db_path.exists():
|
||||||
|
shutil.move(str(tmp_db_path), str(db_path))
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue