* fix(db): get_analytics_db() singleton mirroring get_system_db pattern Closes #163. Pre-fix `get_analytics_db()` opened a fresh `duckdb.connect()` on every call; most callers don't `.close()` the returned handle, so each leaked connection held a WAL ref + FD until GC kicked in. Under load this manifested as "too many open files" or DuckDB lock contention on the analytics DB. Singleton + cursor-per-call mirrors `get_system_db()` (lines 882-904 of src/db.py) — one underlying connection persists; callers that .close() the returned handle only close the cursor. Re-opens transparently when DATA_DIR changes (test fixtures that swap data dirs across cases). `get_analytics_db_readonly()` deliberately stays per-call — each invocation re-ATTACHes extract.duckdb files into a fresh read-only context, so caching the connection would require careful re-ATTACH bookkeeping the read-only path doesn't currently do. New `close_analytics_db()` mirrors `close_system_db()` (best-effort CHECKPOINT then close, swallow exceptions). Wired into the FastAPI shutdown hook in `app/main.py` alongside `close_system_db()`. 5 tests in `tests/test_analytics_db_singleton.py` pin the contract: - caches connection (two calls → same _analytics_db_conn) - closing a cursor doesn't close the underlying connection - DATA_DIR change → singleton drops + reopens - thread-safe (16 concurrent calls share the singleton) - close_analytics_db() clears state + reopen works Out of scope (per #163): auditing all caller sites to drop their now- redundant `.close()` calls. Closing a cursor is harmless; the production benefit is the connection cap. Verified: 4525 passed locally (4520 + 5 new), 1 pre-existing fail in test_readers_in_pre_init_dir (subprocess timeout, on origin/main, no relation to this diff). * release: 0.54.5 — close #163 analytics_db singleton Last commit on the PR per CLAUDE.md hard rule. Patch bump (0.54.4 → 0.54.5) for the get_analytics_db() singleton refactor. No DB migration; no operator-facing config change. Internal-only cleanup of a known FD-leak under load. Closes #163.
153 lines
5.9 KiB
Python
153 lines
5.9 KiB
Python
"""`get_analytics_db()` is a singleton mirroring `get_system_db()` (#163).
|
|
|
|
Pre-#163 every call opened a fresh `duckdb.connect()` — most callers
|
|
don't `.close()` the returned handle, so each leaked connection held a
|
|
WAL ref + FD until GC kicked in. Under load this manifested as "too
|
|
many open files" or DuckDB lock contention on the analytics DB.
|
|
|
|
These tests pin the new contract so any regression to per-call
|
|
`duckdb.connect()` is loud:
|
|
|
|
1. Two consecutive calls return cursors backed by the same connection.
|
|
2. Closing one cursor does NOT close the underlying connection.
|
|
3. `DATA_DIR` change → fresh connection on next call.
|
|
4. Concurrent calls don't race (the lock serializes init).
|
|
5. `close_analytics_db()` clears the singleton + a subsequent call
|
|
reopens cleanly.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import threading
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _reset_singleton(monkeypatch, tmp_path):
|
|
"""Each test gets its own DATA_DIR + clean singleton state.
|
|
|
|
Reset both globals before AND after the test so a leak from a
|
|
previous test (this file or anywhere else in the suite) doesn't
|
|
pollute the case under inspection.
|
|
"""
|
|
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
|
import src.db as db_mod
|
|
db_mod._analytics_db_conn = None
|
|
db_mod._analytics_db_path = None
|
|
yield
|
|
db_mod._analytics_db_conn = None
|
|
db_mod._analytics_db_path = None
|
|
|
|
|
|
def test_get_analytics_db_caches_connection():
|
|
"""Two consecutive calls must share the same underlying DuckDB
|
|
connection object — not open a fresh one each time."""
|
|
from src.db import get_analytics_db
|
|
import src.db as db_mod
|
|
cur1 = get_analytics_db()
|
|
cur2 = get_analytics_db()
|
|
# Cursors are different objects (DuckDB returns a fresh cursor each
|
|
# call) but they're both backed by `_analytics_db_conn` — only one
|
|
# underlying connection should have been opened.
|
|
assert db_mod._analytics_db_conn is not None
|
|
assert cur1 is not cur2 # cursors differ
|
|
# Sanity: both cursors execute against the same DB by writing +
|
|
# reading via the shared connection.
|
|
cur1.execute("CREATE TABLE singleton_probe (x INTEGER)")
|
|
cur2.execute("INSERT INTO singleton_probe VALUES (42)")
|
|
rows = cur1.execute("SELECT x FROM singleton_probe").fetchall()
|
|
assert rows == [(42,)]
|
|
|
|
|
|
def test_closing_cursor_does_not_close_connection():
|
|
"""The whole point of `.cursor()` indirection — close the cursor
|
|
handle, the underlying connection stays usable for the next call."""
|
|
from src.db import get_analytics_db
|
|
import src.db as db_mod
|
|
cur1 = get_analytics_db()
|
|
cur1.execute("CREATE TABLE probe (x INTEGER)")
|
|
cur1.close() # caller is allowed to do this; mustn't break #2 call
|
|
# The connection itself must still be alive on the singleton.
|
|
assert db_mod._analytics_db_conn is not None
|
|
cur2 = get_analytics_db()
|
|
rows = cur2.execute("SELECT COUNT(*) FROM probe").fetchall()
|
|
assert rows == [(0,)]
|
|
|
|
|
|
def test_get_analytics_db_reopens_on_data_dir_change(tmp_path, monkeypatch):
|
|
"""When DATA_DIR (the resolved path) changes, the singleton must
|
|
drop the old connection and open a fresh one against the new path.
|
|
This is the test-fixture path — production never moves DATA_DIR
|
|
mid-process, but pytest fixtures do."""
|
|
import src.db as db_mod
|
|
from src.db import get_analytics_db
|
|
cur1 = get_analytics_db()
|
|
cur1.execute("CREATE TABLE marker_a (x INTEGER)")
|
|
conn_a = db_mod._analytics_db_conn
|
|
|
|
# Move to a new DATA_DIR — singleton must reopen.
|
|
new_dir = tmp_path.parent / "alt-data"
|
|
new_dir.mkdir(exist_ok=True)
|
|
monkeypatch.setenv("DATA_DIR", str(new_dir))
|
|
cur2 = get_analytics_db()
|
|
conn_b = db_mod._analytics_db_conn
|
|
|
|
assert conn_a is not conn_b, "singleton should have reopened on DATA_DIR change"
|
|
# The new DB doesn't have marker_a — confirms it's a fresh DB at the new path.
|
|
with pytest.raises(Exception):
|
|
cur2.execute("SELECT * FROM marker_a")
|
|
|
|
|
|
def test_get_analytics_db_thread_safe():
|
|
"""Concurrent calls from N threads must produce exactly ONE
|
|
underlying connection (the lock serializes the init branch)."""
|
|
from src.db import get_analytics_db
|
|
import src.db as db_mod
|
|
|
|
errors: list[BaseException] = []
|
|
cursors: list = []
|
|
|
|
def worker():
|
|
try:
|
|
cur = get_analytics_db()
|
|
cursors.append(cur)
|
|
except BaseException as e:
|
|
errors.append(e)
|
|
|
|
threads = [threading.Thread(target=worker) for _ in range(16)]
|
|
for t in threads:
|
|
t.start()
|
|
for t in threads:
|
|
t.join(timeout=10)
|
|
|
|
assert errors == [], errors
|
|
assert len(cursors) == 16
|
|
# All cursors share one connection.
|
|
assert db_mod._analytics_db_conn is not None
|
|
# Any race-induced second connection would be re-assigned and the
|
|
# first would orphan; we can't probe that directly, but functionally
|
|
# all 16 threads must see the SAME singleton state.
|
|
cursors[0].execute("CREATE TABLE thread_probe (x INTEGER)")
|
|
rows = cursors[15].execute("SELECT COUNT(*) FROM thread_probe").fetchall()
|
|
assert rows == [(0,)], "16th thread's cursor doesn't see the 1st's table — race"
|
|
|
|
|
|
def test_close_analytics_db_clears_singleton_and_reopen_works():
|
|
"""Shutdown hook clears the singleton; a subsequent call after
|
|
re-init (test process keeps running) must reopen cleanly."""
|
|
import src.db as db_mod
|
|
from src.db import close_analytics_db, get_analytics_db
|
|
cur1 = get_analytics_db()
|
|
cur1.execute("CREATE TABLE probe (x INTEGER)")
|
|
assert db_mod._analytics_db_conn is not None
|
|
|
|
close_analytics_db()
|
|
assert db_mod._analytics_db_conn is None
|
|
assert db_mod._analytics_db_path is None
|
|
|
|
# Re-open after close: fresh cursor, table from previous session
|
|
# PERSISTS on disk (we close, not nuke).
|
|
cur2 = get_analytics_db()
|
|
rows = cur2.execute("SELECT COUNT(*) FROM probe").fetchall()
|
|
assert rows == [(0,)]
|