feat: schema migration v3→v4 with metric_definitions and column_metadata tables
Add SCHEMA_VERSION = 4, _V3_TO_V4_MIGRATIONS list, and if current < 4 block in _ensure_schema(). Both new tables are also added to _SYSTEM_SCHEMA for fresh installs. Tests cover fresh install, all columns, and v3→v4 migration path.
This commit is contained in:
parent
06ac937f8b
commit
bc394bd266
2 changed files with 208 additions and 9 deletions
82
src/db.py
82
src/db.py
|
|
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||
|
||||
SCHEMA_VERSION = 3
|
||||
SCHEMA_VERSION = 4
|
||||
|
||||
_SYSTEM_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
|
|
@ -168,6 +168,42 @@ CREATE TABLE IF NOT EXISTS access_requests (
|
|||
reviewed_at TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT current_timestamp
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS metric_definitions (
|
||||
id VARCHAR PRIMARY KEY,
|
||||
name VARCHAR NOT NULL,
|
||||
display_name VARCHAR NOT NULL,
|
||||
category VARCHAR NOT NULL,
|
||||
description TEXT,
|
||||
type VARCHAR DEFAULT 'sum',
|
||||
unit VARCHAR,
|
||||
grain VARCHAR DEFAULT 'monthly',
|
||||
table_name VARCHAR,
|
||||
tables VARCHAR[],
|
||||
expression VARCHAR,
|
||||
time_column VARCHAR,
|
||||
dimensions VARCHAR[],
|
||||
filters VARCHAR[],
|
||||
synonyms VARCHAR[],
|
||||
notes VARCHAR[],
|
||||
sql TEXT NOT NULL,
|
||||
sql_variants JSON,
|
||||
validation JSON,
|
||||
source VARCHAR DEFAULT 'manual',
|
||||
created_at TIMESTAMP DEFAULT current_timestamp,
|
||||
updated_at TIMESTAMP DEFAULT current_timestamp
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS column_metadata (
|
||||
table_id VARCHAR NOT NULL,
|
||||
column_name VARCHAR NOT NULL,
|
||||
basetype VARCHAR,
|
||||
description VARCHAR,
|
||||
confidence VARCHAR DEFAULT 'manual',
|
||||
source VARCHAR DEFAULT 'manual',
|
||||
updated_at TIMESTAMP DEFAULT current_timestamp,
|
||||
PRIMARY KEY (table_id, column_name)
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -259,6 +295,47 @@ _V2_TO_V3_MIGRATIONS = [
|
|||
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS is_public BOOLEAN DEFAULT true",
|
||||
]
|
||||
|
||||
_V3_TO_V4_MIGRATIONS = [
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS metric_definitions (
|
||||
id VARCHAR PRIMARY KEY,
|
||||
name VARCHAR NOT NULL,
|
||||
display_name VARCHAR NOT NULL,
|
||||
category VARCHAR NOT NULL,
|
||||
description TEXT,
|
||||
type VARCHAR DEFAULT 'sum',
|
||||
unit VARCHAR,
|
||||
grain VARCHAR DEFAULT 'monthly',
|
||||
table_name VARCHAR,
|
||||
tables VARCHAR[],
|
||||
expression VARCHAR,
|
||||
time_column VARCHAR,
|
||||
dimensions VARCHAR[],
|
||||
filters VARCHAR[],
|
||||
synonyms VARCHAR[],
|
||||
notes VARCHAR[],
|
||||
sql TEXT NOT NULL,
|
||||
sql_variants JSON,
|
||||
validation JSON,
|
||||
source VARCHAR DEFAULT 'manual',
|
||||
created_at TIMESTAMP DEFAULT current_timestamp,
|
||||
updated_at TIMESTAMP DEFAULT current_timestamp
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS column_metadata (
|
||||
table_id VARCHAR NOT NULL,
|
||||
column_name VARCHAR NOT NULL,
|
||||
basetype VARCHAR,
|
||||
description VARCHAR,
|
||||
confidence VARCHAR DEFAULT 'manual',
|
||||
source VARCHAR DEFAULT 'manual',
|
||||
updated_at TIMESTAMP DEFAULT current_timestamp,
|
||||
PRIMARY KEY (table_id, column_name)
|
||||
)
|
||||
""",
|
||||
]
|
||||
|
||||
|
||||
def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
||||
"""Create tables if they don't exist. Apply migrations if schema version changed."""
|
||||
|
|
@ -296,6 +373,9 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
|||
if current < 3:
|
||||
for sql in _V2_TO_V3_MIGRATIONS:
|
||||
conn.execute(sql)
|
||||
if current < 4:
|
||||
for sql in _V3_TO_V4_MIGRATIONS:
|
||||
conn.execute(sql)
|
||||
conn.execute(
|
||||
"UPDATE schema_version SET version = ?, applied_at = current_timestamp",
|
||||
[SCHEMA_VERSION],
|
||||
|
|
|
|||
135
tests/test_db.py
135
tests/test_db.py
|
|
@ -28,7 +28,7 @@ class TestGetSystemDb:
|
|||
"user_sync_settings", "knowledge_items", "knowledge_votes",
|
||||
"audit_log", "telegram_links", "pending_codes",
|
||||
"script_registry", "table_registry", "table_profiles",
|
||||
"dataset_permissions",
|
||||
"dataset_permissions", "metric_definitions", "column_metadata",
|
||||
}
|
||||
assert expected.issubset(tables), f"Missing: {expected - tables}"
|
||||
finally:
|
||||
|
|
@ -55,11 +55,11 @@ class TestGetSystemDb:
|
|||
class TestGetSchemaVersion:
|
||||
def test_returns_version(self, tmp_path, monkeypatch):
|
||||
_setup_data_dir(tmp_path, monkeypatch)
|
||||
from src.db import get_schema_version, get_system_db
|
||||
from src.db import get_schema_version, get_system_db, SCHEMA_VERSION
|
||||
|
||||
conn = get_system_db()
|
||||
try:
|
||||
assert get_schema_version(conn) == 3
|
||||
assert get_schema_version(conn) == SCHEMA_VERSION
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
|
@ -113,7 +113,8 @@ class TestV1ToV2Migration:
|
|||
from src.db import get_system_db, get_schema_version
|
||||
conn2 = get_system_db()
|
||||
try:
|
||||
assert get_schema_version(conn2) == 3
|
||||
from src.db import SCHEMA_VERSION
|
||||
assert get_schema_version(conn2) == SCHEMA_VERSION
|
||||
# Verify old data preserved
|
||||
row = conn2.execute("SELECT name, folder FROM table_registry WHERE id='t1'").fetchone()
|
||||
assert row[0] == "Test"
|
||||
|
|
@ -198,10 +199,10 @@ class TestMigrationSafety:
|
|||
conn.close()
|
||||
|
||||
def test_v2_to_v3_migration(self, tmp_path, monkeypatch):
|
||||
"""v2 DB migrated to v3: schema_version=3 and is_public column added."""
|
||||
"""v2 DB migrated to current schema: is_public column added."""
|
||||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||||
import duckdb as _duckdb
|
||||
from src.db import _ensure_schema, get_schema_version
|
||||
from src.db import _ensure_schema, get_schema_version, SCHEMA_VERSION
|
||||
|
||||
db_path = tmp_path / "state" / "system.duckdb"
|
||||
self._create_v2_db(db_path)
|
||||
|
|
@ -209,7 +210,7 @@ class TestMigrationSafety:
|
|||
conn = _duckdb.connect(str(db_path))
|
||||
try:
|
||||
_ensure_schema(conn)
|
||||
assert get_schema_version(conn) == 3
|
||||
assert get_schema_version(conn) == SCHEMA_VERSION
|
||||
cols = {
|
||||
r[0]
|
||||
for r in conn.execute(
|
||||
|
|
@ -285,7 +286,8 @@ class TestMigrationSafety:
|
|||
|
||||
_ensure_schema(conn)
|
||||
|
||||
assert get_schema_version(conn) == 3
|
||||
from src.db import SCHEMA_VERSION
|
||||
assert get_schema_version(conn) == SCHEMA_VERSION
|
||||
row = conn.execute(
|
||||
"SELECT name, description FROM table_registry WHERE id='row1'"
|
||||
).fetchone()
|
||||
|
|
@ -343,6 +345,123 @@ class TestMigrationSafety:
|
|||
conn.close()
|
||||
|
||||
|
||||
class TestSchemaV4:
|
||||
"""Tests for v4 schema additions: metric_definitions and column_metadata tables."""
|
||||
|
||||
def test_metric_definitions_table_exists(self, tmp_path, monkeypatch):
|
||||
"""metric_definitions and column_metadata tables exist after get_system_db()."""
|
||||
_setup_data_dir(tmp_path, monkeypatch)
|
||||
from src.db import get_system_db
|
||||
|
||||
conn = get_system_db()
|
||||
try:
|
||||
tables = {
|
||||
row[0]
|
||||
for row in conn.execute(
|
||||
"SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
|
||||
).fetchall()
|
||||
}
|
||||
assert "metric_definitions" in tables, "metric_definitions table missing"
|
||||
assert "column_metadata" in tables, "column_metadata table missing"
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def test_metric_definitions_columns(self, tmp_path, monkeypatch):
|
||||
"""metric_definitions table has all expected columns."""
|
||||
_setup_data_dir(tmp_path, monkeypatch)
|
||||
from src.db import get_system_db
|
||||
|
||||
conn = get_system_db()
|
||||
try:
|
||||
cols = {
|
||||
row[0]
|
||||
for row in conn.execute(
|
||||
"SELECT column_name FROM information_schema.columns WHERE table_name = 'metric_definitions'"
|
||||
).fetchall()
|
||||
}
|
||||
expected = {
|
||||
"id", "name", "display_name", "category", "description",
|
||||
"type", "unit", "grain", "table_name", "tables",
|
||||
"expression", "time_column", "dimensions", "filters",
|
||||
"synonyms", "notes", "sql", "sql_variants", "validation",
|
||||
"source", "created_at", "updated_at",
|
||||
}
|
||||
assert expected.issubset(cols), f"Missing columns: {expected - cols}"
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def test_column_metadata_table_exists(self, tmp_path, monkeypatch):
|
||||
"""column_metadata table has all expected columns."""
|
||||
_setup_data_dir(tmp_path, monkeypatch)
|
||||
from src.db import get_system_db
|
||||
|
||||
conn = get_system_db()
|
||||
try:
|
||||
cols = {
|
||||
row[0]
|
||||
for row in conn.execute(
|
||||
"SELECT column_name FROM information_schema.columns WHERE table_name = 'column_metadata'"
|
||||
).fetchall()
|
||||
}
|
||||
expected = {
|
||||
"table_id", "column_name", "basetype", "description",
|
||||
"confidence", "source", "updated_at",
|
||||
}
|
||||
assert expected.issubset(cols), f"Missing columns: {expected - cols}"
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def test_v3_to_v4_migration(self, tmp_path, monkeypatch):
|
||||
"""Simulate a v3 database, call get_system_db(), verify it migrates to v4."""
|
||||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||||
import duckdb as _duckdb
|
||||
from src.db import get_system_db, get_schema_version
|
||||
|
||||
# Build a minimal v3 database manually
|
||||
db_path = tmp_path / "state" / "system.duckdb"
|
||||
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = _duckdb.connect(str(db_path))
|
||||
try:
|
||||
conn.execute(
|
||||
"CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp);"
|
||||
"INSERT INTO schema_version (version) VALUES (3);"
|
||||
)
|
||||
# Create the tables that exist in v3 (minimal stubs)
|
||||
conn.execute("CREATE TABLE IF NOT EXISTS table_registry (id VARCHAR PRIMARY KEY, name VARCHAR NOT NULL, is_public BOOLEAN DEFAULT true)")
|
||||
for ddl in [
|
||||
"CREATE TABLE IF NOT EXISTS users (id VARCHAR PRIMARY KEY, email VARCHAR)",
|
||||
"CREATE TABLE IF NOT EXISTS sync_state (table_id VARCHAR PRIMARY KEY)",
|
||||
"CREATE TABLE IF NOT EXISTS sync_history (id VARCHAR PRIMARY KEY, table_id VARCHAR)",
|
||||
"CREATE TABLE IF NOT EXISTS user_sync_settings (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
|
||||
"CREATE TABLE IF NOT EXISTS knowledge_items (id VARCHAR PRIMARY KEY, title VARCHAR)",
|
||||
"CREATE TABLE IF NOT EXISTS knowledge_votes (item_id VARCHAR, user_id VARCHAR, PRIMARY KEY(item_id, user_id))",
|
||||
"CREATE TABLE IF NOT EXISTS audit_log (id VARCHAR PRIMARY KEY, action VARCHAR)",
|
||||
"CREATE TABLE IF NOT EXISTS telegram_links (user_id VARCHAR PRIMARY KEY, chat_id BIGINT)",
|
||||
"CREATE TABLE IF NOT EXISTS pending_codes (code VARCHAR PRIMARY KEY, chat_id BIGINT)",
|
||||
"CREATE TABLE IF NOT EXISTS script_registry (id VARCHAR PRIMARY KEY, name VARCHAR, source TEXT)",
|
||||
"CREATE TABLE IF NOT EXISTS table_profiles (table_id VARCHAR PRIMARY KEY, profile JSON)",
|
||||
"CREATE TABLE IF NOT EXISTS dataset_permissions (user_id VARCHAR, dataset VARCHAR, PRIMARY KEY(user_id, dataset))",
|
||||
"CREATE TABLE IF NOT EXISTS access_requests (id VARCHAR PRIMARY KEY, user_id VARCHAR, user_email VARCHAR, table_id VARCHAR)",
|
||||
]:
|
||||
conn.execute(ddl)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
conn2 = get_system_db()
|
||||
try:
|
||||
assert get_schema_version(conn2) == 4, f"Expected version 4, got {get_schema_version(conn2)}"
|
||||
tables = {
|
||||
row[0]
|
||||
for row in conn2.execute(
|
||||
"SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
|
||||
).fetchall()
|
||||
}
|
||||
assert "metric_definitions" in tables, "metric_definitions table missing after migration"
|
||||
assert "column_metadata" in tables, "column_metadata table missing after migration"
|
||||
finally:
|
||||
conn2.close()
|
||||
|
||||
|
||||
class TestGetAnalyticsDbReadonly:
|
||||
def test_analytics_readonly_rejects_malicious_dir_name(self, tmp_path, monkeypatch):
|
||||
"""Directories with SQL-injection chars in their name are skipped."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue