* fix(security): RBAC filter for agnes_sessions matches both email local-part and user_id
The upload API (POST /api/upload/sessions) stores session files under
user_sessions/{user_id}/ (UUID), while the session collector uses the
OS username (email local-part). The session pipeline writes the directory
name verbatim into usage_session_summary.username, so the column can
contain either value depending on the ingestion path.
The RBAC filter in build_filter_clause previously only matched the email
local-part, missing sessions uploaded via the API. The fix adds an OR
condition so non-admin users see rows where username matches either their
email local-part or their user_id.
Closes #293
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): RBAC filter uses stable user_id instead of mutable email local-part
Closes #293
Previous fix used OR condition matching both email local-part and user_id
in the username column. This was fragile: email changes would break
filtering. This commit introduces a dedicated user_id column populated
by the session pipeline via resolve_user_id(), and switches the RBAC
filter to use it exclusively.
Changes:
- Schema v45: add user_id column to usage_session_summary and usage_events
- UsageProcessor: accept and store user_id in both tables
- runner.py: resolve_user_id() maps directory name to users.id UUID
(exact match for UUID dirs, email LIKE for local-part dirs)
- INTERNAL_TABLES: agnes_sessions/agnes_telemetry filter on user_id column
- build_filter_clause: simplified to WHERE user_id = '<uuid>' (no OR)
- me.py/admin_user_sessions.py: query by user_id OR username for
backward compatibility during transition
- USAGE_PROCESSOR_VERSION bumped 2→3 to trigger reprocessing/backfill
- Tests updated: 27 pass including new email-change resilience test
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(tests): bump schema version assertions 44→45
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(docs): correct resolve_user_id docstring, add TypeError comment
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address review — backward-compat OR, LIKE escaping, narrower TypeError
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address code review — eliminate TypeError hack, add resolve_user_id tests
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(db): create user_id indexes in _v44_to_v45, not _SYSTEM_SCHEMA
_SYSTEM_SCHEMA runs before the migration ladder. On an upgrade from
v42/v43/v44, usage_events / usage_session_summary already exist without
the user_id column (CREATE TABLE IF NOT EXISTS is a no-op), so the
CREATE INDEX ... (user_id) lines in _SYSTEM_SCHEMA failed to bind and
aborted _ensure_schema — the app would not start post-upgrade. Move the
index creation to _v44_to_v45, which ADDs the column first. Same pattern
as the v41 audit_log indices.
* fix(usage): bump USAGE_PROCESSOR_VERSION 3→4 for user_id backfill
#303 shipped USAGE_PROCESSOR_VERSION=3 (release 0.54.12) for its
<command-name> slash extraction. This PR's 2→3 bump collided with it
on rebase, so the reprocess loop would not re-trigger to backfill the
new user_id column on deployments already running v3. Bump to 4.
* release: 0.54.13 — RBAC filter uses stable user_id (#293)
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
130 lines
4.8 KiB
Python
130 lines
4.8 KiB
Python
"""v41 → v42 migration: 7 new usage_* tables for telemetry."""
|
|
|
|
import duckdb
|
|
import pytest
|
|
from src.db import _ensure_schema as init_database, SCHEMA_VERSION
|
|
|
|
|
|
def test_schema_version_is_42():
    """Pin ``SCHEMA_VERSION`` to the schema version this suite expects.

    The function name is historical and is kept for git-blame continuity;
    the pinned value tracks the current schema version, not v42.
    """
    # Keep this literal in lockstep with the SCHEMA_VERSION constant
    # imported from src.db. It is bumped every time a migration step is
    # added to the ladder; the current value is 45.
    assert SCHEMA_VERSION == 45
|
|
|
|
|
|
def test_v42_tables_exist_after_init(tmp_path):
    """Initialising a fresh database creates every expected usage_* table.

    Args:
        tmp_path: pytest-provided temporary directory for the DuckDB file.
    """
    db_path = tmp_path / "test.duckdb"
    conn = duckdb.connect(str(db_path))
    # try/finally so the file handle is released even when an assertion
    # (or init_database itself) fails.
    try:
        init_database(conn)
        tables = {
            row[0]
            for row in conn.execute(
                "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
            ).fetchall()
        }
        for tbl in [
            "usage_events",
            "usage_session_summary",
            "usage_tool_daily",
            "usage_plugin_daily",
            "usage_attribution_skills",
            "usage_attribution_agents",
            "usage_attribution_commands",
        ]:
            assert tbl in tables, f"missing table {tbl}"
    finally:
        conn.close()
|
|
|
|
|
|
def test_v42_indices_exist(tmp_path):
    """Initialising a fresh database creates every expected usage_* index.

    Args:
        tmp_path: pytest-provided temporary directory for the DuckDB file.
    """
    db_path = tmp_path / "test.duckdb"
    conn = duckdb.connect(str(db_path))
    # try/finally so the file handle is released even when an assertion
    # (or init_database itself) fails.
    try:
        init_database(conn)
        idx_names = {
            row[0]
            for row in conn.execute(
                "SELECT index_name FROM duckdb_indexes WHERE table_name LIKE 'usage_%'"
            ).fetchall()
        }
        for idx in [
            "idx_usage_events_session",
            "idx_usage_events_user_time",
            "idx_usage_events_tool",
            "idx_usage_events_skill",
            "idx_usage_events_ref",
            "idx_usage_session_user",
            "idx_usage_session_started",
            "idx_usage_attr_skill_lookup",
            "idx_usage_attr_agent_lookup",
            "idx_usage_attr_command_lookup",
        ]:
            assert idx in idx_names, f"missing index {idx}"
    finally:
        conn.close()
|
|
|
|
|
|
def test_v41_to_v42_is_idempotent(tmp_path):
    """Running init twice on the same DB must not error; version stays at 45.

    Args:
        tmp_path: pytest-provided temporary directory for the DuckDB file.
    """
    db_path = tmp_path / "twice.duckdb"

    # First pass: build the schema from scratch, then release the file.
    conn = duckdb.connect(str(db_path))
    try:
        init_database(conn)
    finally:
        conn.close()

    # Second pass: re-running init on the already-initialised file must be
    # a no-op as far as the recorded schema version is concerned.
    conn = duckdb.connect(str(db_path))
    try:
        init_database(conn)
        v = conn.execute("SELECT MAX(version) FROM schema_version").fetchone()[0]
        assert v == 45
    finally:
        conn.close()
|
|
|
|
|
|
def test_v41_db_upgrades_cleanly(tmp_path):
    """A DB recorded at schema version 41 must climb to 45 without error.

    Seeds a minimal database (a ``schema_version`` row of 41 plus an
    ``audit_log`` table), runs ``init_database`` on it, and checks the final
    version and the presence of the seven usage_* tables.

    Args:
        tmp_path: pytest-provided temporary directory for the DuckDB file.
    """
    db_path = tmp_path / "v41.duckdb"

    # Minimal baseline shape: schema_version + audit_log.
    # NOTE(review): the audit_log column set was originally described as
    # "v40 columns" — confirm it matches what src.db expects at version 41.
    conn = duckdb.connect(str(db_path))
    try:
        conn.execute("CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp)")
        conn.execute("INSERT INTO schema_version (version) VALUES (41)")
        conn.execute("""CREATE TABLE audit_log (
            id VARCHAR PRIMARY KEY, timestamp TIMESTAMP DEFAULT current_timestamp,
            user_id VARCHAR, action VARCHAR, resource VARCHAR, params JSON,
            result VARCHAR, duration_ms INTEGER,
            params_before JSON, client_ip VARCHAR, client_kind VARCHAR, correlation_id VARCHAR
        )""")
    finally:
        conn.close()

    conn = duckdb.connect(str(db_path))
    try:
        init_database(conn)
        v = conn.execute("SELECT MAX(version) FROM schema_version").fetchone()[0]
        assert v == 45
        # All 7 usage_* tables exist once the upgrade from v41 has run.
        tables = {
            row[0]
            for row in conn.execute(
                "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
            ).fetchall()
        }
        for tbl in [
            "usage_events",
            "usage_session_summary",
            "usage_tool_daily",
            "usage_plugin_daily",
            "usage_attribution_skills",
            "usage_attribution_agents",
            "usage_attribution_commands",
        ]:
            # Message corrected: the test seeds version 41, not 40.
            assert tbl in tables, f"missing table {tbl} after upgrade from v41"
    finally:
        conn.close()
|
|
|
|
|
|
def test_v30_db_ladders_all_the_way_up(tmp_path):
    """A DB recorded at schema version 30 must climb to 45 without losing data.

    Seeds a database with ``schema_version`` = 30 and a single ``audit_log``
    row, runs ``init_database``, then checks the final version, that the
    seeded row survived, and that ``usage_events`` exists and is empty.

    Args:
        tmp_path: pytest-provided temporary directory for the DuckDB file.
    """
    db_path = tmp_path / "v30.duckdb"

    # Seed the old-state database, then release the file before re-opening.
    conn = duckdb.connect(str(db_path))
    try:
        conn.execute("CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp)")
        conn.execute("INSERT INTO schema_version (version) VALUES (30)")
        conn.execute("CREATE TABLE audit_log (id VARCHAR PRIMARY KEY)")
        conn.execute("INSERT INTO audit_log (id) VALUES ('vintage')")
    finally:
        conn.close()

    conn = duckdb.connect(str(db_path))
    try:
        init_database(conn)
        v = conn.execute("SELECT MAX(version) FROM schema_version").fetchone()[0]
        assert v == 45
        # The pre-existing row must survive the whole migration ladder.
        cnt = conn.execute("SELECT COUNT(*) FROM audit_log WHERE id='vintage'").fetchone()[0]
        assert cnt == 1
        # The usage_events table was created by the ladder and starts empty.
        cnt2 = conn.execute("SELECT COUNT(*) FROM usage_events").fetchone()[0]
        assert cnt2 == 0
    finally:
        conn.close()
|