* fix(security): RBAC filter for agnes_sessions matches both email local-part and user_id
The upload API (POST /api/upload/sessions) stores session files under
user_sessions/{user_id}/ (UUID), while the session collector uses the
OS username (email local-part). The session pipeline writes the directory
name verbatim into usage_session_summary.username, so the column can
contain either value depending on the ingestion path.
The RBAC filter in build_filter_clause previously only matched the email
local-part, missing sessions uploaded via the API. The fix adds an OR
condition so non-admin users see rows where username matches either their
email local-part or their user_id.
Closes #293
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): RBAC filter uses stable user_id instead of mutable email local-part
Closes #293
Previous fix used OR condition matching both email local-part and user_id
in the username column. This was fragile: email changes would break
filtering. This commit introduces a dedicated user_id column populated
by the session pipeline via resolve_user_id(), and switches the RBAC
filter to use it exclusively.
Changes:
- Schema v45: add user_id column to usage_session_summary and usage_events
- UsageProcessor: accept and store user_id in both tables
- runner.py: resolve_user_id() maps directory name to users.id UUID
(exact match for UUID dirs, email LIKE for local-part dirs)
- INTERNAL_TABLES: agnes_sessions/agnes_telemetry filter on user_id column
- build_filter_clause: simplified to WHERE user_id = '<uuid>' (no OR)
- me.py/admin_user_sessions.py: query by user_id OR username for
backward compatibility during transition
- USAGE_PROCESSOR_VERSION bumped 2→3 to trigger reprocessing/backfill
- Tests updated: 27 pass including new email-change resilience test
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(tests): bump schema version assertions 44→45
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(docs): correct resolve_user_id docstring, add TypeError comment
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address review — backward-compat OR, LIKE escaping, narrower TypeError
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address code review — eliminate TypeError hack, add resolve_user_id tests
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(db): create user_id indexes in _v44_to_v45, not _SYSTEM_SCHEMA
_SYSTEM_SCHEMA runs before the migration ladder. On an upgrade from
v42/v43/v44, usage_events / usage_session_summary already exist without
the user_id column (CREATE TABLE IF NOT EXISTS is a no-op), so the
CREATE INDEX ... (user_id) lines in _SYSTEM_SCHEMA failed to bind and
aborted _ensure_schema — the app would not start post-upgrade. Move the
index creation to _v44_to_v45, which ADDs the column first. Same pattern
as the v41 audit_log indices.
* fix(usage): bump USAGE_PROCESSOR_VERSION 3→4 for user_id backfill
#303 shipped USAGE_PROCESSOR_VERSION=3 (release 0.54.12) for its
<command-name> slash extraction. This PR's 2→3 bump collided with it
on rebase, so the reprocess loop would not re-trigger to backfill the
new user_id column on deployments already running v3. Bump to 4.
* release: 0.54.13 — RBAC filter uses stable user_id (#293)
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
349 lines
11 KiB
Python
349 lines
11 KiB
Python
"""Homepage status frame — schema v44, endpoint shapes, manifest stamp,
operator visibility flag.

Covers:
- v44 ALTERs land users.last_pull_at + 4 token columns idempotently on
  both fresh installs and upgrades from a v43-shaped DB.
- compute_home_stats returns the right counters for 24h vs 7d windows
  and clamps unknown windows to 24h.
- GET /api/sync/manifest bumps users.last_pull_at as a side effect.
- get_home_status_frame_visibility honors the env var + yaml override
  and defaults true.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from datetime import datetime, timezone
|
|
|
|
import duckdb
|
|
import pytest
|
|
|
|
from src.db import (
|
|
SCHEMA_VERSION,
|
|
_SYSTEM_SCHEMA,
|
|
_ensure_schema,
|
|
_v43_to_v44,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Schema v44
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_v44_fresh_install_has_token_columns_and_last_pull(tmp_path):
    """Fresh install has ``users.last_pull_at`` and the four token columns.

    ``_ensure_schema`` is run against a brand-new DuckDB file; the columns
    must exist afterwards (the migration function is a no-op on a fresh
    install, but the columns must exist regardless).
    """
    conn = duckdb.connect(str(tmp_path / "system.duckdb"))
    try:
        _ensure_schema(conn)

        # PRAGMA table_info rows carry the column name at index 1.
        user_cols = {r[1] for r in conn.execute("PRAGMA table_info(users)").fetchall()}
        sess_cols = {r[1] for r in conn.execute("PRAGMA table_info(usage_session_summary)").fetchall()}
    finally:
        # Close explicitly instead of leaving the connection to the GC.
        conn.close()

    assert "last_pull_at" in user_cols

    for col in (
        "input_tokens",
        "output_tokens",
        "cache_read_tokens",
        "cache_creation_tokens",
    ):
        assert col in sess_cols, f"missing {col}"
|
|
|
|
|
|
def test_v43_to_v44_upgrade_is_idempotent(tmp_path):
    """``_v43_to_v44`` adds the new columns and can safely be run twice.

    A hand-rolled pre-v44 database (no ``last_pull_at``, no token columns)
    is migrated twice in a row; the second call must be a no-op rather than
    an error.
    """
    conn = duckdb.connect(str(tmp_path / "system.duckdb"))
    try:
        # Hand-roll v43-shaped tables (no last_pull_at, no token cols).
        conn.execute("CREATE TABLE users (id VARCHAR PRIMARY KEY, email VARCHAR, onboarded BOOLEAN DEFAULT FALSE)")
        conn.execute(
            """
            CREATE TABLE usage_session_summary (
                session_file VARCHAR PRIMARY KEY,
                session_id VARCHAR NOT NULL,
                username VARCHAR NOT NULL,
                started_at TIMESTAMP,
                ended_at TIMESTAMP,
                user_messages INTEGER DEFAULT 0,
                processor_version INTEGER NOT NULL
            )
            """
        )

        _v43_to_v44(conn)
        _v43_to_v44(conn)  # idempotent

        # PRAGMA table_info rows carry the column name at index 1.
        user_cols = {r[1] for r in conn.execute("PRAGMA table_info(users)").fetchall()}
        tok_cols = {
            r[1]
            for r in conn.execute("PRAGMA table_info(usage_session_summary)").fetchall()
            if "token" in r[1]
        }
    finally:
        # Close explicitly instead of leaving the connection to the GC.
        conn.close()

    assert "last_pull_at" in user_cols
    assert tok_cols == {
        "input_tokens",
        "output_tokens",
        "cache_read_tokens",
        "cache_creation_tokens",
    }
|
|
|
|
|
|
def test_schema_version_constant_is_44():
    """Guard against accidental ``SCHEMA_VERSION`` regressions.

    NOTE(review): the function name still says 44 while the pinned value
    is 45 — the name appears to be historical; confirm before renaming.
    """
    expected_version = 45
    assert SCHEMA_VERSION == expected_version
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# compute_home_stats / GET /api/me/home-stats
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def stats_conn(tmp_path):
    """Yield a DuckDB connection with ``_SYSTEM_SCHEMA`` applied.

    The database file lives under ``tmp_path``. The connection is closed
    on teardown so it is not left open after the test finishes.
    """
    conn = duckdb.connect(str(tmp_path / "system.duckdb"))
    try:
        conn.execute(_SYSTEM_SCHEMA)
        yield conn
    finally:
        conn.close()
|
|
|
|
|
|
def _seed_user(conn, *, uid="u1", email="alice@example.com"):
    """Insert one active, onboarded ``users`` row with ``last_pull_at`` set to now.

    Args:
        conn: connection exposing ``execute(sql, params)``.
        uid: value for ``users.id``.
        email: value for ``users.email``.
    """
    insert_sql = (
        "INSERT INTO users (id, email, active, onboarded, last_pull_at) "
        "VALUES (?, ?, TRUE, TRUE, current_timestamp)"
    )
    conn.execute(insert_sql, [uid, email])
|
|
|
|
|
|
def _seed_session(
    conn,
    *,
    session_file,
    username,
    started_sql,
    prompts=0,
    input_tokens=0,
    output_tokens=0,
    cache_read=0,
    cache_creation=0,
):
    """Insert one ``usage_session_summary`` row.

    ``session_file`` is used for both the ``session_file`` and
    ``session_id`` columns. ``started_sql`` is a raw SQL expression spliced
    into the statement for ``started_at`` (e.g. an interval relative to
    ``current_timestamp``); ``ended_at`` is always ``current_timestamp``.
    ``prompts`` is stored in ``user_messages``.
    """
    bound_values = [
        session_file,  # session_file
        session_file,  # session_id
        username,
        prompts,  # user_messages
        input_tokens,
        output_tokens,
        cache_read,
        cache_creation,
    ]
    statement = f"""
        INSERT INTO usage_session_summary
            (session_file, session_id, username, started_at, ended_at,
             user_messages, input_tokens, output_tokens, cache_read_tokens,
             cache_creation_tokens, processor_version)
        VALUES (?, ?, ?, {started_sql}, current_timestamp,
                ?, ?, ?, ?, ?, 2)
        """
    conn.execute(statement, bound_values)
|
|
|
|
|
|
def _seed_event(conn, *, ev_id, session_file, username, cwd, occurred_sql):
    """Insert one ``usage_events`` row of type ``tool_use`` / source ``builtin``.

    ``session_file`` is used for both the ``session_id`` and
    ``session_file`` columns. ``occurred_sql`` is a raw SQL expression
    spliced into the statement for ``occurred_at``.
    """
    bound_values = [ev_id, session_file, session_file, username, cwd]
    statement = f"""
        INSERT INTO usage_events
            (id, session_id, session_file, username, event_type, source,
             cwd, occurred_at, processor_version)
        VALUES (?, ?, ?, ?, 'tool_use', 'builtin', ?, {occurred_sql}, 2)
        """
    conn.execute(statement, bound_values)
|
|
|
|
|
|
def test_compute_home_stats_24h_vs_7d_windowing(stats_conn):
    """Sessions and events are counted only inside the requested window.

    A session 1h ago shows in both windows; a session 3 days ago only in
    7d; a session 30 days ago in neither.
    """
    from app.api.me import compute_home_stats

    _seed_user(stats_conn)

    # The two in-window sessions share the same counters.
    counters = {
        "prompts": 5,
        "input_tokens": 100,
        "output_tokens": 50,
        "cache_read": 800,
        "cache_creation": 25,
    }
    session_rows = (
        ("a.jsonl", "current_timestamp - INTERVAL 1 HOUR", counters),
        ("b.jsonl", "current_timestamp - INTERVAL 3 DAY", counters),
        ("c.jsonl", "current_timestamp - INTERVAL 30 DAY", {"prompts": 99}),
    )
    for file_name, started_expr, extra in session_rows:
        _seed_session(
            stats_conn,
            session_file=file_name,
            username="alice",
            started_sql=started_expr,
            **extra,
        )

    event_rows = (
        ("e1", "a.jsonl", "/proj/alpha", "current_timestamp - INTERVAL 1 HOUR"),
        ("e2", "a.jsonl", "/proj/beta", "current_timestamp - INTERVAL 2 HOUR"),
        ("e3", "b.jsonl", "/proj/gamma", "current_timestamp - INTERVAL 3 DAY"),
    )
    for event_id, file_name, workdir, occurred_expr in event_rows:
        _seed_event(
            stats_conn,
            ev_id=event_id,
            session_file=file_name,
            username="alice",
            cwd=workdir,
            occurred_sql=occurred_expr,
        )

    user = {"id": "u1", "email": "alice@example.com"}
    tokens_per_session = 100 + 50 + 800 + 25

    day_stats = compute_home_stats(stats_conn, user, "24h")
    assert day_stats["window"] == "24h"
    assert day_stats["sessions"] == 1
    assert day_stats["prompts"] == 5
    assert day_stats["projects"] == 2
    assert day_stats["tokens"]["total"] == tokens_per_session
    assert day_stats["last_pull_at"] is not None

    week_stats = compute_home_stats(stats_conn, user, "7d")
    assert week_stats["window"] == "7d"
    assert week_stats["sessions"] == 2
    assert week_stats["prompts"] == 10
    assert week_stats["projects"] == 3
    assert week_stats["tokens"]["total"] == 2 * tokens_per_session
|
|
|
|
|
|
def test_compute_home_stats_unknown_window_clamps_to_24h(stats_conn):
    """An unrecognised window value is reported back as ``"24h"``."""
    from app.api.me import compute_home_stats

    _seed_user(stats_conn)

    user = {"id": "u1", "email": "alice@example.com"}
    stats = compute_home_stats(stats_conn, user, "bogus")

    assert stats["window"] == "24h"
|
|
|
|
|
|
def test_compute_home_stats_empty_user_returns_zeros(stats_conn):
    """A user with no sessions or events gets zero counters, not an error."""
    from app.api.me import compute_home_stats

    _seed_user(stats_conn, uid="u_empty", email="nobody@example.com")

    user = {"id": "u_empty", "email": "nobody@example.com"}
    stats = compute_home_stats(stats_conn, user, "24h")

    assert stats["sessions"] == 0
    assert stats["prompts"] == 0
    assert stats["projects"] == 0
    assert stats["tokens"]["total"] == 0
    # The seeded users row has last_pull_at populated by _seed_user, so
    # this checks the column value makes it into the payload.
    assert stats["last_pull_at"] is not None
|
|
|
|
|
|
def test_compute_home_stats_missing_users_row_returns_zeros(stats_conn):
    """With no matching ``users`` row at all, the payload is fully zeroed.

    No user is seeded here; the helper is expected to return the zero
    payload (with ``last_pull_at`` as ``None``) instead of raising.
    """
    from app.api.me import compute_home_stats

    zeroed_payload = {
        "window": "24h",
        "last_pull_at": None,
        "sessions": 0,
        "prompts": 0,
        "tokens": {
            "input": 0,
            "output": 0,
            "cache_read": 0,
            "cache_creation": 0,
            "total": 0,
        },
        "projects": 0,
    }

    ghost = {"id": "ghost", "email": "ghost@example.com"}
    stats = compute_home_stats(stats_conn, ghost, "24h")

    assert stats == zeroed_payload
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GET /api/sync/manifest bumps users.last_pull_at
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_sync_manifest_bumps_last_pull_at(stats_conn, monkeypatch, tmp_path):
    """Calling ``sync_manifest`` populates ``users.last_pull_at``.

    The seeded timestamp is cleared first so the test can observe the
    column flipping from NULL to a value.
    """
    from app.api.sync import sync_manifest

    # DATA_DIR points at an empty temp directory; no docs/profiles are
    # seeded for this test.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))

    user = {"id": "u_pull", "email": "puller@example.com"}
    _seed_user(stats_conn, uid=user["id"], email=user["email"])
    # Wipe seeded last_pull_at so we can detect the bump.
    stats_conn.execute("UPDATE users SET last_pull_at = NULL WHERE id = ?", ["u_pull"])

    manifest_call = sync_manifest(user=user, conn=stats_conn)
    asyncio.run(manifest_call)

    pulled_at = stats_conn.execute(
        "SELECT last_pull_at FROM users WHERE id = ?", ["u_pull"]
    ).fetchone()[0]
    # Deliberately not compared against `datetime.now(utc)`: DuckDB's
    # ``current_timestamp`` may be naive-local-or-utc depending on the
    # environment, so a delta-based assertion would tz-skew. What matters
    # is that the column is no longer NULL.
    assert pulled_at is not None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Operator visibility flag — get_home_status_frame_visibility
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_status_frame_default_is_visible(monkeypatch):
    """With ``AGNES_HOME_SHOW_STATUS_FRAME`` unset, the flag is ``True``."""
    from app.instance_config import get_home_status_frame_visibility

    monkeypatch.delenv("AGNES_HOME_SHOW_STATUS_FRAME", raising=False)

    visible = get_home_status_frame_visibility()
    assert visible is True
|
|
|
|
|
|
def test_status_frame_env_var_off(monkeypatch):
    """Setting ``AGNES_HOME_SHOW_STATUS_FRAME`` to ``"0"`` hides the frame."""
    from app.instance_config import get_home_status_frame_visibility

    monkeypatch.setenv("AGNES_HOME_SHOW_STATUS_FRAME", "0")

    visible = get_home_status_frame_visibility()
    assert visible is False
|
|
|
|
|
|
def test_status_frame_env_var_falsey_values(monkeypatch):
    """Falsey spellings hide the frame; every other value shows it.

    Hiding values checked: ``0``, ``false`` (three casings), ``no``,
    ``off`` and the empty string.
    """
    from app.instance_config import get_home_status_frame_visibility

    scenarios = (
        (("0", "false", "False", "FALSE", "no", "off", ""), False, "hide"),
        (("1", "true", "yes", "on", "anything"), True, "show"),
    )
    for values, expected, verb in scenarios:
        for val in values:
            monkeypatch.setenv("AGNES_HOME_SHOW_STATUS_FRAME", val)
            assert get_home_status_frame_visibility() is expected, f"{val!r} should {verb}"
|