UsageProcessor missed every user-typed slash invocation because the SLASH_RE regex (^\s*/<name>) expected a raw "/foo" prefix that Claude Code never writes. Real session jsonls wrap slash commands in a <command-name>/foo</command-name> XML tag inside user message content. Result on production: usage_events.command_name and usage_session_summary.slash_commands stayed NULL/0 for /clear, /exit, plugin commands like /plugin:name — verified on 17 dev-VM jsonls holding 25 <command-name> tags / 0 extracted rows. Replaces SLASH_RE with COMMAND_NAME_RE that searches for the tag anywhere in the user text (the tag sits after a <command-message> sibling). USAGE_PROCESSOR_VERSION bumps 2 → 3; operators wanting to rewrite historical rows under the new logic call POST /api/admin/usage/reprocess (agnes admin telemetry reprocess). Fixtures slash_command.jsonl, mixed.jsonl, skill_curated.jsonl rewritten from the unrealistic "/foo args" string format to the real <command-name> tag wrapper — existing assertions stay green against the new format, which is the regression baseline going forward. Adds TestCommandNameTagExtraction (4 unit tests on iter_events) covering string content, list-of-text-blocks content, mid-text tag position, and plain-prose "/x" non-match. Implicit Skill tool_use extraction (LLM-decided invocations) unchanged. Co-authored-by: Minas Arustamyan <arustamyan.minas@gmail.com>
436 lines
16 KiB
Python
436 lines
16 KiB
Python
"""Tests for UsageProcessor — fixture-driven, covers extraction, attribution, errors,
|
|
idempotency, and empty-session handling."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import duckdb
|
|
import pytest
|
|
|
|
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "sessions" / "usage"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _fresh_db(tmp_path, monkeypatch) -> duckdb.DuckDBPyConnection:
    """Return the system-DB connection for a database rooted in ``tmp_path``.

    Points the ``DATA_DIR`` environment variable at the pytest temp directory,
    clears the connection and path cached on ``src.db``, and then returns
    whatever ``src.db.get_system_db()`` hands back (same idiom as
    test_session_pipeline.py).

    NOTE(review): the cached attributes are presumably reset so a connection
    opened by an earlier test is not reused, and the returned database is
    presumably fully migrated — confirm against ``src.db``.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))

    # Imported only after DATA_DIR has been set, matching the original order.
    import src.db as system_db

    for cached_attr in ("_system_db_conn", "_system_db_path"):
        setattr(system_db, cached_attr, None)
    return system_db.get_system_db()
|
|
|
|
|
|
def _seed_attribution(conn: duckdb.DuckDBPyConnection) -> None:
    """Insert the attribution rows that the usage fixtures reference.

    Executes four ``INSERT OR IGNORE`` statements, in order: two skills
    (``my-skill`` as curated / ``mp/plug``, ``flea-skill`` as flea /
    ``entity-1``), one agent (``my-agent``) and one command
    (``compound:debug``), the last two both curated / ``mp/plug``.
    """
    seed_statements = (
        (
            "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)"
            " VALUES ('curated', 'mp/plug', 'my-skill')"
        ),
        (
            "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)"
            " VALUES ('flea', 'entity-1', 'flea-skill')"
        ),
        (
            "INSERT OR IGNORE INTO usage_attribution_agents (source, ref_id, agent_name)"
            " VALUES ('curated', 'mp/plug', 'my-agent')"
        ),
        (
            "INSERT OR IGNORE INTO usage_attribution_commands (source, ref_id, command_name)"
            " VALUES ('curated', 'mp/plug', 'compound:debug')"
        ),
    )
    for statement in seed_statements:
        conn.execute(statement)
|
|
|
|
|
|
def _process(fixture_name: str, conn: duckdb.DuckDBPyConnection):
    """Run UsageProcessor against a fixture file and return its result.

    Args:
        fixture_name: File name of the session jsonl under ``FIXTURES_DIR``.
            It is also passed as the ``session_key``.
        conn: Connection handed to the processor as ``conn``.

    Returns:
        Whatever ``UsageProcessor.process_session`` returns. Callers in this
        module read ``items_count`` from it, so the previous ``-> None``
        annotation was wrong. The return is left unannotated because the
        result type is not imported in this module.
    """
    # Imported lazily, as in the original helper.
    from services.session_processors.usage import UsageProcessor

    return UsageProcessor().process_session(
        session_path=FIXTURES_DIR / fixture_name,
        username="test-user",
        session_key=fixture_name,
        conn=conn,
    )
|
|
|
|
|
|
def _events(conn: duckdb.DuckDBPyConnection) -> list[dict]:
    """Return every ``usage_events`` row as a dict, ordered by ``occurred_at``.

    Keys are the column names reported by ``conn.description`` for the query.
    """
    fetched = conn.execute(
        "SELECT * FROM usage_events ORDER BY occurred_at ASC"
    ).fetchall()
    # Column names are read after the fetch, in the same order as the original.
    column_names = [column[0] for column in conn.description]
    return [dict(zip(column_names, values)) for values in fetched]
|
|
|
|
|
|
def _summary(conn: duckdb.DuckDBPyConnection, session_key: str) -> dict | None:
    """Return the ``usage_session_summary`` row for ``session_key`` as a dict.

    The lookup matches on the ``session_file`` column. Returns ``None`` when
    no row exists; otherwise the keys are the column names reported by
    ``conn.description``.
    """
    record = conn.execute(
        "SELECT * FROM usage_session_summary WHERE session_file = ?",
        [session_key],
    ).fetchone()
    if record is not None:
        column_names = [column[0] for column in conn.description]
        return dict(zip(column_names, record))
    return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSimpleBash:
    """Expectations for simple_bash.jsonl: exactly one builtin Bash tool_use event."""

    FIXTURE = "simple_bash.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_extracts_one_event(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        assert len(recorded) == 1
        only_event = recorded[0]
        assert only_event["tool_name"] == "Bash"
        assert only_event["event_type"] == "tool_use"

    def test_builtin_source(self, tmp_path, monkeypatch):
        first_event = _events(self._load(tmp_path, monkeypatch))[0]
        assert first_event["source"] == "builtin"
        assert first_event["ref_id"] is None

    def test_no_error_flag(self, tmp_path, monkeypatch):
        first_event = _events(self._load(tmp_path, monkeypatch))[0]
        assert first_event["is_error"] is False

    def test_summary_written(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary is not None
        assert summary["tool_calls"] == 1
        assert summary["tool_errors"] == 0
        assert summary["username"] == "test-user"
|
|
|
|
|
|
class TestMcpCall:
    """Expectations for mcp_call.jsonl: one mcp_call event for an unattributed MCP tool."""

    FIXTURE = "mcp_call.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_mcp_event_type(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        assert len(recorded) == 1
        only_event = recorded[0]
        assert only_event["event_type"] == "mcp_call"
        assert only_event["tool_name"] == "mcp__github__create_issue"

    def test_mcp_builtin_source(self, tmp_path, monkeypatch):
        """An MCP tool absent from the attribution tables is reported as builtin."""
        first_event = _events(self._load(tmp_path, monkeypatch))[0]
        # _seed_attribution adds no row for mcp__github__create_issue.
        assert first_event["source"] == "builtin"

    def test_summary_mcp_count(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary["mcp_calls"] == 1
|
|
|
|
|
|
class TestCuratedSkill:
    """Expectations for skill_curated.jsonl: 'my-skill' resolves to curated / mp/plug."""

    FIXTURE = "skill_curated.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_curated_attribution(self, tmp_path, monkeypatch):
        db = self._load(tmp_path, monkeypatch)
        match = db.execute(
            "SELECT source, ref_id FROM usage_events WHERE skill_name = 'my-skill'"
        ).fetchone()
        assert match is not None
        source, ref_id = match
        assert source == "curated"
        assert ref_id == "mp/plug"

    def test_skill_invocations_count(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary["skill_invocations"] == 1
|
|
|
|
|
|
class TestFleaSkill:
    """Expectations for skill_flea.jsonl: 'flea-skill' resolves to flea / entity-1."""

    def test_flea_attribution(self, tmp_path, monkeypatch):
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process("skill_flea.jsonl", db)

        match = db.execute(
            "SELECT source, ref_id FROM usage_events WHERE skill_name = 'flea-skill'"
        ).fetchone()
        assert match is not None
        source, ref_id = match
        assert source == "flea"
        assert ref_id == "entity-1"
|
|
|
|
|
|
class TestSlashCommand:
    """Expectations for slash_command.jsonl: one attributed 'compound:debug' slash command."""

    FIXTURE = "slash_command.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_slash_command_extracted(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        slash_only = [
            event for event in recorded if event["event_type"] == "slash_command"
        ]
        assert len(slash_only) == 1
        assert slash_only[0]["command_name"] == "compound:debug"

    def test_slash_command_attribution(self, tmp_path, monkeypatch):
        """compound:debug is seeded in the commands attribution table, so it is curated."""
        db = self._load(tmp_path, monkeypatch)
        match = db.execute(
            "SELECT source, ref_id FROM usage_events WHERE command_name = 'compound:debug'"
        ).fetchone()
        assert match is not None
        source, ref_id = match
        assert source == "curated"
        assert ref_id == "mp/plug"

    def test_slash_commands_in_summary(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary["slash_commands"] == 1
|
|
|
|
|
|
class TestSubagent:
    """Expectations for subagent.jsonl: one subagent dispatch of the curated 'my-agent'."""

    FIXTURE = "subagent.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_subagent_event_type(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        assert len(recorded) == 1
        only_event = recorded[0]
        assert only_event["event_type"] == "subagent"
        assert only_event["subagent_type"] == "my-agent"

    def test_subagent_attributed(self, tmp_path, monkeypatch):
        """my-agent is seeded in the agents attribution table, so it is curated."""
        first_event = _events(self._load(tmp_path, monkeypatch))[0]
        assert first_event["source"] == "curated"
        assert first_event["ref_id"] == "mp/plug"

    def test_subagent_dispatches_in_summary(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary["subagent_dispatches"] == 1
|
|
|
|
|
|
class TestToolError:
    """Expectations for tool_error.jsonl: one Bash event flagged as an error."""

    FIXTURE = "tool_error.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_error_flagged_on_event(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        assert len(recorded) == 1
        only_event = recorded[0]
        assert only_event["tool_name"] == "Bash"
        assert only_event["is_error"] is True

    def test_tool_errors_in_summary(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary["tool_errors"] == 1
        assert summary["tool_calls"] == 1
|
|
|
|
|
|
class TestMixedSession:
    """Expectations for mixed.jsonl, a session combining several event kinds."""

    FIXTURE = "mixed.jsonl"

    def _load(self, tmp_path, monkeypatch):
        """Process the fixture into a fresh, seeded DB and return the connection."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        _process(self.FIXTURE, db)
        return db

    def test_mixed_event_counts(self, tmp_path, monkeypatch):
        recorded = _events(self._load(tmp_path, monkeypatch))
        seen_types = [event["event_type"] for event in recorded]
        # Per the original note, the fixture holds one slash_command, one
        # tool_use (Bash), one tool_use (Skill), one mcp_call, one subagent and
        # one tool_use (Bash with error) = 6 events. Only the presence of each
        # event type is asserted here, not the total.
        for expected_type in ("slash_command", "tool_use", "mcp_call", "subagent"):
            assert expected_type in seen_types

    def test_mixed_summary_counts(self, tmp_path, monkeypatch):
        summary = _summary(self._load(tmp_path, monkeypatch), self.FIXTURE)
        assert summary is not None
        expected_counts = {
            "mcp_calls": 1,
            "subagent_dispatches": 1,
            "skill_invocations": 1,
            "slash_commands": 1,
            "tool_errors": 1,
        }
        for column, expected in expected_counts.items():
            assert summary[column] == expected

    def test_mixed_error_correlated(self, tmp_path, monkeypatch):
        db = self._load(tmp_path, monkeypatch)
        failing = db.execute(
            "SELECT tool_name FROM usage_events WHERE is_error = TRUE"
        ).fetchall()
        assert len(failing) == 1
        assert failing[0][0] == "Bash"
|
|
|
|
|
|
class TestEmptySession:
    """Expectations for empty.jsonl: no events are extracted but a summary row is written."""

    def test_zero_events_writes_summary(self, tmp_path, monkeypatch):
        """Empty session (only system/summary turns) yields 0 events but a summary row."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        # The processor result is not needed here; the sibling test below
        # checks it. The original bound it to an unused local.
        _process("empty.jsonl", conn)

        assert len(_events(conn)) == 0
        summary = _summary(conn, "empty.jsonl")
        assert summary is not None
        assert summary["tool_calls"] == 0

    def test_processor_result_zero_items(self, tmp_path, monkeypatch):
        """The processor result reports zero items for the empty session."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        result = _process("empty.jsonl", conn)
        assert result.items_count == 0
|
|
|
|
|
|
class TestIdempotency:
    """Processing the same session twice must not change the number of stored events."""

    @staticmethod
    def _process_and_count(fixture_name, db):
        """Process ``fixture_name`` once and return the total usage_events row count."""
        _process(fixture_name, db)
        return db.execute("SELECT COUNT(*) FROM usage_events").fetchone()[0]

    def test_reprocess_same_event_count(self, tmp_path, monkeypatch):
        """INSERT OR IGNORE: processing the same session twice yields same event count."""
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        after_first = self._process_and_count("simple_bash.jsonl", db)
        after_second = self._process_and_count("simple_bash.jsonl", db)
        assert after_first == after_second == 1

    def test_reprocess_mixed_idempotent(self, tmp_path, monkeypatch):
        db = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(db)
        after_first = self._process_and_count("mixed.jsonl", db)
        after_second = self._process_and_count("mixed.jsonl", db)
        assert after_first == after_second
|
|
|
|
|
|
class TestMultiToolTurnDedup:
    """Several tool_use blocks inside one assistant turn must each become an event."""

    def test_two_tool_calls_in_same_turn_produce_two_events(self, tmp_path, monkeypatch):
        """Two parallel Bash calls in the same assistant turn must produce 2 distinct events.

        Regression — earlier bug: same event_uuid + same tool_name collided in id hash,
        so the second tool_use was silently dropped by INSERT OR IGNORE. Both blocks
        below therefore deliberately share the turn uuid and the tool name ("Bash")
        and differ only in their tool_use id and input.
        """
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)

        assistant_turn = {
            "uuid": "turn-1",
            "parentUuid": None,
            "type": "assistant",
            "sessionId": "sess-multi",
            "timestamp": "2026-05-12T10:00:00Z",
            "message": {
                "role": "assistant",
                "model": "claude-x",
                "content": [
                    {"type": "tool_use", "id": "tu_a", "name": "Bash", "input": {"command": "ls"}},
                    {"type": "tool_use", "id": "tu_b", "name": "Bash", "input": {"command": "pwd"}},
                ],
            },
        }
        jsonl_path = tmp_path / "multi_tool_turn.jsonl"
        # One JSON document per line, as in the other session fixtures.
        jsonl_path.write_text(json.dumps(assistant_turn) + "\n")

        from services.session_processors.usage import UsageProcessor

        UsageProcessor().process_session(
            session_path=jsonl_path,
            username="alice",
            session_key="alice/multi_tool_turn.jsonl",
            conn=conn,
        )

        n = conn.execute(
            "SELECT COUNT(*) FROM usage_events WHERE session_id='sess-multi'"
        ).fetchone()[0]
        assert n == 2, f"expected 2 events (one per tu_xxx), got {n}"
|
|
|
|
|
|
class TestCommandNameTagExtraction:
    """Unit tests for slash-command extraction by ``iter_events``.

    Slash invocations arrive as <command-name>/foo</command-name> embedded in
    user message content (Claude Code's wire format). These tests feed
    synthetic user turns straight into iter_events so that a future change in
    that shape does not silently regress extraction.
    """

    @staticmethod
    def _user_turn(content):
        """Build a minimal user turn whose message carries ``content``."""
        message = {"role": "user", "content": content}
        return {
            "type": "user",
            "uuid": "u1",
            "parentUuid": None,
            "sessionId": "sess-cn",
            "timestamp": "2026-05-14T10:00:00.000Z",
            "cwd": "/workspace",
            "message": message,
        }

    def _extract(self, content):
        """Run iter_events over a single user turn and return the events as a list."""
        from services.session_processors.usage_lib import iter_events

        return list(iter_events([self._user_turn(content)]))

    def test_extracts_command_name_from_string_content(self):
        extracted = self._extract(
            "<command-name>/clear</command-name>\n<command-args></command-args>"
        )
        assert len(extracted) == 1
        only_event = extracted[0]
        assert only_event.event_type == "slash_command"
        assert only_event.command_name == "clear"

    def test_extracts_command_name_from_text_block(self):
        """Defensive: the tag is also found when content is a list of text
        blocks rather than a plain string, in case the wire format shifts."""
        extracted = self._extract(
            [{"type": "text", "text": "<command-name>/plugin:name</command-name>"}]
        )
        assert len(extracted) == 1
        assert extracted[0].command_name == "plugin:name"

    def test_command_name_not_at_start_still_matches(self):
        """Real Claude Code puts a <command-message> sibling before the
        <command-name> tag, so extraction must search the text rather than
        anchor at its start."""
        extracted = self._extract(
            "<command-message>foo</command-message>\n"
            "<command-name>/foo</command-name>\n"
            "<command-args>some arg</command-args>"
        )
        assert len(extracted) == 1
        assert extracted[0].command_name == "foo"

    def test_plain_text_without_tag_does_not_match(self):
        """Prose that merely contains '/foo', with no <command-name> tag, must
        NOT yield a slash_command event — the reason for dropping the old
        start-anchored slash regex."""
        extracted = self._extract(
            "Hello world, see /not-a-command-just-prose for context."
        )
        assert extracted == []
|