agnes-the-ai-analyst/tests/test_session_processor_usage.py
minasarustamyan f53e98d5a3
fix(usage): extract <command-name> slash invocations (release 0.54.12) (#303)
UsageProcessor missed every user-typed slash invocation because the
SLASH_RE regex (^\s*/<name>) expected a raw "/foo" prefix that Claude
Code never writes. Real session jsonls wrap slash commands in a
<command-name>/foo</command-name> XML tag inside user message content.
Result on production: usage_events.command_name and
usage_session_summary.slash_commands stayed NULL/0 for /clear, /exit,
plugin commands like /plugin:name — verified on 17 dev-VM jsonls
holding 25 <command-name> tags / 0 extracted rows.

Replaces SLASH_RE with COMMAND_NAME_RE that searches for the tag
anywhere in the user text (the tag sits after a <command-message>
sibling). USAGE_PROCESSOR_VERSION bumps 2 → 3; operators wanting to
rewrite historical rows under the new logic call
POST /api/admin/usage/reprocess (agnes admin telemetry reprocess).

Fixtures slash_command.jsonl, mixed.jsonl, skill_curated.jsonl
rewritten from the unrealistic "/foo args" string format to the real
<command-name> tag wrapper — existing assertions stay green against
the new format, which is the regression baseline going forward. Adds
TestCommandNameTagExtraction (4 unit tests on iter_events) covering
string content, list-of-text-blocks content, mid-text tag position,
and plain-prose "/x" non-match.

Implicit Skill tool_use extraction (LLM-decided invocations)
unchanged.

Co-authored-by: Minas Arustamyan <arustamyan.minas@gmail.com>
2026-05-14 13:33:57 +00:00

436 lines
16 KiB
Python

"""Tests for UsageProcessor — fixture-driven, covers extraction, attribution, errors,
idempotency, and empty-session handling."""
from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import duckdb
import pytest
# Directory (next to this test module) holding the session fixture files that
# _process() resolves fixture names against.
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "sessions" / "usage"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _fresh_db(tmp_path, monkeypatch) -> duckdb.DuckDBPyConnection:
    """Return the system DuckDB connection for a per-test data directory.

    ``DATA_DIR`` is pointed at ``tmp_path`` and the connection/path cached on
    ``src.db`` are cleared before ``get_system_db()`` is called, so a
    connection cached by an earlier test is not reused (same idiom as
    test_session_pipeline.py).

    NOTE(review): the original docstring describes the result as "fully
    migrated"; that is presumably done inside ``get_system_db()`` — confirm
    in ``src.db``.
    """
    # The environment is set *before* importing src.db, as in the original.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    import src.db as db_module

    # Reset the module-level cache (connection first, then path).
    for cached_attr in ("_system_db_conn", "_system_db_path"):
        setattr(db_module, cached_attr, None)
    return db_module.get_system_db()
def _seed_attribution(conn: duckdb.DuckDBPyConnection) -> None:
    """Insert the attribution rows that the usage fixtures refer to.

    Seeds, in order: the curated skill ``my-skill``, the flea skill
    ``flea-skill``, the curated agent ``my-agent`` and the curated command
    ``compound:debug``.  Every statement is ``INSERT OR IGNORE``.
    """
    seed_statements = (
        "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)"
        " VALUES ('curated', 'mp/plug', 'my-skill')",
        "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)"
        " VALUES ('flea', 'entity-1', 'flea-skill')",
        "INSERT OR IGNORE INTO usage_attribution_agents (source, ref_id, agent_name)"
        " VALUES ('curated', 'mp/plug', 'my-agent')",
        "INSERT OR IGNORE INTO usage_attribution_commands (source, ref_id, command_name)"
        " VALUES ('curated', 'mp/plug', 'compound:debug')",
    )
    for statement in seed_statements:
        conn.execute(statement)
def _process(fixture_name: str, conn: duckdb.DuckDBPyConnection) -> Any:
    """Run UsageProcessor against one fixture file and return its result.

    Args:
        fixture_name: File name resolved against ``FIXTURES_DIR``.  The same
            value is passed as ``session_key``; tests later look the summary
            row up by it.
        conn: Connection handed through to ``process_session``.

    Returns:
        Whatever ``UsageProcessor.process_session`` returns.  Callers in this
        module read ``items_count`` from it, so the previous ``-> None``
        annotation was incorrect.
    """
    # Imported lazily, matching the other project imports in this module.
    from services.session_processors.usage import UsageProcessor

    return UsageProcessor().process_session(
        session_path=FIXTURES_DIR / fixture_name,
        username="test-user",
        session_key=fixture_name,
        conn=conn,
    )
def _events(conn: duckdb.DuckDBPyConnection) -> list[dict]:
    """Return every ``usage_events`` row as a dict, ordered by ``occurred_at`` ascending."""
    fetched = conn.execute(
        "SELECT * FROM usage_events ORDER BY occurred_at ASC"
    ).fetchall()
    # Column names are read from the connection's description after the fetch.
    columns = [column[0] for column in conn.description]
    return [dict(zip(columns, record)) for record in fetched]
def _summary(conn: duckdb.DuckDBPyConnection, session_key: str) -> dict | None:
    """Return the ``usage_session_summary`` row for ``session_key`` as a dict.

    The lookup matches on the ``session_file`` column.  Returns ``None`` when
    no row matches.
    """
    record = conn.execute(
        "SELECT * FROM usage_session_summary WHERE session_file = ?",
        [session_key],
    ).fetchone()
    if record is not None:
        # Only consult the description when there is a row to label.
        columns = [column[0] for column in conn.description]
        return dict(zip(columns, record))
    return None
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestSimpleBash:
    """simple_bash.jsonl: a single, successful, builtin Bash tool_use is expected."""

    def test_extracts_one_event(self, tmp_path, monkeypatch):
        """Exactly one event is stored and it is a Bash ``tool_use``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("simple_bash.jsonl", conn)

        events = _events(conn)
        assert len(events) == 1
        event = events[0]
        assert event["tool_name"] == "Bash"
        assert event["event_type"] == "tool_use"

    def test_builtin_source(self, tmp_path, monkeypatch):
        """The event is attributed to ``builtin`` with no ``ref_id``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("simple_bash.jsonl", conn)

        event = _events(conn)[0]
        assert event["source"] == "builtin"
        assert event["ref_id"] is None

    def test_no_error_flag(self, tmp_path, monkeypatch):
        """``is_error`` is stored as ``False`` for the event."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("simple_bash.jsonl", conn)

        event = _events(conn)[0]
        assert event["is_error"] is False

    def test_summary_written(self, tmp_path, monkeypatch):
        """A summary row exists with one tool call, no errors and the test username."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("simple_bash.jsonl", conn)

        summary = _summary(conn, "simple_bash.jsonl")
        assert summary is not None
        assert summary["tool_calls"] == 1
        assert summary["tool_errors"] == 0
        assert summary["username"] == "test-user"
class TestMcpCall:
    """mcp_call.jsonl: a single ``mcp__github__create_issue`` call is expected."""

    def test_mcp_event_type(self, tmp_path, monkeypatch):
        """The one stored event has type ``mcp_call`` and keeps the full tool name."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mcp_call.jsonl", conn)

        events = _events(conn)
        assert len(events) == 1
        event = events[0]
        assert event["event_type"] == "mcp_call"
        assert event["tool_name"] == "mcp__github__create_issue"

    def test_mcp_builtin_source(self, tmp_path, monkeypatch):
        """MCP tools not in attribution tables fall back to builtin."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mcp_call.jsonl", conn)

        event = _events(conn)[0]
        # mcp__github__create_issue is not seeded by _seed_attribution → builtin.
        assert event["source"] == "builtin"

    def test_summary_mcp_count(self, tmp_path, monkeypatch):
        """The session summary counts one MCP call."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mcp_call.jsonl", conn)

        summary = _summary(conn, "mcp_call.jsonl")
        assert summary["mcp_calls"] == 1
class TestCuratedSkill:
    """skill_curated.jsonl: ``my-skill`` should resolve to the seeded curated row."""

    def test_curated_attribution(self, tmp_path, monkeypatch):
        """The ``my-skill`` event carries source ``curated`` and ref ``mp/plug``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("skill_curated.jsonl", conn)

        attributed = conn.execute(
            "SELECT source, ref_id FROM usage_events WHERE skill_name = 'my-skill'"
        ).fetchone()
        assert attributed is not None
        source, ref_id = attributed
        assert source == "curated"
        assert ref_id == "mp/plug"

    def test_skill_invocations_count(self, tmp_path, monkeypatch):
        """The session summary counts one skill invocation."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("skill_curated.jsonl", conn)

        summary = _summary(conn, "skill_curated.jsonl")
        assert summary["skill_invocations"] == 1
class TestFleaSkill:
    """skill_flea.jsonl: ``flea-skill`` should resolve to the seeded flea row."""

    def test_flea_attribution(self, tmp_path, monkeypatch):
        """The ``flea-skill`` event carries source ``flea`` and ref ``entity-1``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("skill_flea.jsonl", conn)

        attributed = conn.execute(
            "SELECT source, ref_id FROM usage_events WHERE skill_name = 'flea-skill'"
        ).fetchone()
        assert attributed is not None
        source, ref_id = attributed
        assert source == "flea"
        assert ref_id == "entity-1"
class TestSlashCommand:
    """slash_command.jsonl: one ``compound:debug`` slash invocation is expected."""

    def test_slash_command_extracted(self, tmp_path, monkeypatch):
        """Exactly one ``slash_command`` event exists, named ``compound:debug``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("slash_command.jsonl", conn)

        slash_events = [
            event for event in _events(conn) if event["event_type"] == "slash_command"
        ]
        assert len(slash_events) == 1
        assert slash_events[0]["command_name"] == "compound:debug"

    def test_slash_command_attribution(self, tmp_path, monkeypatch):
        """compound:debug is seeded in the commands attribution table → curated."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("slash_command.jsonl", conn)

        attributed = conn.execute(
            "SELECT source, ref_id FROM usage_events WHERE command_name = 'compound:debug'"
        ).fetchone()
        assert attributed is not None
        source, ref_id = attributed
        assert source == "curated"
        assert ref_id == "mp/plug"

    def test_slash_commands_in_summary(self, tmp_path, monkeypatch):
        """The session summary counts one slash command."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("slash_command.jsonl", conn)

        summary = _summary(conn, "slash_command.jsonl")
        assert summary["slash_commands"] == 1
class TestSubagent:
    """subagent.jsonl: one dispatch of the seeded ``my-agent`` subagent is expected."""

    def test_subagent_event_type(self, tmp_path, monkeypatch):
        """The one stored event has type ``subagent`` and subagent_type ``my-agent``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("subagent.jsonl", conn)

        events = _events(conn)
        assert len(events) == 1
        event = events[0]
        assert event["event_type"] == "subagent"
        assert event["subagent_type"] == "my-agent"

    def test_subagent_attributed(self, tmp_path, monkeypatch):
        """my-agent is seeded in the agents attribution table → curated."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("subagent.jsonl", conn)

        event = _events(conn)[0]
        assert event["source"] == "curated"
        assert event["ref_id"] == "mp/plug"

    def test_subagent_dispatches_in_summary(self, tmp_path, monkeypatch):
        """The session summary counts one subagent dispatch."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("subagent.jsonl", conn)

        summary = _summary(conn, "subagent.jsonl")
        assert summary["subagent_dispatches"] == 1
class TestToolError:
    """tool_error.jsonl: one Bash call that is expected to be flagged as an error."""

    def test_error_flagged_on_event(self, tmp_path, monkeypatch):
        """The single Bash event is stored with ``is_error`` set to ``True``."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("tool_error.jsonl", conn)

        events = _events(conn)
        assert len(events) == 1
        event = events[0]
        assert event["tool_name"] == "Bash"
        assert event["is_error"] is True

    def test_tool_errors_in_summary(self, tmp_path, monkeypatch):
        """The summary counts the call once as a tool call and once as an error."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("tool_error.jsonl", conn)

        summary = _summary(conn, "tool_error.jsonl")
        assert summary["tool_errors"] == 1
        assert summary["tool_calls"] == 1
class TestMixedSession:
    """mixed.jsonl: a session combining several event kinds, one of them failing."""

    def test_mixed_event_counts(self, tmp_path, monkeypatch):
        """Every expected event type shows up at least once."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mixed.jsonl", conn)

        seen_types = {event["event_type"] for event in _events(conn)}
        # The fixture is described as: one slash_command + one tool_use (Bash) +
        # one tool_use (Skill) + one mcp_call + one subagent + one tool_use
        # (Bash with error) = 6 events.  Only the presence of each type is
        # asserted here, not the total.
        for expected_type in ("slash_command", "tool_use", "mcp_call", "subagent"):
            assert expected_type in seen_types

    def test_mixed_summary_counts(self, tmp_path, monkeypatch):
        """The summary row reports one of each counted category."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mixed.jsonl", conn)

        summary = _summary(conn, "mixed.jsonl")
        assert summary is not None
        expected_counts = {
            "mcp_calls": 1,
            "subagent_dispatches": 1,
            "skill_invocations": 1,
            "slash_commands": 1,
            "tool_errors": 1,
        }
        for column, expected in expected_counts.items():
            assert summary[column] == expected

    def test_mixed_error_correlated(self, tmp_path, monkeypatch):
        """Only one event is flagged as an error, and it is the Bash call."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        _process("mixed.jsonl", conn)

        failing = conn.execute(
            "SELECT tool_name FROM usage_events WHERE is_error = TRUE"
        ).fetchall()
        assert len(failing) == 1
        assert failing[0][0] == "Bash"
class TestEmptySession:
    """empty.jsonl: a session that should produce no usage events at all."""

    def test_zero_events_writes_summary(self, tmp_path, monkeypatch):
        """Empty session (only system/summary turns) yields 0 events but a summary row."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)
        # The return value is not needed here; it is checked separately in
        # test_processor_result_zero_items.
        _process("empty.jsonl", conn)

        assert len(_events(conn)) == 0
        summary = _summary(conn, "empty.jsonl")
        assert summary is not None
        assert summary["tool_calls"] == 0

    def test_processor_result_zero_items(self, tmp_path, monkeypatch):
        """The processor result reports ``items_count == 0`` for the empty session."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)

        result = _process("empty.jsonl", conn)
        assert result.items_count == 0
class TestIdempotency:
    """Processing the same session twice must not change the stored event count."""

    @staticmethod
    def _event_count(conn):
        """Return the current number of rows in ``usage_events``."""
        return conn.execute("SELECT COUNT(*) FROM usage_events").fetchone()[0]

    def test_reprocess_same_event_count(self, tmp_path, monkeypatch):
        """INSERT OR IGNORE: processing the same session twice yields same event count."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)

        _process("simple_bash.jsonl", conn)
        after_first = self._event_count(conn)
        _process("simple_bash.jsonl", conn)
        after_second = self._event_count(conn)

        assert after_first == after_second == 1

    def test_reprocess_mixed_idempotent(self, tmp_path, monkeypatch):
        """Re-running the mixed fixture leaves the event count unchanged."""
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)

        _process("mixed.jsonl", conn)
        after_first = self._event_count(conn)
        _process("mixed.jsonl", conn)
        after_second = self._event_count(conn)

        assert after_first == after_second
class TestMultiToolTurnDedup:
    """Regression coverage for several tool_use blocks inside one assistant turn."""

    def test_two_tool_calls_in_same_turn_produce_two_events(self, tmp_path, monkeypatch):
        """Two parallel Bash calls in the same assistant turn must produce 2 distinct events.

        Regression — earlier bug: same event_uuid + same tool_name collided in id hash,
        so the second tool_use was silently dropped by INSERT OR IGNORE.  Both
        blocks below therefore deliberately share the tool name (Bash) and
        differ only in their tool_use id and input.
        """
        conn = _fresh_db(tmp_path, monkeypatch)
        _seed_attribution(conn)

        # One assistant turn (single uuid) carrying two same-named tool_use blocks.
        assistant_turn = {
            "uuid": "turn-1",
            "parentUuid": None,
            "type": "assistant",
            "sessionId": "sess-multi",
            "timestamp": "2026-05-12T10:00:00Z",
            "message": {
                "role": "assistant",
                "model": "claude-x",
                "content": [
                    {"type": "tool_use", "id": "tu_a", "name": "Bash", "input": {"command": "ls"}},
                    {"type": "tool_use", "id": "tu_b", "name": "Bash", "input": {"command": "pwd"}},
                ],
            },
        }
        jsonl_path = tmp_path / "multi_tool_turn.jsonl"
        # Explicit encoding so the file does not depend on the platform default.
        jsonl_path.write_text(json.dumps(assistant_turn) + "\n", encoding="utf-8")

        from services.session_processors.usage import UsageProcessor

        UsageProcessor().process_session(
            session_path=jsonl_path,
            username="alice",
            session_key="alice/multi_tool_turn.jsonl",
            conn=conn,
        )

        n = conn.execute(
            "SELECT COUNT(*) FROM usage_events WHERE session_id='sess-multi'"
        ).fetchone()[0]
        assert n == 2, f"expected 2 events (one per tu_xxx), got {n}"
class TestCommandNameTagExtraction:
"""Slash invocations arrive as <command-name>/foo</command-name> embedded in
user message content (Claude Code's wire format). Unit-test iter_events
against synthetic turns so a future shape shift doesn't silently regress."""
@staticmethod
def _user_turn(content):
return {
"type": "user",
"uuid": "u1",
"parentUuid": None,
"sessionId": "sess-cn",
"timestamp": "2026-05-14T10:00:00.000Z",
"cwd": "/workspace",
"message": {"role": "user", "content": content},
}
def test_extracts_command_name_from_string_content(self):
from services.session_processors.usage_lib import iter_events
turn = self._user_turn(
"<command-name>/clear</command-name>\n<command-args></command-args>"
)
events = list(iter_events([turn]))
assert len(events) == 1
assert events[0].event_type == "slash_command"
assert events[0].command_name == "clear"
def test_extracts_command_name_from_text_block(self):
"""Defensive: same regex behavior when content arrives as a list-of-blocks
instead of a plain string, in case Claude Code's wire format shifts."""
from services.session_processors.usage_lib import iter_events
turn = self._user_turn(
[{"type": "text", "text": "<command-name>/plugin:name</command-name>"}]
)
events = list(iter_events([turn]))
assert len(events) == 1
assert events[0].command_name == "plugin:name"
def test_command_name_not_at_start_still_matches(self):
"""Real Claude Code prepends a <command-message> sibling before the
<command-name> tag — regex must search, not anchor at start."""
from services.session_processors.usage_lib import iter_events
turn = self._user_turn(
"<command-message>foo</command-message>\n"
"<command-name>/foo</command-name>\n"
"<command-args>some arg</command-args>"
)
events = list(iter_events([turn]))
assert len(events) == 1
assert events[0].command_name == "foo"
def test_plain_text_without_tag_does_not_match(self):
"""A user message that happens to contain '/foo' as prose, but no
<command-name> tag, must NOT yield a slash_command event — that's the
whole point of switching from the old `^\\s*/<name>` regex."""
from services.session_processors.usage_lib import iter_events
turn = self._user_turn("Hello world, see /not-a-command-just-prose for context.")
events = list(iter_events([turn]))
assert events == []