diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3b38d5f..dfdd41d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n
 - `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`.
 - Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`).
 - `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces.
+- `agnes push` now reads Claude Code session jsonls from `~/.claude/projects/<encoded-cwd>/` (where Claude Code actually writes them), instead of `<workspace>/user/sessions/` (which the SessionEnd hook never populated — the previous code uploaded an empty list every time). Encoding logic in `cli/lib/claude_sessions.py` probes both Claude Code variants — older `/`→`-` and newer all-non-alphanumeric→`-` (with and without collapsing runs of `-`) — and unions the result, so users who have upgraded Claude Code mid-project see sessions from both encoded dirs. Falls back to `<workspace>/user/sessions/` for back-compat.
 
 ### Removed
 - `da analyst setup`, `da analyst status`, `da sync`, `da fetch`, `da metrics`. See **Changed** for replacements.
diff --git a/cli/commands/push.py b/cli/commands/push.py
index b3464f0..525fc9e 100644
--- a/cli/commands/push.py
+++ b/cli/commands/push.py
@@ -68,16 +68,16 @@ def push(
         raise typer.Exit(1)
 
     workspace = Path(os.environ.get("AGNES_LOCAL_DIR", ".")).resolve()
-    sessions_dir = workspace / "user" / "sessions"
     local_md = workspace / ".claude" / "CLAUDE.local.md"
 
-    # Lazy: only enumerate when the directory actually exists. We must not
-    # mkdir here - the empty-workspace case must leave disk untouched so
-    # the SessionEnd hook stays a true no-op for analysts who haven't
-    # produced any sessions yet.
-    session_files = (
-        sorted(sessions_dir.glob("*.jsonl")) if sessions_dir.exists() else []
-    )
+    # Claude Code writes session jsonls to ~/.claude/projects/<encoded-cwd>/
+    # — the encoding varies by Claude Code version (older: `/` -> `-`,
+    # newer: all non-alphanumeric -> `-`). The helper tries every encoding
+    # and also falls back to the legacy <workspace>/user/sessions/ for
+    # setups that mirror sessions there explicitly. See
+    # cli/lib/claude_sessions.py for details.
+    from cli.lib.claude_sessions import list_session_files
+    session_files = list_session_files(workspace)
     has_local_md = local_md.exists()
 
     if dry_run:
diff --git a/cli/lib/claude_sessions.py b/cli/lib/claude_sessions.py
new file mode 100644
index 0000000..cf7b890
--- /dev/null
+++ b/cli/lib/claude_sessions.py
@@ -0,0 +1,161 @@
+"""Locate Claude Code session transcripts on disk.
+
+Claude Code writes session jsonls to ``~/.claude/projects/<encoded-cwd>/``,
+where the cwd encoding is **version-dependent**:
+
+- **Older versions**: replace ``/`` with ``-``, preserve everything else
+  (spaces, tildes, dots, underscores). This is what we observe on macOS
+  with iCloud paths today.
+
+- **Newer versions** (and likely the default on Windows): replace every
+  non-alphanumeric character with ``-``. Some releases then collapse runs
+  of consecutive ``-`` and some leave them alone, so both forms are
+  probed.
+
+We probe every candidate encoding and return **all** directories that
+exist. This is forward-compatible: if Claude Code adds another encoding
+scheme later, extend the variant list.
+
+Cross-platform notes:
+- ``~/.claude/projects/`` resolves via ``Path.home()``, which honors
+  ``$HOME`` on POSIX and ``%USERPROFILE%`` on Windows.
+- On Windows, the cwd will look like ``C:\\Users\\foo\\workspace``; the
+  variant-B (non-alphanumeric -> ``-``) encoding handles drive letters
+  and backslashes naturally. Variant A leaves the drive and backslashes
+  in place, so its result is still an absolute path there; such
+  candidates are rejected instead of being joined onto the projects dir.
+
+The legacy ``<workspace>/user/sessions/`` directory is preserved as a
+fallback for setups that explicitly mirror sessions there (e.g. a
+custom hook). Files from the Claude Code dirs take precedence; the legacy
+directory only contributes filenames that were not found there.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Iterator
+
+
+_PROJECTS_DIR = Path.home() / ".claude" / "projects"
+
+
+def _encode_variant_a(cwd: str) -> str:
+    """Older Claude Code: replace ``/`` with ``-``. Preserves spaces, tildes,
+    dots, underscores, etc. Observed in production on macOS with iCloud paths.
+    """
+    return cwd.replace("/", "-")
+
+
+def _encode_variant_b_raw(cwd: str) -> str:
+    """Newer Claude Code, uncollapsed form: replace every non-alphanumeric
+    character with ``-`` and leave runs of ``-`` as they are.
+    """
+    return re.sub(r"[^a-zA-Z0-9]", "-", cwd)
+
+
+def _encode_variant_b(cwd: str) -> str:
+    """Newer Claude Code: replace every non-alphanumeric with ``-``, then
+    collapse consecutive ``-`` to a single one. Matches slugify-style
+    encoding used by recent releases.
+    """
+    # Some Claude Code versions leave the runs alone; others collapse. The
+    # uncollapsed form is probed separately (see ``_candidate_encodings``).
+    return re.sub(r"-+", "-", _encode_variant_b_raw(cwd))
+
+
+def _candidate_encodings(cwd: str) -> Iterator[str]:
+    """Yield candidate encoded directory names for *cwd*, ordered by
+    expected frequency.
+
+    All variants are emitted regardless of platform — Claude Code's
+    encoding is a function of its release version, not the host OS.
+    Duplicates are possible (the two variant-B forms coincide when the
+    path has no adjacent non-alphanumerics); callers de-duplicate.
+    """
+    yield _encode_variant_a(cwd)
+    yield _encode_variant_b(cwd)
+    yield _encode_variant_b_raw(cwd)
+
+
+def find_claude_sessions_dirs(workspace: Path) -> list[Path]:
+    """Return every ``~/.claude/projects/<encoded-cwd>/`` directory that
+    exists for *workspace* — usually one, but more when the user has run
+    both older and newer Claude Code versions in the same cwd (each version
+    writes to its own encoded dir). Returns an empty list when nothing
+    matches.
+
+    Reading all matching dirs is the correct default: if we picked only
+    one, the picker would either miss the newest sessions (if it picks
+    the older variant) or miss historical sessions still in the older
+    variant's dir.
+    """
+    cwd = str(workspace.resolve())
+
+    found: list[Path] = []
+    seen: set[str] = set()
+    for encoded in _candidate_encodings(cwd):
+        if encoded in seen:
+            continue
+        seen.add(encoded)
+        candidate = _PROJECTS_DIR / encoded
+        # An encoding that still contains path separators or a drive (variant
+        # A on Windows) makes the join above escape or replace _PROJECTS_DIR,
+        # which would let the workspace itself pass for a sessions dir. Only
+        # direct children of the projects dir are valid candidates.
+        if candidate.parent != _PROJECTS_DIR:
+            continue
+        if candidate.is_dir():
+            found.append(candidate)
+
+    return found
+
+
+def find_claude_sessions_dir(workspace: Path) -> Path | None:
+    """Return the first matching ``~/.claude/projects/<encoded-cwd>/``
+    directory or ``None``. Kept for callers that only need a yes/no answer;
+    prefer :func:`find_claude_sessions_dirs` when listing files.
+    """
+    dirs = find_claude_sessions_dirs(workspace)
+    return dirs[0] if dirs else None
+
+
+def list_session_files(workspace: Path) -> list[Path]:
+    """Return ``*.jsonl`` files under **all** Claude Code project directories
+    matching *workspace*, plus the legacy ``<workspace>/user/sessions/``
+    fallback.
+
+    Dedup rule when the same filename appears in multiple sources:
+    - Among the Claude project dirs, the **most recently modified** copy
+      wins. This handles the rare case of the same session-id surfacing
+      under both encoding variants — pick the live writer's version.
+    - The legacy dir is only consulted for filenames absent from the
+      Claude dirs. It exists for back-compat with hook-managed mirrors
+      (which haven't run since this rewrite landed, but on-disk state may
+      linger).
+
+    Result is sorted by filename for deterministic upload order.
+    """
+    newest: dict[str, tuple[float, Path]] = {}
+
+    for claude_dir in find_claude_sessions_dirs(workspace):
+        for f in claude_dir.glob("*.jsonl"):
+            try:
+                mtime = f.stat().st_mtime
+            except OSError:
+                # Vanished (or became unreadable) between the glob and the
+                # stat; a file we cannot stat cannot be uploaded either.
+                continue
+            current = newest.get(f.name)
+            if current is None or mtime > current[0]:
+                newest[f.name] = (mtime, f)
+
+    files = {name: path for name, (_, path) in newest.items()}
+
+    legacy_dir = workspace / "user" / "sessions"
+    if legacy_dir.is_dir():
+        for f in legacy_dir.glob("*.jsonl"):
+            files.setdefault(f.name, f)
+
+    return sorted(files.values(), key=lambda p: p.name)
diff --git a/tests/test_claude_sessions.py b/tests/test_claude_sessions.py
new file mode 100644
index 0000000..300141d
--- /dev/null
+++ b/tests/test_claude_sessions.py
@@ -0,0 +1,291 @@
+"""Tests for ``cli.lib.claude_sessions`` — Claude Code session locator.
+
+The locator must handle the two encoding schemes Claude Code uses to map a
+workspace cwd to a directory name under ``~/.claude/projects/``:
+
+- **Variant A** (older): replace ``/`` with ``-``, preserve everything else.
+- **Variant B** (newer / Windows): replace every non-alphanumeric with ``-``,
+  collapse consecutive ``-``.
+
+Tests use ``monkeypatch`` to point the module's ``_PROJECTS_DIR`` constant
+at a tmp dir so we can fabricate either encoding there and verify the helper
+finds it.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from cli.lib import claude_sessions
+
+
+@pytest.fixture
+def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Redirect ``~/.claude/projects/`` to a tmp location.
+
+    The module captures ``Path.home() / ".claude" / "projects"`` as a module
+    constant at import time, so we patch the constant directly rather than
+    monkeypatching ``Path.home``.
+    """
+    home = tmp_path / "home"
+    projects = home / ".claude" / "projects"
+    projects.mkdir(parents=True)
+    monkeypatch.setattr(claude_sessions, "_PROJECTS_DIR", projects)
+    return home
+
+
+def test_encode_variant_a_replaces_slashes_only():
+    enc = claude_sessions._encode_variant_a("/Users/foo/My Workspace")
+    assert enc == "-Users-foo-My Workspace"
+
+
+def test_encode_variant_b_replaces_all_nonalnum_and_collapses():
+    enc = claude_sessions._encode_variant_b("/Users/foo/My Workspace.dir")
+    # Spaces, slashes, and dots all become single dashes (collapsed).
+    assert enc == "-Users-foo-My-Workspace-dir"
+
+
+def test_encode_variant_b_handles_windows_path():
+    enc = claude_sessions._encode_variant_b("C:\\Users\\foo\\workspace")
+    # Backslashes + colon all → '-'; collapsed.
+    assert enc == "C-Users-foo-workspace"
+
+
+def test_find_claude_sessions_dir_variant_a_match(
+    fake_home: Path, tmp_path: Path
+):
+    """Workspace cwd encodes via variant A on disk → helper returns it."""
+    workspace = tmp_path / "My Workspace"
+    workspace.mkdir()
+    encoded = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    target = claude_sessions._PROJECTS_DIR / encoded
+    target.mkdir()
+
+    found = claude_sessions.find_claude_sessions_dir(workspace)
+    assert found == target
+
+
+def test_find_claude_sessions_dir_variant_b_match(
+    fake_home: Path, tmp_path: Path
+):
+    """Workspace cwd encodes via variant B → helper returns it."""
+    workspace = tmp_path / "My.Workspace"  # has dots → variant A and B differ
+    workspace.mkdir()
+
+    encoded_b = claude_sessions._encode_variant_b(str(workspace.resolve()))
+    encoded_a = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    # Sanity: the two encodings really do differ for this fixture.
+    assert encoded_a != encoded_b
+
+    target = claude_sessions._PROJECTS_DIR / encoded_b
+    target.mkdir()
+
+    found = claude_sessions.find_claude_sessions_dir(workspace)
+    assert found == target
+
+
+def test_find_claude_sessions_dir_no_match_returns_none(
+    fake_home: Path, tmp_path: Path
+):
+    """No encoded dir exists → returns None (caller falls back to legacy)."""
+    workspace = tmp_path / "untouched"
+    workspace.mkdir()
+    assert claude_sessions.find_claude_sessions_dir(workspace) is None
+
+
+def test_find_claude_sessions_dirs_returns_all_when_both_exist(
+    fake_home: Path, tmp_path: Path
+):
+    """When both encoded dirs exist on disk (older + newer Claude Code
+    versions sharing the same cwd), the helper returns BOTH so the caller
+    can union their session files. This matches reality: users who have
+    upgraded Claude Code mid-project end up with two sibling project dirs,
+    each holding a slice of their session history."""
+    workspace = tmp_path / "My.Wkspace"  # ensure A != B
+    workspace.mkdir()
+    enc_a = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    enc_b = claude_sessions._encode_variant_b(str(workspace.resolve()))
+    assert enc_a != enc_b
+    (claude_sessions._PROJECTS_DIR / enc_a).mkdir()
+    (claude_sessions._PROJECTS_DIR / enc_b).mkdir()
+
+    dirs = claude_sessions.find_claude_sessions_dirs(workspace)
+    assert set(dirs) == {
+        claude_sessions._PROJECTS_DIR / enc_a,
+        claude_sessions._PROJECTS_DIR / enc_b,
+    }
+
+
+def test_find_claude_sessions_dirs_matches_uncollapsed_variant_b(
+    fake_home: Path, tmp_path: Path
+):
+    """Releases that do NOT collapse runs of ``-`` encode ``/.hidden`` as
+    ``--hidden``; that directory must be found too."""
+    workspace = tmp_path / ".hidden" / "proj"
+    workspace.mkdir(parents=True)
+    cwd = str(workspace.resolve())
+    raw_b = claude_sessions._encode_variant_b_raw(cwd)
+    # Sanity: collapsing really changes the name for this fixture.
+    assert raw_b != claude_sessions._encode_variant_b(cwd)
+    target = claude_sessions._PROJECTS_DIR / raw_b
+    target.mkdir()
+
+    assert claude_sessions.find_claude_sessions_dirs(workspace) == [target]
+
+
+def test_find_claude_sessions_dirs_ignores_candidates_outside_projects_dir(
+    fake_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):
+    """An encoding that is still an absolute path (variant A on Windows
+    keeps ``C:\\...``) replaces the projects dir when joined; the workspace
+    itself must not be reported as a sessions dir."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+    monkeypatch.setattr(
+        claude_sessions, "_candidate_encodings", lambda cwd: iter([cwd])
+    )
+
+    assert claude_sessions.find_claude_sessions_dirs(workspace) == []
+
+
+def test_list_session_files_unions_both_variants(
+    fake_home: Path, tmp_path: Path
+):
+    """When the same workspace has both encoded dirs, files from both must
+    surface in the listing — that's the whole point of probing both."""
+    workspace = tmp_path / "My.Wkspace"
+    workspace.mkdir()
+    enc_a = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    enc_b = claude_sessions._encode_variant_b(str(workspace.resolve()))
+    assert enc_a != enc_b
+    dir_a = claude_sessions._PROJECTS_DIR / enc_a
+    dir_b = claude_sessions._PROJECTS_DIR / enc_b
+    dir_a.mkdir()
+    dir_b.mkdir()
+    (dir_a / "old.jsonl").write_text("{}\n")
+    (dir_b / "new.jsonl").write_text("{}\n")
+
+    files = claude_sessions.list_session_files(workspace)
+    assert sorted(f.name for f in files) == ["new.jsonl", "old.jsonl"]
+
+
+def test_list_session_files_picks_newest_when_same_name_in_both_variants(
+    fake_home: Path, tmp_path: Path
+):
+    """Same session id under both encoded dirs → take the most recently
+    modified copy. Models the case where Claude Code was upgraded mid-
+    session and re-wrote the same id under the new encoding."""
+    import os
+    import time
+
+    workspace = tmp_path / "My.Wkspace"
+    workspace.mkdir()
+    enc_a = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    enc_b = claude_sessions._encode_variant_b(str(workspace.resolve()))
+    assert enc_a != enc_b
+    dir_a = claude_sessions._PROJECTS_DIR / enc_a
+    dir_b = claude_sessions._PROJECTS_DIR / enc_b
+    dir_a.mkdir()
+    dir_b.mkdir()
+
+    older = dir_a / "shared.jsonl"
+    older.write_text('{"src":"a-old"}\n')
+    # Push older mtime back so the newer write is unambiguously newer.
+    past = time.time() - 3600
+    os.utime(older, (past, past))
+
+    newer = dir_b / "shared.jsonl"
+    newer.write_text('{"src":"b-new"}\n')
+
+    files = claude_sessions.list_session_files(workspace)
+    assert len(files) == 1
+    assert files[0].read_text() == '{"src":"b-new"}\n'
+
+
+def test_list_session_files_reads_from_claude_dir(
+    fake_home: Path, tmp_path: Path
+):
+    """When Claude Code wrote sessions to ~/.claude/projects/<encoded-cwd>/, they
+    show up in the list — even though <workspace>/user/sessions/ is empty."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+    enc = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    target = claude_sessions._PROJECTS_DIR / enc
+    target.mkdir()
+    (target / "session-1.jsonl").write_text('{"event":"hi"}\n')
+    (target / "session-2.jsonl").write_text('{"event":"there"}\n')
+
+    files = claude_sessions.list_session_files(workspace)
+    assert [f.name for f in files] == ["session-1.jsonl", "session-2.jsonl"]
+    # Each file must come from the Claude dir, not legacy.
+    for f in files:
+        assert str(f).startswith(str(target))
+
+
+def test_list_session_files_falls_back_to_legacy(
+    fake_home: Path, tmp_path: Path
+):
+    """No Claude dir exists, but <workspace>/user/sessions/ does → legacy
+    files are returned (back-compat for hook-managed mirrors)."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+    legacy = workspace / "user" / "sessions"
+    legacy.mkdir(parents=True)
+    (legacy / "old.jsonl").write_text('{"event":"legacy"}\n')
+
+    files = claude_sessions.list_session_files(workspace)
+    assert [f.name for f in files] == ["old.jsonl"]
+
+
+def test_list_session_files_dedupes_by_name_claude_wins(
+    fake_home: Path, tmp_path: Path
+):
+    """Both Claude dir and legacy dir contain a same-named jsonl. Helper
+    returns one entry, sourced from the Claude dir (live writer)."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+    enc = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    target = claude_sessions._PROJECTS_DIR / enc
+    target.mkdir()
+    (target / "shared.jsonl").write_text('{"src":"claude"}\n')
+
+    legacy = workspace / "user" / "sessions"
+    legacy.mkdir(parents=True)
+    (legacy / "shared.jsonl").write_text('{"src":"legacy"}\n')
+
+    files = claude_sessions.list_session_files(workspace)
+    assert len(files) == 1
+    assert files[0].read_text() == '{"src":"claude"}\n'
+
+
+def test_list_session_files_unions_when_disjoint(
+    fake_home: Path, tmp_path: Path
+):
+    """Different filenames in each dir → both surface in the result."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+    enc = claude_sessions._encode_variant_a(str(workspace.resolve()))
+    target = claude_sessions._PROJECTS_DIR / enc
+    target.mkdir()
+    (target / "fresh.jsonl").write_text("{}\n")
+
+    legacy = workspace / "user" / "sessions"
+    legacy.mkdir(parents=True)
+    (legacy / "old.jsonl").write_text("{}\n")
+
+    files = claude_sessions.list_session_files(workspace)
+    assert sorted(f.name for f in files) == ["fresh.jsonl", "old.jsonl"]
+
+
+def test_list_session_files_empty_returns_empty_list(
+    fake_home: Path, tmp_path: Path
+):
+    """No sources exist at all → empty list, no mkdir side effect."""
+    workspace = tmp_path / "wkspace"
+    workspace.mkdir()
+
+    files = claude_sessions.list_session_files(workspace)
+    assert files == []
+    assert not (workspace / "user").exists()