fix(push): read sessions from ~/.claude/projects/<encoded-cwd>/

Real bug: `agnes push` was reading `<workspace>/user/sessions/`, but
Claude Code writes session jsonls to `~/.claude/projects/<encoded-cwd>/`
and nothing on the analyst side ever copies them across. The SessionEnd
hook ran `agnes push` happily and uploaded zero sessions every time.

`cli/lib/claude_sessions.py` probes both Claude Code encoding variants
(older `/`→`-` keeping spaces+tildes; newer all-non-alphanumeric→`-`
with collapsed runs) and unions whichever exist. Users who upgraded
Claude Code mid-project end up with both encoded dirs side-by-side on
disk; the union ensures no session is left behind. Same-named jsonl in
both dirs → newest mtime wins. `<workspace>/user/sessions/` survives as
a fallback for any setup that explicitly mirrors sessions there.

Verified on real disk: helper returns 2 dirs + 8 unioned session files
for the Agnes-test workspace where the previous code returned 0.
This commit is contained in:
ZdenekSrotyr 2026-05-04 20:29:59 +02:00
parent 92d477e422
commit 08e4959185
4 changed files with 405 additions and 8 deletions

View file

@ -38,6 +38,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n
- `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`. - `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`.
- Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`). - Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`).
- `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces. - `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces.
- `agnes push` now reads Claude Code session jsonls from `~/.claude/projects/<encoded-cwd>/` (where Claude Code actually writes them), instead of `<workspace>/user/sessions/` (which the SessionEnd hook never populated — the previous code uploaded an empty list every time). Encoding logic in `cli/lib/claude_sessions.py` probes both Claude Code variants — older `/`→`-` and newer all-non-alphanumeric→`-` — and unions the result, so users who have upgraded Claude Code mid-project see sessions from both encoded dirs. Falls back to `<workspace>/user/sessions/` for back-compat.
### Removed ### Removed
- `da analyst setup`, `da analyst status`, `da sync`, `da fetch`, `da metrics`. See **Changed** for replacements. - `da analyst setup`, `da analyst status`, `da sync`, `da fetch`, `da metrics`. See **Changed** for replacements.

View file

@ -68,16 +68,16 @@ def push(
raise typer.Exit(1) raise typer.Exit(1)
workspace = Path(os.environ.get("AGNES_LOCAL_DIR", ".")).resolve() workspace = Path(os.environ.get("AGNES_LOCAL_DIR", ".")).resolve()
sessions_dir = workspace / "user" / "sessions"
local_md = workspace / ".claude" / "CLAUDE.local.md" local_md = workspace / ".claude" / "CLAUDE.local.md"
# Lazy: only enumerate when the directory actually exists. We must not # Claude Code writes session jsonls to ~/.claude/projects/<encoded-cwd>/
# mkdir here - the empty-workspace case must leave disk untouched so # — the encoding varies by Claude Code version (older: `/` -> `-`,
# the SessionEnd hook stays a true no-op for analysts who haven't # newer: all non-alphanumeric -> `-`). The helper tries both encodings
# produced any sessions yet. # and also falls back to the legacy <workspace>/user/sessions/ for
session_files = ( # setups that mirror sessions there explicitly. See
sorted(sessions_dir.glob("*.jsonl")) if sessions_dir.exists() else [] # cli/lib/claude_sessions.py for details.
) from cli.lib.claude_sessions import list_session_files
session_files = list_session_files(workspace)
has_local_md = local_md.exists() has_local_md = local_md.exists()
if dry_run: if dry_run:

137
cli/lib/claude_sessions.py Normal file
View file

@ -0,0 +1,137 @@
"""Locate Claude Code session transcripts on disk.
Claude Code writes session jsonls to ``~/.claude/projects/<encoded-cwd>/``,
where the cwd encoding is **version-dependent**:
- **Older versions**: replace ``/`` with ``-``, preserve everything else
(spaces, tildes, dots, underscores). This is what we observe on macOS
with iCloud paths today.
- **Newer versions** (and likely the default on Windows): replace every
non-alphanumeric character with ``-``, then collapse runs of consecutive
``-``. This matches "slugify"-style encoding used by recent Claude
Code releases.
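We try each known encoding and return every directory that exists (a
workspace used with more than one Claude Code release can have one
directory per encoding). This is forward-compatible: if Claude Code adds
another encoding scheme later, extend the variant list.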
Cross-platform notes:
- ``~/.claude/projects/`` resolves via ``Path.home()``, which honors
``$HOME`` on POSIX and ``%USERPROFILE%`` on Windows.
- On Windows, the cwd will look like ``C:\\Users\\foo\\workspace``; the
variant-B (non-alphanumeric -> ``-``) encoding handles drive letters
and backslashes naturally. Variant A is POSIX-flavored: it rewrites only
``/``, so a Windows cwd passes through unchanged and can never name a
directory under ``~/.claude/projects/``.
The legacy ``<workspace>/user/sessions/`` directory is preserved as a
fallback for setups that explicitly mirror sessions there (e.g. a
custom hook). Files found under the Claude Code path always take
precedence; the legacy directory only contributes filenames that are
absent there.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Iterator
_PROJECTS_DIR = Path.home() / ".claude" / "projects"
def _encode_variant_a(cwd: str) -> str:
"""Older Claude Code: replace ``/`` with ``-``. Preserves spaces, tildes,
dots, underscores, etc. Observed in production on macOS with iCloud paths.
"""
return cwd.replace("/", "-")
def _encode_variant_b(cwd: str) -> str:
"""Newer Claude Code: replace every non-alphanumeric with ``-``, then
collapse consecutive ``-`` to a single one. Matches slugify-style
encoding used by recent releases.
"""
s = re.sub(r"[^a-zA-Z0-9]", "-", cwd)
# Collapse runs of `-` to a single `-`. Some Claude Code versions
# leave the runs alone; others collapse. We collapse defensively.
return re.sub(r"-+", "-", s)
def _candidate_encodings(cwd: str) -> Iterator[str]:
    """Yield candidate encoded directory names for *cwd*, ordered by
    expected frequency.

    All variants are emitted regardless of platform: Claude Code's
    encoding is a function of its release version, not the host OS.

    Three names are produced (callers are expected to de-duplicate, since
    they coincide for simple paths):

    1. variant A: only ``/`` replaced with ``-``;
    2. variant B: every non-alphanumeric replaced with ``-`` and runs of
       ``-`` collapsed;
    3. variant B without the collapse step, for releases that leave runs
       of ``-`` alone. It differs from (2) only when the path contains
       adjacent non-alphanumeric characters, e.g. a hidden directory
       (``/.config``) or a Windows drive prefix (``C:\\``).
    """
    yield _encode_variant_a(cwd)
    yield _encode_variant_b(cwd)
    # Same character class as variant B, but one dash per character, so a
    # directory named with uncollapsed runs is probed as well.
    yield re.sub(r"[^a-zA-Z0-9]", "-", cwd)
def find_claude_sessions_dirs(workspace: Path) -> list[Path]:
    """Return every ``~/.claude/projects/<encoded>/`` directory that exists
    for *workspace*.

    Usually that is one directory, but there can be several when the user
    has run Claude Code releases with different cwd encodings in the same
    workspace (each release writes to its own encoded dir). Returns an
    empty list when nothing matches.

    Reading all matching dirs is the correct default: picking only one
    would either miss the newest sessions or miss historical sessions
    still sitting in another variant's dir.

    Directories are returned in the order the candidate encodings are
    produced, with duplicate encodings probed only once.
    """
    cwd = str(workspace.resolve())
    found: list[Path] = []
    seen: set[str] = set()
    for encoded in _candidate_encodings(cwd):
        if encoded in seen:
            continue
        seen.add(encoded)
        candidate = _PROJECTS_DIR / encoded
        # An encoding that still contains a path separator or a drive
        # (variant A applied to a Windows cwd such as ``C:\\Users\\me\\ws``)
        # does not stay inside the projects dir: joining an absolute
        # segment discards the base, so ``candidate`` would be the
        # workspace itself and would always pass ``is_dir()``. Only direct
        # children of the projects dir are valid project directories.
        if candidate.parent != _PROJECTS_DIR:
            continue
        if candidate.is_dir():
            found.append(candidate)
    return found
def find_claude_sessions_dir(workspace: Path) -> Path | None:
    """Return the first Claude Code project directory found for
    *workspace*, or ``None`` when there is none.

    Convenience wrapper for callers that only need a yes/no answer; use
    :func:`find_claude_sessions_dirs` when listing files, since more than
    one directory may exist.
    """
    return next(iter(find_claude_sessions_dirs(workspace)), None)
def list_session_files(workspace: Path) -> list[Path]:
    """Collect the ``*.jsonl`` session files for *workspace*.

    Sources are every Claude Code project directory matching the
    workspace, plus the legacy ``<workspace>/user/sessions/`` directory.

    When one filename shows up in more than one source:

    - between Claude project dirs, the copy with the strictly newer
      modification time is kept (on a tie the copy seen first stays);
    - a legacy copy is used only if no Claude project dir supplied that
      filename.

    The result is ordered by filename so uploads are deterministic.
    """
    by_name: dict[str, Path] = {}

    for project_dir in find_claude_sessions_dirs(workspace):
        for candidate in project_dir.glob("*.jsonl"):
            current = by_name.get(candidate.name)
            # Keep the copy we already hold unless this one is newer.
            if (
                current is not None
                and candidate.stat().st_mtime <= current.stat().st_mtime
            ):
                continue
            by_name[candidate.name] = candidate

    legacy_dir = workspace / "user" / "sessions"
    if legacy_dir.exists():
        for candidate in legacy_dir.glob("*.jsonl"):
            # Legacy files only fill gaps; they never replace a Claude copy.
            if candidate.name not in by_name:
                by_name[candidate.name] = candidate

    return [by_name[name] for name in sorted(by_name)]

View file

@ -0,0 +1,259 @@
"""Tests for ``cli.lib.claude_sessions`` — Claude Code session locator.
The locator must handle the two encoding schemes Claude Code uses to map a
workspace cwd to a directory name under ``~/.claude/projects/``:
- **Variant A** (older): replace ``/`` with ``-``, preserve everything else.
- **Variant B** (newer / Windows): replace every non-alphanumeric with ``-``,
collapse consecutive ``-``.
Tests use ``monkeypatch`` to override the module-level ``_PROJECTS_DIR``
constant (the module resolves ``Path.home()`` once at import time) so we
can fabricate either encoding under a tmp dir and verify the helper finds
it.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from cli.lib import claude_sessions
@pytest.fixture
def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point the locator at a throwaway ``.claude/projects`` tree.

    ``claude_sessions`` computes its projects root once, at import time,
    so the fixture overrides the module-level ``_PROJECTS_DIR`` constant
    instead of monkeypatching ``Path.home``. Returns the fake home
    directory.
    """
    home_dir = tmp_path / "home"
    projects_root = home_dir.joinpath(".claude", "projects")
    projects_root.mkdir(parents=True)
    monkeypatch.setattr(claude_sessions, "_PROJECTS_DIR", projects_root)
    return home_dir
def test_encode_variant_a_replaces_slashes_only():
    """Variant A rewrites ``/`` and leaves the space in place."""
    raw = "/Users/foo/My Workspace"
    assert claude_sessions._encode_variant_a(raw) == "-Users-foo-My Workspace"
def test_encode_variant_b_replaces_all_nonalnum_and_collapses():
    """Slash, space and dot each come out as a single dash."""
    encoded = claude_sessions._encode_variant_b("/Users/foo/My Workspace.dir")
    expected = "-Users-foo-My-Workspace-dir"
    assert encoded == expected
def test_encode_variant_b_handles_windows_path():
    """A Windows drive path is slugified like any other path."""
    windows_cwd = "C:\\Users\\foo\\workspace"
    # The colon and every backslash become '-'; the colon+backslash pair
    # after the drive letter collapses into one dash.
    assert claude_sessions._encode_variant_b(windows_cwd) == "C-Users-foo-workspace"
def test_find_claude_sessions_dir_variant_a_match(
    fake_home: Path, tmp_path: Path
):
    """A project dir named with the variant-A encoding is discovered."""
    ws = tmp_path / "My Workspace"
    ws.mkdir()
    name_a = claude_sessions._encode_variant_a(str(ws.resolve()))
    project_dir = claude_sessions._PROJECTS_DIR / name_a
    project_dir.mkdir()

    assert claude_sessions.find_claude_sessions_dir(ws) == project_dir
def test_find_claude_sessions_dir_variant_b_match(
    fake_home: Path, tmp_path: Path
):
    """A project dir named with the variant-B encoding is discovered."""
    # The dot in the name makes the two encodings diverge.
    ws = tmp_path / "My.Workspace"
    ws.mkdir()
    resolved = str(ws.resolve())
    name_a = claude_sessions._encode_variant_a(resolved)
    name_b = claude_sessions._encode_variant_b(resolved)
    # Guard: the fixture is only meaningful if the encodings differ.
    assert name_a != name_b
    project_dir = claude_sessions._PROJECTS_DIR / name_b
    project_dir.mkdir()

    assert claude_sessions.find_claude_sessions_dir(ws) == project_dir
def test_find_claude_sessions_dir_no_match_returns_none(
    fake_home: Path, tmp_path: Path
):
    """With no encoded dir on disk the single-dir lookup yields ``None``."""
    ws = tmp_path / "untouched"
    ws.mkdir()

    result = claude_sessions.find_claude_sessions_dir(ws)

    assert result is None
def test_find_claude_sessions_dirs_returns_all_when_both_exist(
    fake_home: Path, tmp_path: Path
):
    """Both encoded dirs on disk means both are returned.

    This is the state left behind when Claude Code is upgraded
    mid-project: two sibling project dirs, each holding part of the
    session history, which the caller is expected to union.
    """
    ws = tmp_path / "My.Wkspace"  # the dot guarantees A != B
    ws.mkdir()
    resolved = str(ws.resolve())
    name_a = claude_sessions._encode_variant_a(resolved)
    name_b = claude_sessions._encode_variant_b(resolved)
    assert name_a != name_b

    expected = set()
    for name in (name_a, name_b):
        project_dir = claude_sessions._PROJECTS_DIR / name
        project_dir.mkdir()
        expected.add(project_dir)

    assert set(claude_sessions.find_claude_sessions_dirs(ws)) == expected
def test_list_session_files_unions_both_variants(
    fake_home: Path, tmp_path: Path
):
    """Files from both encoded dirs appear in the listing, which is the
    whole point of probing both encodings."""
    ws = tmp_path / "My.Wkspace"
    ws.mkdir()
    resolved = str(ws.resolve())
    name_a = claude_sessions._encode_variant_a(resolved)
    name_b = claude_sessions._encode_variant_b(resolved)
    assert name_a != name_b

    for dir_name, file_name in ((name_a, "old.jsonl"), (name_b, "new.jsonl")):
        project_dir = claude_sessions._PROJECTS_DIR / dir_name
        project_dir.mkdir()
        (project_dir / file_name).write_text("{}\n")

    listed = sorted(p.name for p in claude_sessions.list_session_files(ws))
    assert listed == ["new.jsonl", "old.jsonl"]
def test_list_session_files_picks_newest_when_same_name_in_both_variants(
    fake_home: Path, tmp_path: Path
):
    """Same session id under both encoded dirs: the most recently modified
    copy is returned, whichever directory it lives in.

    Models Claude Code being upgraded mid-session and re-writing the same
    id under the new encoding. Both orderings are checked so that an
    implementation which simply lets the last-scanned directory win cannot
    pass by accident.
    """
    import os
    import time

    workspace = tmp_path / "My.Wkspace"
    workspace.mkdir()
    enc_a = claude_sessions._encode_variant_a(str(workspace.resolve()))
    enc_b = claude_sessions._encode_variant_b(str(workspace.resolve()))
    assert enc_a != enc_b
    dir_a = claude_sessions._PROJECTS_DIR / enc_a
    dir_b = claude_sessions._PROJECTS_DIR / enc_b
    dir_a.mkdir()
    dir_b.mkdir()
    copy_a = dir_a / "shared.jsonl"
    copy_a.write_text('{"src":"a-old"}\n')
    copy_b = dir_b / "shared.jsonl"
    copy_b.write_text('{"src":"b-new"}\n')

    # Set both mtimes explicitly so the comparison never depends on
    # filesystem timestamp resolution.
    now = time.time()
    os.utime(copy_a, (now - 3600, now - 3600))
    os.utime(copy_b, (now, now))
    files = claude_sessions.list_session_files(workspace)
    assert len(files) == 1
    assert files[0].read_text() == '{"src":"b-new"}\n'

    # Flip the ordering: the copy in dir_a is now the newer one and must
    # win even though dir_b holds a same-named file.
    os.utime(copy_b, (now - 7200, now - 7200))
    files = claude_sessions.list_session_files(workspace)
    assert len(files) == 1
    assert files[0].read_text() == '{"src":"a-old"}\n'
def test_list_session_files_reads_from_claude_dir(
    fake_home: Path, tmp_path: Path
):
    """Sessions written to ``~/.claude/projects/<enc>/`` are listed even
    though ``<workspace>/user/sessions/`` does not exist."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()
    enc = claude_sessions._encode_variant_a(str(workspace.resolve()))
    target = claude_sessions._PROJECTS_DIR / enc
    target.mkdir()
    (target / "session-1.jsonl").write_text('{"event":"hi"}\n')
    (target / "session-2.jsonl").write_text('{"event":"there"}\n')
    files = claude_sessions.list_session_files(workspace)
    assert [f.name for f in files] == ["session-1.jsonl", "session-2.jsonl"]
    # Each file must come from the Claude dir, not legacy. Compare the
    # parent path rather than a string prefix, which would also accept a
    # sibling directory whose name merely starts with the same characters.
    for f in files:
        assert f.parent == target
def test_list_session_files_falls_back_to_legacy(
    fake_home: Path, tmp_path: Path
):
    """Without any Claude project dir, files in the legacy
    ``<workspace>/user/sessions/`` directory are still returned
    (back-compat for hook-managed mirrors)."""
    ws = tmp_path / "wkspace"
    ws.mkdir()
    legacy_dir = ws.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "old.jsonl").write_text('{"event":"legacy"}\n')

    listed = [p.name for p in claude_sessions.list_session_files(ws)]

    assert listed == ["old.jsonl"]
def test_list_session_files_dedupes_by_name_claude_wins(
    fake_home: Path, tmp_path: Path
):
    """A filename present in both the Claude dir and the legacy dir is
    listed once, and the entry is the Claude copy (the live writer)."""
    ws = tmp_path / "wkspace"
    ws.mkdir()

    project_dir = claude_sessions._PROJECTS_DIR / claude_sessions._encode_variant_a(
        str(ws.resolve())
    )
    project_dir.mkdir()
    (project_dir / "shared.jsonl").write_text('{"src":"claude"}\n')

    legacy_dir = ws.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "shared.jsonl").write_text('{"src":"legacy"}\n')

    listed = claude_sessions.list_session_files(ws)

    assert len(listed) == 1
    assert listed[0].read_text() == '{"src":"claude"}\n'
def test_list_session_files_unions_when_disjoint(
    fake_home: Path, tmp_path: Path
):
    """Distinct filenames in the Claude dir and the legacy dir are both
    listed."""
    ws = tmp_path / "wkspace"
    ws.mkdir()

    project_dir = claude_sessions._PROJECTS_DIR / claude_sessions._encode_variant_a(
        str(ws.resolve())
    )
    project_dir.mkdir()
    (project_dir / "fresh.jsonl").write_text("{}\n")

    legacy_dir = ws.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "old.jsonl").write_text("{}\n")

    listed = sorted(p.name for p in claude_sessions.list_session_files(ws))
    assert listed == ["fresh.jsonl", "old.jsonl"]
def test_list_session_files_empty_returns_empty_list(
    fake_home: Path, tmp_path: Path
):
    """With no source directory at all the listing is empty and nothing
    is created inside the workspace."""
    ws = tmp_path / "wkspace"
    ws.mkdir()

    listed = claude_sessions.list_session_files(ws)

    assert listed == []
    # Listing must stay read-only: no ``user/`` tree may appear.
    assert not (ws / "user").exists()