fix(push): read sessions from ~/.claude/projects/<encoded-cwd>/
Real bug: `agnes push` was reading `<workspace>/user/sessions/`, but Claude Code writes session jsonls to `~/.claude/projects/<encoded-cwd>/` and nothing on the analyst side ever copies them across. The SessionEnd hook ran `agnes push` happily and uploaded zero sessions every time. `cli/lib/claude_sessions.py` probes both Claude Code encoding variants (older `/`→`-` keeping spaces+tildes; newer all-non-alphanumeric→`-` with collapsed runs) and unions whichever exist. Users who upgraded Claude Code mid-project end up with both encoded dirs side-by-side on disk; the union ensures no session is left behind. Same-named jsonl in both dirs → newest mtime wins. `<workspace>/user/sessions/` survives as a fallback for any setup that explicitly mirrors sessions there. Verified on real disk: helper returns 2 dirs + 8 unioned session files for the Agnes-test workspace where the previous code returned 0.
This commit is contained in:
parent
92d477e422
commit
08e4959185
4 changed files with 405 additions and 8 deletions
|
|
@ -38,6 +38,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n
|
|||
- `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`.
|
||||
- Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`).
|
||||
- `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces.
|
||||
- `agnes push` now reads Claude Code session jsonls from `~/.claude/projects/<encoded-cwd>/` (where Claude Code actually writes them), instead of `<workspace>/user/sessions/` (which the SessionEnd hook never populated — the previous code uploaded an empty list every time). Encoding logic in `cli/lib/claude_sessions.py` probes both Claude Code variants — older `/`→`-` and newer all-non-alphanumeric→`-` — and unions the result, so users who have upgraded Claude Code mid-project see sessions from both encoded dirs. Falls back to `<workspace>/user/sessions/` for back-compat.
|
||||
|
||||
### Removed
|
||||
- `da analyst setup`, `da analyst status`, `da sync`, `da fetch`, `da metrics`. See **Changed** for replacements.
|
||||
|
|
|
|||
|
|
@ -68,16 +68,16 @@ def push(
|
|||
raise typer.Exit(1)
|
||||
|
||||
workspace = Path(os.environ.get("AGNES_LOCAL_DIR", ".")).resolve()
|
||||
sessions_dir = workspace / "user" / "sessions"
|
||||
local_md = workspace / ".claude" / "CLAUDE.local.md"
|
||||
|
||||
# Lazy: only enumerate when the directory actually exists. We must not
|
||||
# mkdir here - the empty-workspace case must leave disk untouched so
|
||||
# the SessionEnd hook stays a true no-op for analysts who haven't
|
||||
# produced any sessions yet.
|
||||
session_files = (
|
||||
sorted(sessions_dir.glob("*.jsonl")) if sessions_dir.exists() else []
|
||||
)
|
||||
# Claude Code writes session jsonls to ~/.claude/projects/<encoded-cwd>/
|
||||
# — the encoding varies by Claude Code version (older: `/` -> `-`,
|
||||
# newer: all non-alphanumeric -> `-`). The helper tries both encodings
|
||||
# and also falls back to the legacy <workspace>/user/sessions/ for
|
||||
# setups that mirror sessions there explicitly. See
|
||||
# cli/lib/claude_sessions.py for details.
|
||||
from cli.lib.claude_sessions import list_session_files
|
||||
session_files = list_session_files(workspace)
|
||||
has_local_md = local_md.exists()
|
||||
|
||||
if dry_run:
|
||||
|
|
|
|||
137
cli/lib/claude_sessions.py
Normal file
137
cli/lib/claude_sessions.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Locate Claude Code session transcripts on disk.
|
||||
|
||||
Claude Code writes session jsonls to ``~/.claude/projects/<encoded-cwd>/``,
|
||||
where the cwd encoding is **version-dependent**:
|
||||
|
||||
- **Older versions**: replace ``/`` with ``-``, preserve everything else
|
||||
(spaces, tildes, dots, underscores). This is what we observe on macOS
|
||||
with iCloud paths today.
|
||||
|
||||
- **Newer versions** (and likely the default on Windows): replace every
|
||||
non-alphanumeric character with ``-``, then collapse runs of consecutive
|
||||
``-``. This matches "slugify"-style encoding used by recent Claude
|
||||
Code releases.
|
||||
|
||||
We try both encodings and return every directory that exists. This is
|
||||
forward-compatible: if Claude Code adds a third encoding scheme later,
|
||||
extend the variant list.
|
||||
|
||||
Cross-platform notes:
|
||||
- ``~/.claude/projects/`` resolves via ``Path.home()``, which honors
|
||||
``$HOME`` on POSIX and ``%USERPROFILE%`` on Windows.
|
||||
- On Windows, the cwd will look like ``C:\\Users\\foo\\workspace``; the
|
||||
variant-B (non-alphanumeric -> ``-``) encoding handles drive letters
|
||||
and backslashes naturally. Variant A only rewrites ``/``, so on Windows
|
||||
its output keeps the drive letter and backslashes and is not a usable directory name.
|
||||
|
||||
The legacy ``<workspace>/user/sessions/`` directory is preserved as a
|
||||
fallback for setups that explicitly mirror sessions there (e.g. a
|
||||
custom hook). Files from the Claude Code directories take precedence; the
|
||||
legacy directory only contributes filenames that are absent from them.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
_PROJECTS_DIR = Path.home() / ".claude" / "projects"
|
||||
|
||||
|
||||
def _encode_variant_a(cwd: str) -> str:
|
||||
"""Older Claude Code: replace ``/`` with ``-``. Preserves spaces, tildes,
|
||||
dots, underscores, etc. Observed in production on macOS with iCloud paths.
|
||||
"""
|
||||
return cwd.replace("/", "-")
|
||||
|
||||
|
||||
def _encode_variant_b(cwd: str) -> str:
|
||||
"""Newer Claude Code: replace every non-alphanumeric with ``-``, then
|
||||
collapse consecutive ``-`` to a single one. Matches slugify-style
|
||||
encoding used by recent releases.
|
||||
"""
|
||||
s = re.sub(r"[^a-zA-Z0-9]", "-", cwd)
|
||||
# Collapse runs of `-` to a single `-`. Some Claude Code versions
|
||||
# leave the runs alone; others collapse. We collapse defensively.
|
||||
return re.sub(r"-+", "-", s)
|
||||
|
||||
|
||||
def _candidate_encodings(cwd: str) -> Iterator[str]:
    """Yield candidate encoded directory names for *cwd*.

    Candidates, in order:

    1. Variant A — only ``/`` replaced with ``-``.
    2. Variant B — every non-alphanumeric replaced with ``-``, runs collapsed.
    3. Variant B without collapsing — every non-alphanumeric replaced with
       ``-`` one-for-one, so ``/a/.b`` becomes ``-a--b``. Without this
       candidate, a project directory written by a release that does not
       collapse runs would never be probed for paths containing adjacent
       non-alphanumeric characters.

    All candidates are emitted regardless of platform — the encoding is a
    function of the Claude Code release, not the host OS. The same name may
    be yielded more than once (e.g. candidates 2 and 3 coincide when *cwd*
    has no adjacent non-alphanumerics); callers are expected to de-duplicate.
    """
    yield _encode_variant_a(cwd)
    yield _encode_variant_b(cwd)
    # Same character class as variant B, but keep runs of "-" intact.
    yield re.sub(r"[^a-zA-Z0-9]", "-", cwd)
|
||||
|
||||
|
||||
def find_claude_sessions_dirs(workspace: Path) -> list[Path]:
    """Return every existing Claude Code project directory for *workspace*.

    Each candidate encoding of the resolved workspace path is probed under
    the projects root. Usually one directory matches, but several can when
    different Claude Code releases (each with its own encoding) have been
    run in the same cwd. All matches are returned so callers can union their
    contents instead of silently dropping part of the session history.

    Args:
        workspace: Workspace directory; it is resolved to an absolute path
            before encoding.

    Returns:
        Matching directories in candidate order, without duplicates. Empty
        when nothing matches.
    """
    cwd = str(workspace.resolve())

    found: list[Path] = []
    seen: set[str] = set()
    for encoded in _candidate_encodings(cwd):
        if encoded in seen:
            continue
        seen.add(encoded)

        candidate = _PROJECTS_DIR / encoded
        # An encoding that still carries path separators or a drive letter
        # (variant A applied to ``C:\Users\...`` on Windows) makes the join
        # discard ``_PROJECTS_DIR`` and resolve to an unrelated location —
        # typically the workspace itself, which *does* exist. Only accept
        # candidates that are direct children of the projects root.
        if candidate.parent != _PROJECTS_DIR:
            continue
        if candidate.is_dir():
            found.append(candidate)

    return found
|
||||
|
||||
|
||||
def find_claude_sessions_dir(workspace: Path) -> Path | None:
    """Return the first Claude Code project directory found for *workspace*.

    Convenience wrapper for callers that only care whether a match exists;
    use :func:`find_claude_sessions_dirs` when every match is needed.
    Returns ``None`` when no directory matches.
    """
    # next() with a default gives "first element or None" without indexing.
    return next(iter(find_claude_sessions_dirs(workspace)), None)
|
||||
|
||||
|
||||
def list_session_files(workspace: Path) -> list[Path]:
    """Return the ``*.jsonl`` session files to upload for *workspace*.

    Sources are every Claude Code project directory matching *workspace*
    plus the legacy ``<workspace>/user/sessions/`` directory.

    Dedup rule when the same filename appears in more than one source:

    - Among the Claude project dirs, the **most recently modified** copy
      wins (on an mtime tie the first one seen is kept).
    - The legacy dir only contributes filenames absent from the Claude dirs.

    Entries in the Claude dirs whose metadata cannot be read (removed between
    listing and ``stat``, broken symlink, permission error) are skipped
    rather than aborting the whole listing.

    Args:
        workspace: Workspace directory whose sessions should be listed.

    Returns:
        Paths sorted by filename for a deterministic upload order. Empty
        when no source exists; nothing is created on disk.
    """
    # filename -> (path, mtime). Caching the mtime means each file is
    # stat'ed exactly once instead of re-stat'ing the incumbent per compare.
    newest: dict[str, tuple[Path, float]] = {}

    for claude_dir in find_claude_sessions_dirs(workspace):
        for path in claude_dir.glob("*.jsonl"):
            try:
                mtime = path.stat().st_mtime
            except OSError:
                # Unreadable or vanished entry: nothing useful to upload.
                continue
            incumbent = newest.get(path.name)
            if incumbent is None or mtime > incumbent[1]:
                newest[path.name] = (path, mtime)

    files: dict[str, Path] = {name: path for name, (path, _) in newest.items()}

    legacy_dir = workspace / "user" / "sessions"
    if legacy_dir.is_dir():
        for path in legacy_dir.glob("*.jsonl"):
            # Never override a copy that came from a Claude project dir.
            files.setdefault(path.name, path)

    return sorted(files.values(), key=lambda p: p.name)
|
||||
259
tests/test_claude_sessions.py
Normal file
259
tests/test_claude_sessions.py
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
"""Tests for ``cli.lib.claude_sessions`` — Claude Code session locator.
|
||||
|
||||
The locator must handle the two encoding schemes Claude Code uses to map a
|
||||
workspace cwd to a directory name under ``~/.claude/projects/``:
|
||||
|
||||
- **Variant A** (older): replace ``/`` with ``-``, preserve everything else.
|
||||
- **Variant B** (newer / Windows): replace every non-alphanumeric with ``-``,
|
||||
collapse consecutive ``-``.
|
||||
|
||||
Tests use ``monkeypatch`` to repoint the module-level ``_PROJECTS_DIR`` constant so
|
||||
we can fabricate either encoding under a tmp dir and verify the helper finds
|
||||
it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from cli.lib import claude_sessions
|
||||
|
||||
|
||||
@pytest.fixture
def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point the locator at a throwaway ``.claude/projects`` tree.

    ``claude_sessions`` computes its projects root once, at import time, so
    patching ``Path.home`` afterwards would have no effect. The fixture
    therefore overrides the module constant ``_PROJECTS_DIR`` itself and
    returns the fake home directory that contains it.
    """
    fake_root = tmp_path / "home"
    projects_dir = fake_root.joinpath(".claude", "projects")
    projects_dir.mkdir(parents=True)
    monkeypatch.setattr(claude_sessions, "_PROJECTS_DIR", projects_dir)
    return fake_root
|
||||
|
||||
|
||||
def test_encode_variant_a_replaces_slashes_only():
    """Variant A turns ``/`` into ``-`` and leaves the space alone."""
    source = "/Users/foo/My Workspace"
    assert claude_sessions._encode_variant_a(source) == "-Users-foo-My Workspace"
|
||||
|
||||
|
||||
def test_encode_variant_b_replaces_all_nonalnum_and_collapses():
    """Variant B maps slashes, spaces and dots to single (collapsed) dashes."""
    source = "/Users/foo/My Workspace.dir"
    assert claude_sessions._encode_variant_b(source) == "-Users-foo-My-Workspace-dir"
|
||||
|
||||
|
||||
def test_encode_variant_b_handles_windows_path():
    """Drive colon and backslashes all become ``-``; ``:\\`` collapses to one."""
    source = "C:\\Users\\foo\\workspace"
    assert claude_sessions._encode_variant_b(source) == "C-Users-foo-workspace"
|
||||
|
||||
|
||||
def test_find_claude_sessions_dir_variant_a_match(
    fake_home: Path, tmp_path: Path
):
    """A project dir named with the variant-A encoding is the one returned."""
    workspace = tmp_path / "My Workspace"
    workspace.mkdir()
    resolved = str(workspace.resolve())
    expected = claude_sessions._PROJECTS_DIR / claude_sessions._encode_variant_a(resolved)
    expected.mkdir()

    assert claude_sessions.find_claude_sessions_dir(workspace) == expected
|
||||
|
||||
|
||||
def test_find_claude_sessions_dir_variant_b_match(
    fake_home: Path, tmp_path: Path
):
    """A project dir named with the variant-B encoding is the one returned."""
    # The dot in the name guarantees variants A and B produce different names.
    workspace = tmp_path / "My.Workspace"
    workspace.mkdir()
    resolved = str(workspace.resolve())

    name_b = claude_sessions._encode_variant_b(resolved)
    name_a = claude_sessions._encode_variant_a(resolved)
    # Sanity check on the fixture itself.
    assert name_a != name_b

    expected = claude_sessions._PROJECTS_DIR / name_b
    expected.mkdir()

    assert claude_sessions.find_claude_sessions_dir(workspace) == expected
|
||||
|
||||
|
||||
def test_find_claude_sessions_dir_no_match_returns_none(
    fake_home: Path, tmp_path: Path
):
    """With no encoded dir on disk the single-dir lookup yields ``None``."""
    workspace = tmp_path / "untouched"
    workspace.mkdir()
    result = claude_sessions.find_claude_sessions_dir(workspace)
    assert result is None
|
||||
|
||||
|
||||
def test_find_claude_sessions_dirs_returns_all_when_both_exist(
    fake_home: Path, tmp_path: Path
):
    """Both encoded dirs on disk → both are returned.

    This models a user who upgraded Claude Code mid-project and therefore
    has two sibling project dirs, each holding part of the session history.
    """
    workspace = tmp_path / "My.Wkspace"  # dot ensures A != B
    workspace.mkdir()
    resolved = str(workspace.resolve())
    enc_a = claude_sessions._encode_variant_a(resolved)
    enc_b = claude_sessions._encode_variant_b(resolved)
    assert enc_a != enc_b

    expected = set()
    for encoded in (enc_a, enc_b):
        project_dir = claude_sessions._PROJECTS_DIR / encoded
        project_dir.mkdir()
        expected.add(project_dir)

    assert set(claude_sessions.find_claude_sessions_dirs(workspace)) == expected
|
||||
|
||||
|
||||
def test_list_session_files_unions_both_variants(
    fake_home: Path, tmp_path: Path
):
    """Files from both encoded dirs of one workspace appear in the listing."""
    workspace = tmp_path / "My.Wkspace"
    workspace.mkdir()
    resolved = str(workspace.resolve())
    enc_a = claude_sessions._encode_variant_a(resolved)
    enc_b = claude_sessions._encode_variant_b(resolved)
    assert enc_a != enc_b

    dir_a = claude_sessions._PROJECTS_DIR / enc_a
    dir_b = claude_sessions._PROJECTS_DIR / enc_b
    for project_dir, filename in ((dir_a, "old.jsonl"), (dir_b, "new.jsonl")):
        project_dir.mkdir()
        (project_dir / filename).write_text("{}\n")

    listed = claude_sessions.list_session_files(workspace)
    assert sorted(p.name for p in listed) == ["new.jsonl", "old.jsonl"]
|
||||
|
||||
|
||||
def test_list_session_files_picks_newest_when_same_name_in_both_variants(
    fake_home: Path, tmp_path: Path
):
    """Same filename under both encoded dirs → the newer-mtime copy is kept."""
    import os
    import time

    workspace = tmp_path / "My.Wkspace"
    workspace.mkdir()
    resolved = str(workspace.resolve())
    enc_a = claude_sessions._encode_variant_a(resolved)
    enc_b = claude_sessions._encode_variant_b(resolved)
    assert enc_a != enc_b

    dir_a = claude_sessions._PROJECTS_DIR / enc_a
    dir_b = claude_sessions._PROJECTS_DIR / enc_b
    dir_a.mkdir()
    dir_b.mkdir()

    stale_copy = dir_a / "shared.jsonl"
    stale_copy.write_text('{"src":"a-old"}\n')
    # Back-date the first copy by an hour so the ordering is unambiguous.
    an_hour_ago = time.time() - 3600
    os.utime(stale_copy, (an_hour_ago, an_hour_ago))

    fresh_copy = dir_b / "shared.jsonl"
    fresh_copy.write_text('{"src":"b-new"}\n')

    listed = claude_sessions.list_session_files(workspace)
    assert len(listed) == 1
    assert listed[0].read_text() == '{"src":"b-new"}\n'
|
||||
|
||||
|
||||
def test_list_session_files_reads_from_claude_dir(
    fake_home: Path, tmp_path: Path
):
    """Sessions in the Claude project dir are listed without any legacy dir."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()
    encoded = claude_sessions._encode_variant_a(str(workspace.resolve()))
    project_dir = claude_sessions._PROJECTS_DIR / encoded
    project_dir.mkdir()
    (project_dir / "session-1.jsonl").write_text('{"event":"hi"}\n')
    (project_dir / "session-2.jsonl").write_text('{"event":"there"}\n')

    listed = claude_sessions.list_session_files(workspace)
    assert [p.name for p in listed] == ["session-1.jsonl", "session-2.jsonl"]
    # Every returned path must live under the Claude dir, not the legacy one.
    prefix = str(project_dir)
    assert all(str(p).startswith(prefix) for p in listed)
|
||||
|
||||
|
||||
def test_list_session_files_falls_back_to_legacy(
    fake_home: Path, tmp_path: Path
):
    """With no Claude project dir, files in ``user/sessions`` are returned."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()
    legacy_dir = workspace.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "old.jsonl").write_text('{"event":"legacy"}\n')

    names = [p.name for p in claude_sessions.list_session_files(workspace)]
    assert names == ["old.jsonl"]
|
||||
|
||||
|
||||
def test_list_session_files_dedupes_by_name_claude_wins(
    fake_home: Path, tmp_path: Path
):
    """Same filename in Claude dir and legacy dir → the Claude copy is kept."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()

    encoded = claude_sessions._encode_variant_a(str(workspace.resolve()))
    project_dir = claude_sessions._PROJECTS_DIR / encoded
    project_dir.mkdir()
    (project_dir / "shared.jsonl").write_text('{"src":"claude"}\n')

    legacy_dir = workspace.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "shared.jsonl").write_text('{"src":"legacy"}\n')

    listed = claude_sessions.list_session_files(workspace)
    assert len(listed) == 1
    assert listed[0].read_text() == '{"src":"claude"}\n'
|
||||
|
||||
|
||||
def test_list_session_files_unions_when_disjoint(
    fake_home: Path, tmp_path: Path
):
    """Distinct filenames in the Claude dir and legacy dir are both listed."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()

    encoded = claude_sessions._encode_variant_a(str(workspace.resolve()))
    project_dir = claude_sessions._PROJECTS_DIR / encoded
    project_dir.mkdir()
    (project_dir / "fresh.jsonl").write_text("{}\n")

    legacy_dir = workspace.joinpath("user", "sessions")
    legacy_dir.mkdir(parents=True)
    (legacy_dir / "old.jsonl").write_text("{}\n")

    listed = claude_sessions.list_session_files(workspace)
    assert sorted(p.name for p in listed) == ["fresh.jsonl", "old.jsonl"]
|
||||
|
||||
|
||||
def test_list_session_files_empty_returns_empty_list(
    fake_home: Path, tmp_path: Path
):
    """No source directories → empty result and nothing created on disk."""
    workspace = tmp_path / "wkspace"
    workspace.mkdir()

    assert claude_sessions.list_session_files(workspace) == []
    # Listing must not materialise the legacy ``user/`` tree as a side effect.
    assert not (workspace / "user").exists()
|
||||
Loading…
Reference in a new issue