feat(cli): smart local sync — Claude Code SessionStart/SessionEnd hooks + da sync --quiet

The analyst flow becomes a closed loop with the server-curated table catalog:

  - `da analyst setup` writes `<workspace>/.claude/settings.json` with two hooks:
      SessionStart → `da sync --quiet || true`        — pulls fresh RBAC-filtered parquets at session start
      SessionEnd   → `da sync --upload-only --quiet || true` — uploads session jsonl + CLAUDE.local.md
  - `|| true` keeps Claude Code unblocked when the server is down.
  - Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace.
  - `da sync --quiet` rewrites the CLI output for hook consumption — 0 stdout on success, single-line error on failure.
  - Existing settings.json is patched (deep-merged), not overwritten; malformed JSON is reported, not silently overwritten.

Tests cover: workspace bootstrap, hook insertion, malformed-json safety, quiet-mode output shape.
This commit is contained in:
ZdenekSrotyr 2026-05-01 20:25:27 +02:00
parent c364f65127
commit d0b7e122d6
6 changed files with 456 additions and 5 deletions

View file

@ -273,6 +273,52 @@ def _get_instance_name(server_url: str, token: str) -> str:
return parsed.hostname or "AI Data Analyst" return parsed.hostname or "AI Data Analyst"
# ---------------------------------------------------------------------------
# Helper: install SessionStart/End hooks into a Claude settings file
# ---------------------------------------------------------------------------
def _install_claude_hooks(settings_path: Path) -> None:
"""Add SessionStart/SessionEnd hooks calling `da sync` to a Claude settings file.
Idempotent: replaces our prior `da sync` entries (matched by command substring
`da sync`) but preserves anyone else's hooks. Creates the file when missing.
The settings file is workspace-level (`<workspace>/.claude/settings.json`) so
the hooks only fire in this analyst workspace, not in unrelated Claude Code
sessions on the same machine.
"""
settings_path.parent.mkdir(parents=True, exist_ok=True)
if settings_path.exists():
try:
cfg = json.loads(settings_path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
typer.echo(
f"Warning: {settings_path} is not valid JSON; skipping hook install.",
err=True,
)
return
else:
cfg = {}
hooks = cfg.setdefault("hooks", {})
def _replace_or_add(event: str, command: str) -> None:
existing = hooks.setdefault(event, [])
# Drop any prior entry whose every command is a `da sync` invocation.
# Third-party entries (PreToolUse: echo hi) and mixed entries are left alone.
for entry in list(existing):
entry_cmds = [h.get("command", "") for h in entry.get("hooks", [])]
if entry_cmds and all("da sync" in c for c in entry_cmds):
existing.remove(entry)
existing.append({"hooks": [{"type": "command", "command": command}]})
_replace_or_add("SessionStart", "da sync --quiet 2>/dev/null || true")
_replace_or_add("SessionEnd", "da sync --upload-only --quiet 2>/dev/null || true")
settings_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Helper: generate CLAUDE.md from template # Helper: generate CLAUDE.md from template
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -318,9 +364,13 @@ def _generate_claude_md(
settings_path = workspace / ".claude" / "settings.json" settings_path = workspace / ".claude" / "settings.json"
if not settings_path.exists(): if not settings_path.exists():
# First-run defaults: model + permissions. _install_claude_hooks below
# will merge in the SessionStart/End hooks on top of these.
settings = {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}} settings = {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}}
settings_path.write_text(json.dumps(settings, indent=2)) settings_path.write_text(json.dumps(settings, indent=2))
_install_claude_hooks(settings_path)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Helper: data freshness check (for returning-session detection) # Helper: data freshness check (for returning-session detection)
@ -399,6 +449,7 @@ def setup(
typer.echo(f" Server : {server_url}") typer.echo(f" Server : {server_url}")
typer.echo(f" Tables : {n_downloaded} downloaded, {total_rows} total rows") typer.echo(f" Tables : {n_downloaded} downloaded, {total_rows} total rows")
typer.echo(f" Workspace: {workspace}") typer.echo(f" Workspace: {workspace}")
typer.echo(f" Hooks : SessionStart/End installed in {workspace}/.claude/settings.json")
typer.echo("") typer.echo("")
typer.echo("Next steps:") typer.echo("Next steps:")
typer.echo(" da sync — refresh data") typer.echo(" da sync — refresh data")

View file

@ -31,6 +31,11 @@ def sync(
upload_only: bool = typer.Option(False, "--upload-only", help="Only upload sessions/artifacts"), upload_only: bool = typer.Option(False, "--upload-only", help="Only upload sessions/artifacts"),
docs_only: bool = typer.Option(False, "--docs-only", help="Only sync documentation"), docs_only: bool = typer.Option(False, "--docs-only", help="Only sync documentation"),
as_json: bool = typer.Option(False, "--json", help="Output as JSON"), as_json: bool = typer.Option(False, "--json", help="Output as JSON"),
quiet: bool = typer.Option(
False,
"--quiet",
help="Suppress progress output (intended for hooks/cron)",
),
dry_run: bool = typer.Option( dry_run: bool = typer.Option(
False, False,
"--dry-run", "--dry-run",
@ -39,7 +44,12 @@ def sync(
): ):
"""Sync data between server and local machine.""" """Sync data between server and local machine."""
if upload_only: if upload_only:
_upload(as_json, dry_run=dry_run) _upload(as_json, dry_run=dry_run, quiet=quiet)
return
if quiet:
# Bypass Rich Progress entirely so hook stdout stays clean.
_sync_quiet(table=table, docs_only=docs_only, as_json=as_json, dry_run=dry_run)
return return
with Progress( with Progress(
@ -388,11 +398,13 @@ def _is_valid_parquet(path: Path) -> bool:
return False return False
def _upload(as_json: bool, dry_run: bool = False): def _upload(as_json: bool, dry_run: bool = False, quiet: bool = False):
"""Upload sessions and CLAUDE.local.md to server. """Upload sessions and CLAUDE.local.md to server.
When `dry_run=True`, enumerate what would be uploaded without hitting the When `dry_run=True`, enumerate what would be uploaded without hitting the
API or mutating anything on disk. API or mutating anything on disk. When `quiet=True`, suppress the trailing
"Uploaded N sessions" stdout line error paths still surface on stderr
via api_post itself.
""" """
local_dir = _local_data_dir() local_dir = _local_data_dir()
sessions_dir = local_dir / "user" / "sessions" sessions_dir = local_dir / "user" / "sessions"
@ -448,7 +460,117 @@ def _upload(as_json: bool, dry_run: bool = False):
if as_json: if as_json:
typer.echo(json.dumps(results, indent=2)) typer.echo(json.dumps(results, indent=2))
else: elif not quiet:
typer.echo(f"Uploaded {results['sessions']} sessions") typer.echo(f"Uploaded {results['sessions']} sessions")
if results["local_md"]: if results["local_md"]:
typer.echo("Uploaded CLAUDE.local.md") typer.echo("Uploaded CLAUDE.local.md")
def _sync_quiet(table, docs_only, as_json, dry_run):
"""Mirror of the Progress-block flow without any Rich UI.
Designed for Claude Code SessionStart/SessionEnd hooks and cron callers:
stdout stays empty in the no-op case, the terse one-line summary lands
on stderr so hook stdout pipes don't see it, and a manifest fetch
failure exits non-zero so the `|| true` shell fallback can swallow it
cleanly.
Skips remote-mode tables exactly like the noisy path; runs the
`_fetch_and_write_rules` corporate-memory step so analysts' .claude/
rules/ stay fresh between sessions.
"""
try:
resp = api_get("/api/sync/manifest")
resp.raise_for_status()
manifest = resp.json()
except Exception as e:
typer.echo(f"sync: manifest fetch failed: {e}", err=True)
raise typer.Exit(1)
server_tables = manifest.get("tables", {})
local_state = get_sync_state()
local_tables = local_state.get("tables", {})
to_download = []
skipped_remote = []
for tid, info in server_tables.items():
if table and tid != table:
continue
if docs_only:
continue
if info.get("query_mode") == "remote":
skipped_remote.append(tid)
continue
local_hash = local_tables.get(tid, {}).get("hash", "")
server_hash = info.get("hash", "")
if server_hash != local_hash or tid not in local_tables or not server_hash:
to_download.append(tid)
if dry_run:
if as_json:
typer.echo(json.dumps(
{"dry_run": True, "would_download": to_download,
"skipped_remote": skipped_remote},
indent=2,
))
else:
# Single stderr line keeps stdout clean for hooks while still
# giving an interactive operator running `da sync --quiet
# --dry-run` a sign that something happened.
typer.echo(
f"sync (dry-run): would download {len(to_download)} tables, "
f"skip {len(skipped_remote)} remote-mode",
err=True,
)
return
local_dir = _local_data_dir()
parquet_dir = local_dir / "server" / "parquet"
parquet_dir.mkdir(parents=True, exist_ok=True)
results = {
"downloaded": [], "skipped": [],
"skipped_remote": list(skipped_remote), "errors": [],
}
for tid in to_download:
target = parquet_dir / f"{tid}.parquet"
expected_hash = server_tables[tid].get("hash", "")
try:
stream_download(f"/api/data/{tid}/download", str(target))
if expected_hash:
if _md5_file(target) != expected_hash:
target.unlink(missing_ok=True)
raise ValueError("hash mismatch")
elif not _is_valid_parquet(target):
target.unlink(missing_ok=True)
raise ValueError("not a valid parquet")
local_tables[tid] = {
"hash": expected_hash,
"rows": server_tables[tid].get("rows", 0),
"size_bytes": server_tables[tid].get("size_bytes", 0),
}
results["downloaded"].append(tid)
except Exception as e:
results["errors"].append({"table": tid, "error": str(e)})
from datetime import datetime, timezone
local_state["tables"] = local_tables
local_state["last_sync"] = datetime.now(timezone.utc).isoformat()
save_sync_state(local_state)
if results["downloaded"]:
_rebuild_duckdb_views(local_dir, parquet_dir)
# Same corporate-memory rule fetch as the noisy path — keeps the
# `.claude/rules/km_*.md` files fresh between sessions even when the
# hook is the only thing invoking sync.
_fetch_and_write_rules(local_dir)
if as_json:
typer.echo(json.dumps(results, indent=2))
elif results["downloaded"] or results["errors"]:
typer.echo(
f"sync: {len(results['downloaded'])} tables, "
f"{len(results['errors'])} errors",
err=True,
)

View file

@ -1,11 +1,21 @@
{ {
"hooks": { "hooks": {
"SessionStart": [
{
"hooks": [
{
"type": "command",
"command": "da sync --quiet 2>/dev/null || true"
}
]
}
],
"SessionEnd": [ "SessionEnd": [
{ {
"hooks": [ "hooks": [
{ {
"type": "command", "type": "command",
"command": "python server/scripts/collect_session.py 2>/dev/null || true" "command": "da sync --upload-only --quiet 2>/dev/null || true"
} }
] ]
} }

View file

@ -0,0 +1,97 @@
"""`_install_claude_hooks` writes SessionStart/End hooks idempotently into
a Claude settings file (workspace-level for analyst workspaces)."""
import json
from pathlib import Path
from cli.commands.analyst import _install_claude_hooks
def test_install_creates_settings_when_missing(tmp_path):
settings = tmp_path / ".claude" / "settings.json"
_install_claude_hooks(settings)
cfg = json.loads(settings.read_text())
starts = cfg["hooks"]["SessionStart"]
cmds = [h["command"] for e in starts for h in e["hooks"]]
assert any("da sync --quiet" in c and "--upload-only" not in c for c in cmds), cmds
ends = cfg["hooks"]["SessionEnd"]
end_cmds = [h["command"] for e in ends for h in e["hooks"]]
assert any("da sync --upload-only" in c for c in end_cmds), end_cmds
def test_install_preserves_existing_unrelated_hooks(tmp_path):
settings = tmp_path / ".claude" / "settings.json"
settings.parent.mkdir(parents=True)
settings.write_text(json.dumps({
"hooks": {
"PreToolUse": [{"hooks": [{"type": "command", "command": "echo hi"}]}],
},
"permissions": {"allow": ["Bash(git status:*)"]},
"model": "sonnet",
}))
_install_claude_hooks(settings)
cfg = json.loads(settings.read_text())
# Unrelated hook event preserved
assert cfg["hooks"]["PreToolUse"][0]["hooks"][0]["command"] == "echo hi"
# Unrelated top-level keys preserved
assert cfg["permissions"]["allow"] == ["Bash(git status:*)"]
assert cfg["model"] == "sonnet"
# Our new hooks added
assert "SessionStart" in cfg["hooks"]
assert "SessionEnd" in cfg["hooks"]
def test_install_is_idempotent(tmp_path):
settings = tmp_path / ".claude" / "settings.json"
_install_claude_hooks(settings)
first = json.loads(settings.read_text())
_install_claude_hooks(settings)
second = json.loads(settings.read_text())
# No duplicate entries
assert first["hooks"]["SessionStart"] == second["hooks"]["SessionStart"]
assert first["hooks"]["SessionEnd"] == second["hooks"]["SessionEnd"]
assert len(second["hooks"]["SessionStart"]) == 1
assert len(second["hooks"]["SessionEnd"]) == 1
def test_install_replaces_old_da_sync_entry_without_duplicating(tmp_path):
"""If the user already has a `da sync` entry from a prior version, our
install replaces it cleanly rather than appending a second copy."""
settings = tmp_path / ".claude" / "settings.json"
settings.parent.mkdir(parents=True)
settings.write_text(json.dumps({
"hooks": {
"SessionStart": [
{"hooks": [{"type": "command", "command": "da sync"}]}, # old shape
{"hooks": [{"type": "command", "command": "echo not-ours"}]},
]
}
}))
_install_claude_hooks(settings)
cfg = json.loads(settings.read_text())
starts = cfg["hooks"]["SessionStart"]
assert len(starts) == 2 # one ours, one third-party
cmds = [h["command"] for e in starts for h in e["hooks"]]
assert "da sync --quiet 2>/dev/null || true" in cmds
assert "echo not-ours" in cmds
assert all(c == "echo not-ours" or "da sync --quiet" in c for c in cmds), cmds
def test_install_skips_malformed_existing_settings(tmp_path, capsys):
"""If the settings file is corrupted JSON, warn on stderr and bail —
don't crash the surrounding `da analyst setup` flow."""
settings = tmp_path / ".claude" / "settings.json"
settings.parent.mkdir(parents=True)
settings.write_text("{not valid json")
_install_claude_hooks(settings) # must not raise
captured = capsys.readouterr()
assert "not valid JSON" in captured.err
# File untouched
assert settings.read_text() == "{not valid json"

View file

@ -0,0 +1,138 @@
"""`da sync --quiet` truly suppresses stdout chatter, including the download
loop and final summary.
Without --quiet, the same fixture prints "Downloading", "Downloaded:", etc.;
with --quiet, stdout stays empty and the terse one-liner lands on stderr.
The first test forces the download loop to run so the contrast between
noisy/quiet stdout is observable (mutation-tests the flag see PR #145
for the original empty-manifest test that passed even without --quiet).
"""
import json
from typer.testing import CliRunner
from unittest.mock import patch, MagicMock
from cli.main import app
def _fake_manifest_one_table():
resp = MagicMock()
resp.json.return_value = {
"tables": {
"orders": {
"hash": "abc123",
"rows": 5,
"size_bytes": 100,
"query_mode": "local",
"source_type": "keboola",
}
},
"assets": {},
"server_time": "2026-04-30T00:00:00Z",
}
resp.raise_for_status = MagicMock()
return resp
def _stub_download(_url, target_path):
from pathlib import Path
Path(target_path).write_bytes(b"PAR1" + b"\x00" * 16 + b"PAR1")
def test_quiet_suppresses_stdout_when_downloading(tmp_path, monkeypatch):
"""Manifest has tables that actually trigger downloads. Without --quiet
stdout would contain 'Downloading' / 'Downloaded:'. With --quiet stdout
stays empty and the terse summary lands on stderr."""
monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path))
monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg"))
runner = CliRunner()
with patch("cli.commands.sync.api_get", return_value=_fake_manifest_one_table()), \
patch("cli.commands.sync.stream_download", side_effect=_stub_download), \
patch("cli.commands.sync._md5_file", return_value="abc123"), \
patch("cli.commands.sync._rebuild_duckdb_views"), \
patch("cli.commands.sync._fetch_and_write_rules"):
result = runner.invoke(app, ["sync", "--quiet"])
assert result.exit_code == 0, result.stdout
assert result.stdout == "", f"expected empty stdout, got: {result.stdout!r}"
assert "sync: 1 tables" in result.stderr
def test_noisy_mode_prints_to_stdout(tmp_path, monkeypatch):
"""Anchor: the noisy path DOES print download chatter to stdout, so the
contrast in the quiet test above is meaningful."""
monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path))
monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg"))
runner = CliRunner()
with patch("cli.commands.sync.api_get", return_value=_fake_manifest_one_table()), \
patch("cli.commands.sync.stream_download", side_effect=_stub_download), \
patch("cli.commands.sync._md5_file", return_value="abc123"), \
patch("cli.commands.sync._rebuild_duckdb_views"), \
patch("cli.commands.sync._fetch_and_write_rules"):
result = runner.invoke(app, ["sync"])
assert result.exit_code == 0, result.stdout
assert "Downloaded:" in result.stdout
def test_quiet_manifest_failure_exits_nonzero(tmp_path, monkeypatch):
"""SessionStart hook contract: server unreachable → non-zero exit (so
`|| true` swallows it cleanly), error message on stderr."""
monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path))
monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg"))
runner = CliRunner()
fake_resp = MagicMock()
fake_resp.raise_for_status.side_effect = RuntimeError("boom")
with patch("cli.commands.sync.api_get", return_value=fake_resp):
result = runner.invoke(app, ["sync", "--quiet"])
assert result.exit_code == 1
assert "manifest fetch failed" in result.stderr
def test_quiet_skips_remote_mode_tables(tmp_path, monkeypatch):
"""Materialized rows go through the download path; remote rows do not.
Locks in the contract that --quiet honors the same skipped_remote
filter as the noisy path."""
monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path))
monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg"))
resp = MagicMock()
resp.json.return_value = {
"tables": {
"live_orders": {
"hash": "x", "rows": 0, "size_bytes": 0,
"query_mode": "remote", "source_type": "bigquery",
},
"agg_90d": {
"hash": "abc", "rows": 5, "size_bytes": 100,
"query_mode": "materialized", "source_type": "bigquery",
},
},
"assets": {},
"server_time": "2026-04-30T00:00:00Z",
}
resp.raise_for_status = MagicMock()
runner = CliRunner()
download_calls = []
def _spy_download(url, target):
download_calls.append(url)
from pathlib import Path
Path(target).write_bytes(b"PAR1" + b"\x00" * 16 + b"PAR1")
with patch("cli.commands.sync.api_get", return_value=resp), \
patch("cli.commands.sync.stream_download", side_effect=_spy_download), \
patch("cli.commands.sync._md5_file", return_value="abc"), \
patch("cli.commands.sync._rebuild_duckdb_views"), \
patch("cli.commands.sync._fetch_and_write_rules"):
result = runner.invoke(app, ["sync", "--quiet"])
assert result.exit_code == 0, result.stdout
# Remote table never downloaded; materialized table downloaded.
assert any("agg_90d" in u for u in download_calls)
assert not any("live_orders" in u for u in download_calls)

View file

@ -0,0 +1,33 @@
"""The shipped Claude settings template must point hooks at `da sync`, not the deleted server/scripts."""
import json
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
TEMPLATE = REPO_ROOT / "docs" / "setup" / "claude_settings.json"
def test_template_has_session_start_da_sync():
cfg = json.loads(TEMPLATE.read_text())
starts = cfg.get("hooks", {}).get("SessionStart", [])
assert starts, "SessionStart hook missing"
cmds = [h["command"] for entry in starts for h in entry.get("hooks", [])]
assert any("da sync" in c and "--upload-only" not in c for c in cmds), (
f"Expected `da sync` in SessionStart, got {cmds}"
)
def test_template_has_session_end_upload():
cfg = json.loads(TEMPLATE.read_text())
ends = cfg.get("hooks", {}).get("SessionEnd", [])
cmds = [h["command"] for entry in ends for h in entry.get("hooks", [])]
assert any("da sync --upload-only" in c for c in cmds), (
f"Expected `da sync --upload-only` in SessionEnd, got {cmds}"
)
def test_template_drops_dead_server_scripts_reference():
raw = TEMPLATE.read_text()
assert "server/scripts/collect_session.py" not in raw, (
"Template still references the deleted server/scripts/collect_session.py — "
"the SessionEnd hook would silently fail."
)