From d0b7e122d6d72524f17f5c7a55515e6b512b166e Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Fri, 1 May 2026 20:25:27 +0200 Subject: [PATCH] =?UTF-8?q?feat(cli):=20smart=20local=20sync=20=E2=80=94?= =?UTF-8?q?=20Claude=20Code=20SessionStart/SessionEnd=20hooks=20+=20da=20s?= =?UTF-8?q?ync=20--quiet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analyst flow becomes a closed loop with the server-curated table catalog: - `da analyst setup` writes `/.claude/settings.json` with two hooks: SessionStart → `da sync --quiet || true` — pulls fresh RBAC-filtered parquets at session start SessionEnd → `da sync --upload-only --quiet || true` — uploads session jsonl + CLAUDE.local.md - `|| true` keeps Claude Code unblocked when the server is down. - Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace. - `da sync --quiet` rewrites the CLI output for hook consumption — 0 stdout on success, single-line error on failure. - Existing settings.json is patched (deep-merged), not overwritten; malformed JSON is reported, not silently overwritten. Tests cover: workspace bootstrap, hook insertion, malformed-json safety, quiet-mode output shape. --- cli/commands/analyst.py | 51 ++++++++++ cli/commands/sync.py | 130 +++++++++++++++++++++++- docs/setup/claude_settings.json | 12 ++- tests/test_cli_analyst_setup_hooks.py | 97 ++++++++++++++++++ tests/test_cli_sync_quiet.py | 138 ++++++++++++++++++++++++++ tests/test_setup_hooks_template.py | 33 ++++++ 6 files changed, 456 insertions(+), 5 deletions(-) create mode 100644 tests/test_cli_analyst_setup_hooks.py create mode 100644 tests/test_cli_sync_quiet.py create mode 100644 tests/test_setup_hooks_template.py diff --git a/cli/commands/analyst.py b/cli/commands/analyst.py index 06f70a4..c6dc17c 100644 --- a/cli/commands/analyst.py +++ b/cli/commands/analyst.py @@ -273,6 +273,52 @@ def _get_instance_name(server_url: str, token: str) -> str: return parsed.hostname or "AI Data Analyst" +# --------------------------------------------------------------------------- +# Helper: install SessionStart/End hooks into a Claude settings file +# --------------------------------------------------------------------------- + +def _install_claude_hooks(settings_path: Path) -> None: + """Add SessionStart/SessionEnd hooks calling `da sync` to a Claude settings file. + + Idempotent: replaces our prior `da sync` entries (matched by command substring + `da sync`) but preserves anyone else's hooks. Creates the file when missing. + + The settings file is workspace-level (`/.claude/settings.json`) so + the hooks only fire in this analyst workspace, not in unrelated Claude Code + sessions on the same machine. + """ + settings_path.parent.mkdir(parents=True, exist_ok=True) + + if settings_path.exists(): + try: + cfg = json.loads(settings_path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + typer.echo( + f"Warning: {settings_path} is not valid JSON; skipping hook install.", + err=True, + ) + return + else: + cfg = {} + + hooks = cfg.setdefault("hooks", {}) + + def _replace_or_add(event: str, command: str) -> None: + existing = hooks.setdefault(event, []) + # Drop any prior entry whose every command is a `da sync` invocation. + # Third-party entries (PreToolUse: echo hi) and mixed entries are left alone. + for entry in list(existing): + entry_cmds = [h.get("command", "") for h in entry.get("hooks", [])] + if entry_cmds and all("da sync" in c for c in entry_cmds): + existing.remove(entry) + existing.append({"hooks": [{"type": "command", "command": command}]}) + + _replace_or_add("SessionStart", "da sync --quiet 2>/dev/null || true") + _replace_or_add("SessionEnd", "da sync --upload-only --quiet 2>/dev/null || true") + + settings_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8") + + # --------------------------------------------------------------------------- # Helper: generate CLAUDE.md from template # --------------------------------------------------------------------------- @@ -318,9 +364,13 @@ def _generate_claude_md( settings_path = workspace / ".claude" / "settings.json" if not settings_path.exists(): + # First-run defaults: model + permissions. _install_claude_hooks below + # will merge in the SessionStart/End hooks on top of these. settings = {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}} settings_path.write_text(json.dumps(settings, indent=2)) + _install_claude_hooks(settings_path) + # --------------------------------------------------------------------------- # Helper: data freshness check (for returning-session detection) @@ -399,6 +449,7 @@ def setup( typer.echo(f" Server : {server_url}") typer.echo(f" Tables : {n_downloaded} downloaded, {total_rows} total rows") typer.echo(f" Workspace: {workspace}") + typer.echo(f" Hooks : SessionStart/End installed in {workspace}/.claude/settings.json") typer.echo("") typer.echo("Next steps:") typer.echo(" da sync — refresh data") diff --git a/cli/commands/sync.py b/cli/commands/sync.py index 10d50b4..1c4de20 100644 --- a/cli/commands/sync.py +++ b/cli/commands/sync.py @@ -31,6 +31,11 @@ def sync( upload_only: bool = typer.Option(False, "--upload-only", help="Only upload sessions/artifacts"), docs_only: bool = typer.Option(False, "--docs-only", help="Only sync documentation"), as_json: bool = typer.Option(False, "--json", help="Output as JSON"), + quiet: bool = typer.Option( + False, + "--quiet", + help="Suppress progress output (intended for hooks/cron)", + ), dry_run: bool = typer.Option( False, "--dry-run", @@ -39,7 +44,12 @@ def sync( ): """Sync data between server and local machine.""" if upload_only: - _upload(as_json, dry_run=dry_run) + _upload(as_json, dry_run=dry_run, quiet=quiet) + return + + if quiet: + # Bypass Rich Progress entirely so hook stdout stays clean. + _sync_quiet(table=table, docs_only=docs_only, as_json=as_json, dry_run=dry_run) return with Progress( @@ -388,11 +398,13 @@ def _is_valid_parquet(path: Path) -> bool: return False -def _upload(as_json: bool, dry_run: bool = False): +def _upload(as_json: bool, dry_run: bool = False, quiet: bool = False): """Upload sessions and CLAUDE.local.md to server. When `dry_run=True`, enumerate what would be uploaded without hitting the - API or mutating anything on disk. + API or mutating anything on disk. When `quiet=True`, suppress the trailing + "Uploaded N sessions" stdout line — error paths still surface on stderr + via api_post itself. """ local_dir = _local_data_dir() sessions_dir = local_dir / "user" / "sessions" @@ -448,7 +460,117 @@ def _upload(as_json: bool, dry_run: bool = False): if as_json: typer.echo(json.dumps(results, indent=2)) - else: + elif not quiet: typer.echo(f"Uploaded {results['sessions']} sessions") if results["local_md"]: typer.echo("Uploaded CLAUDE.local.md") + + +def _sync_quiet(table, docs_only, as_json, dry_run): + """Mirror of the Progress-block flow without any Rich UI. + + Designed for Claude Code SessionStart/SessionEnd hooks and cron callers: + stdout stays empty in the no-op case, the terse one-line summary lands + on stderr so hook stdout pipes don't see it, and a manifest fetch + failure exits non-zero so the `|| true` shell fallback can swallow it + cleanly. + + Skips remote-mode tables exactly like the noisy path; runs the + `_fetch_and_write_rules` corporate-memory step so analysts' .claude/ + rules/ stay fresh between sessions. + """ + try: + resp = api_get("/api/sync/manifest") + resp.raise_for_status() + manifest = resp.json() + except Exception as e: + typer.echo(f"sync: manifest fetch failed: {e}", err=True) + raise typer.Exit(1) + + server_tables = manifest.get("tables", {}) + local_state = get_sync_state() + local_tables = local_state.get("tables", {}) + + to_download = [] + skipped_remote = [] + for tid, info in server_tables.items(): + if table and tid != table: + continue + if docs_only: + continue + if info.get("query_mode") == "remote": + skipped_remote.append(tid) + continue + local_hash = local_tables.get(tid, {}).get("hash", "") + server_hash = info.get("hash", "") + if server_hash != local_hash or tid not in local_tables or not server_hash: + to_download.append(tid) + + if dry_run: + if as_json: + typer.echo(json.dumps( + {"dry_run": True, "would_download": to_download, + "skipped_remote": skipped_remote}, + indent=2, + )) + else: + # Single stderr line keeps stdout clean for hooks while still + # giving an interactive operator running `da sync --quiet + # --dry-run` a sign that something happened. + typer.echo( + f"sync (dry-run): would download {len(to_download)} tables, " + f"skip {len(skipped_remote)} remote-mode", + err=True, + ) + return + + local_dir = _local_data_dir() + parquet_dir = local_dir / "server" / "parquet" + parquet_dir.mkdir(parents=True, exist_ok=True) + + results = { + "downloaded": [], "skipped": [], + "skipped_remote": list(skipped_remote), "errors": [], + } + for tid in to_download: + target = parquet_dir / f"{tid}.parquet" + expected_hash = server_tables[tid].get("hash", "") + try: + stream_download(f"/api/data/{tid}/download", str(target)) + if expected_hash: + if _md5_file(target) != expected_hash: + target.unlink(missing_ok=True) + raise ValueError("hash mismatch") + elif not _is_valid_parquet(target): + target.unlink(missing_ok=True) + raise ValueError("not a valid parquet") + local_tables[tid] = { + "hash": expected_hash, + "rows": server_tables[tid].get("rows", 0), + "size_bytes": server_tables[tid].get("size_bytes", 0), + } + results["downloaded"].append(tid) + except Exception as e: + results["errors"].append({"table": tid, "error": str(e)}) + + from datetime import datetime, timezone + local_state["tables"] = local_tables + local_state["last_sync"] = datetime.now(timezone.utc).isoformat() + save_sync_state(local_state) + + if results["downloaded"]: + _rebuild_duckdb_views(local_dir, parquet_dir) + + # Same corporate-memory rule fetch as the noisy path — keeps the + # `.claude/rules/km_*.md` files fresh between sessions even when the + # hook is the only thing invoking sync. + _fetch_and_write_rules(local_dir) + + if as_json: + typer.echo(json.dumps(results, indent=2)) + elif results["downloaded"] or results["errors"]: + typer.echo( + f"sync: {len(results['downloaded'])} tables, " + f"{len(results['errors'])} errors", + err=True, + ) diff --git a/docs/setup/claude_settings.json b/docs/setup/claude_settings.json index 2770760..c2ebb24 100644 --- a/docs/setup/claude_settings.json +++ b/docs/setup/claude_settings.json @@ -1,11 +1,21 @@ { "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "da sync --quiet 2>/dev/null || true" + } + ] + } + ], "SessionEnd": [ { "hooks": [ { "type": "command", - "command": "python server/scripts/collect_session.py 2>/dev/null || true" + "command": "da sync --upload-only --quiet 2>/dev/null || true" } ] } diff --git a/tests/test_cli_analyst_setup_hooks.py b/tests/test_cli_analyst_setup_hooks.py new file mode 100644 index 0000000..611ad53 --- /dev/null +++ b/tests/test_cli_analyst_setup_hooks.py @@ -0,0 +1,97 @@ +"""`_install_claude_hooks` writes SessionStart/End hooks idempotently into +a Claude settings file (workspace-level for analyst workspaces).""" +import json +from pathlib import Path + +from cli.commands.analyst import _install_claude_hooks + + +def test_install_creates_settings_when_missing(tmp_path): + settings = tmp_path / ".claude" / "settings.json" + _install_claude_hooks(settings) + + cfg = json.loads(settings.read_text()) + starts = cfg["hooks"]["SessionStart"] + cmds = [h["command"] for e in starts for h in e["hooks"]] + assert any("da sync --quiet" in c and "--upload-only" not in c for c in cmds), cmds + + ends = cfg["hooks"]["SessionEnd"] + end_cmds = [h["command"] for e in ends for h in e["hooks"]] + assert any("da sync --upload-only" in c for c in end_cmds), end_cmds + + +def test_install_preserves_existing_unrelated_hooks(tmp_path): + settings = tmp_path / ".claude" / "settings.json" + settings.parent.mkdir(parents=True) + settings.write_text(json.dumps({ + "hooks": { + "PreToolUse": [{"hooks": [{"type": "command", "command": "echo hi"}]}], + }, + "permissions": {"allow": ["Bash(git status:*)"]}, + "model": "sonnet", + })) + + _install_claude_hooks(settings) + + cfg = json.loads(settings.read_text()) + # Unrelated hook event preserved + assert cfg["hooks"]["PreToolUse"][0]["hooks"][0]["command"] == "echo hi" + # Unrelated top-level keys preserved + assert cfg["permissions"]["allow"] == ["Bash(git status:*)"] + assert cfg["model"] == "sonnet" + # Our new hooks added + assert "SessionStart" in cfg["hooks"] + assert "SessionEnd" in cfg["hooks"] + + +def test_install_is_idempotent(tmp_path): + settings = tmp_path / ".claude" / "settings.json" + _install_claude_hooks(settings) + first = json.loads(settings.read_text()) + _install_claude_hooks(settings) + second = json.loads(settings.read_text()) + # No duplicate entries + assert first["hooks"]["SessionStart"] == second["hooks"]["SessionStart"] + assert first["hooks"]["SessionEnd"] == second["hooks"]["SessionEnd"] + assert len(second["hooks"]["SessionStart"]) == 1 + assert len(second["hooks"]["SessionEnd"]) == 1 + + +def test_install_replaces_old_da_sync_entry_without_duplicating(tmp_path): + """If the user already has a `da sync` entry from a prior version, our + install replaces it cleanly rather than appending a second copy.""" + settings = tmp_path / ".claude" / "settings.json" + settings.parent.mkdir(parents=True) + settings.write_text(json.dumps({ + "hooks": { + "SessionStart": [ + {"hooks": [{"type": "command", "command": "da sync"}]}, # old shape + {"hooks": [{"type": "command", "command": "echo not-ours"}]}, + ] + } + })) + + _install_claude_hooks(settings) + + cfg = json.loads(settings.read_text()) + starts = cfg["hooks"]["SessionStart"] + assert len(starts) == 2 # one ours, one third-party + cmds = [h["command"] for e in starts for h in e["hooks"]] + assert "da sync --quiet 2>/dev/null || true" in cmds + assert "echo not-ours" in cmds + assert all(c == "echo not-ours" or "da sync --quiet" in c for c in cmds), cmds + + +def test_install_skips_malformed_existing_settings(tmp_path, capsys): + """If the settings file is corrupted JSON, warn on stderr and bail — + don't crash the surrounding `da analyst setup` flow.""" + settings = tmp_path / ".claude" / "settings.json" + settings.parent.mkdir(parents=True) + settings.write_text("{not valid json") + + _install_claude_hooks(settings) # must not raise + + captured = capsys.readouterr() + assert "not valid JSON" in captured.err + # File untouched + assert settings.read_text() == "{not valid json" diff --git a/tests/test_cli_sync_quiet.py b/tests/test_cli_sync_quiet.py new file mode 100644 index 0000000..906ba4d --- /dev/null +++ b/tests/test_cli_sync_quiet.py @@ -0,0 +1,138 @@ +"""`da sync --quiet` truly suppresses stdout chatter, including the download +loop and final summary. + +Without --quiet, the same fixture prints "Downloading", "Downloaded:", etc.; +with --quiet, stdout stays empty and the terse one-liner lands on stderr. +The first test forces the download loop to run so the contrast between +noisy/quiet stdout is observable (mutation-tests the flag — see PR #145 +for the original empty-manifest test that passed even without --quiet). +""" +import json +from typer.testing import CliRunner +from unittest.mock import patch, MagicMock + +from cli.main import app + + +def _fake_manifest_one_table(): + resp = MagicMock() + resp.json.return_value = { + "tables": { + "orders": { + "hash": "abc123", + "rows": 5, + "size_bytes": 100, + "query_mode": "local", + "source_type": "keboola", + } + }, + "assets": {}, + "server_time": "2026-04-30T00:00:00Z", + } + resp.raise_for_status = MagicMock() + return resp + + +def _stub_download(_url, target_path): + from pathlib import Path + Path(target_path).write_bytes(b"PAR1" + b"\x00" * 16 + b"PAR1") + + +def test_quiet_suppresses_stdout_when_downloading(tmp_path, monkeypatch): + """Manifest has tables that actually trigger downloads. Without --quiet + stdout would contain 'Downloading' / 'Downloaded:'. With --quiet stdout + stays empty and the terse summary lands on stderr.""" + monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path)) + monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg")) + runner = CliRunner() + + with patch("cli.commands.sync.api_get", return_value=_fake_manifest_one_table()), \ + patch("cli.commands.sync.stream_download", side_effect=_stub_download), \ + patch("cli.commands.sync._md5_file", return_value="abc123"), \ + patch("cli.commands.sync._rebuild_duckdb_views"), \ + patch("cli.commands.sync._fetch_and_write_rules"): + result = runner.invoke(app, ["sync", "--quiet"]) + + assert result.exit_code == 0, result.stdout + assert result.stdout == "", f"expected empty stdout, got: {result.stdout!r}" + assert "sync: 1 tables" in result.stderr + + +def test_noisy_mode_prints_to_stdout(tmp_path, monkeypatch): + """Anchor: the noisy path DOES print download chatter to stdout, so the + contrast in the quiet test above is meaningful.""" + monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path)) + monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg")) + runner = CliRunner() + + with patch("cli.commands.sync.api_get", return_value=_fake_manifest_one_table()), \ + patch("cli.commands.sync.stream_download", side_effect=_stub_download), \ + patch("cli.commands.sync._md5_file", return_value="abc123"), \ + patch("cli.commands.sync._rebuild_duckdb_views"), \ + patch("cli.commands.sync._fetch_and_write_rules"): + result = runner.invoke(app, ["sync"]) + + assert result.exit_code == 0, result.stdout + assert "Downloaded:" in result.stdout + + +def test_quiet_manifest_failure_exits_nonzero(tmp_path, monkeypatch): + """SessionStart hook contract: server unreachable → non-zero exit (so + `|| true` swallows it cleanly), error message on stderr.""" + monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path)) + monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg")) + runner = CliRunner() + + fake_resp = MagicMock() + fake_resp.raise_for_status.side_effect = RuntimeError("boom") + + with patch("cli.commands.sync.api_get", return_value=fake_resp): + result = runner.invoke(app, ["sync", "--quiet"]) + + assert result.exit_code == 1 + assert "manifest fetch failed" in result.stderr + + +def test_quiet_skips_remote_mode_tables(tmp_path, monkeypatch): + """Materialized rows go through the download path; remote rows do not. + Locks in the contract that --quiet honors the same skipped_remote + filter as the noisy path.""" + monkeypatch.setenv("DA_LOCAL_DIR", str(tmp_path)) + monkeypatch.setenv("DA_CONFIG_DIR", str(tmp_path / "_cfg")) + + resp = MagicMock() + resp.json.return_value = { + "tables": { + "live_orders": { + "hash": "x", "rows": 0, "size_bytes": 0, + "query_mode": "remote", "source_type": "bigquery", + }, + "agg_90d": { + "hash": "abc", "rows": 5, "size_bytes": 100, + "query_mode": "materialized", "source_type": "bigquery", + }, + }, + "assets": {}, + "server_time": "2026-04-30T00:00:00Z", + } + resp.raise_for_status = MagicMock() + + runner = CliRunner() + download_calls = [] + + def _spy_download(url, target): + download_calls.append(url) + from pathlib import Path + Path(target).write_bytes(b"PAR1" + b"\x00" * 16 + b"PAR1") + + with patch("cli.commands.sync.api_get", return_value=resp), \ + patch("cli.commands.sync.stream_download", side_effect=_spy_download), \ + patch("cli.commands.sync._md5_file", return_value="abc"), \ + patch("cli.commands.sync._rebuild_duckdb_views"), \ + patch("cli.commands.sync._fetch_and_write_rules"): + result = runner.invoke(app, ["sync", "--quiet"]) + + assert result.exit_code == 0, result.stdout + # Remote table never downloaded; materialized table downloaded. + assert any("agg_90d" in u for u in download_calls) + assert not any("live_orders" in u for u in download_calls) diff --git a/tests/test_setup_hooks_template.py b/tests/test_setup_hooks_template.py new file mode 100644 index 0000000..69a4824 --- /dev/null +++ b/tests/test_setup_hooks_template.py @@ -0,0 +1,33 @@ +"""The shipped Claude settings template must point hooks at `da sync`, not the deleted server/scripts.""" +import json +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +TEMPLATE = REPO_ROOT / "docs" / "setup" / "claude_settings.json" + + +def test_template_has_session_start_da_sync(): + cfg = json.loads(TEMPLATE.read_text()) + starts = cfg.get("hooks", {}).get("SessionStart", []) + assert starts, "SessionStart hook missing" + cmds = [h["command"] for entry in starts for h in entry.get("hooks", [])] + assert any("da sync" in c and "--upload-only" not in c for c in cmds), ( + f"Expected `da sync` in SessionStart, got {cmds}" + ) + + +def test_template_has_session_end_upload(): + cfg = json.loads(TEMPLATE.read_text()) + ends = cfg.get("hooks", {}).get("SessionEnd", []) + cmds = [h["command"] for entry in ends for h in entry.get("hooks", [])] + assert any("da sync --upload-only" in c for c in cmds), ( + f"Expected `da sync --upload-only` in SessionEnd, got {cmds}" + ) + + +def test_template_drops_dead_server_scripts_reference(): + raw = TEMPLATE.read_text() + assert "server/scripts/collect_session.py" not in raw, ( + "Template still references the deleted server/scripts/collect_session.py — " + "the SessionEnd hook would silently fail." + )