diff --git a/tests/fixtures/analyst_bootstrap.py b/tests/fixtures/analyst_bootstrap.py
index a08d9e4..7bf4870 100644
--- a/tests/fixtures/analyst_bootstrap.py
+++ b/tests/fixtures/analyst_bootstrap.py
@@ -176,6 +176,42 @@ def _seed_db(data_dir: Path) -> dict:
             source_type="bigquery", bucket="test",
             source_table=REMOTE_TABLE_ID, query_mode="remote",
         )
+
+        # --- Parquet files + sync_state for non-remote tables -----------
+        # The manifest builder iterates `sync_state` (not table_registry) and
+        # `/api/data/{tid}/download` looks up parquet files under
+        # `data_dir/extracts/.../data/`. Seeding both lets `agnes init`
+        # exercise the full download path, not just the registry-only stub.
+        # Each parquet is a single-row DuckDB COPY — minimal but valid (PAR1
+        # magic + metadata) so client-side `_is_valid_parquet` passes.
+        import hashlib
+        from datetime import datetime, timezone  # NOTE(review): unused in this hunk? confirm, then drop
+        from src.repositories.sync_state import SyncStateRepository
+        sync_repo = SyncStateRepository(conn)
+        extracts_data = data_dir / "extracts" / "test" / "data"
+        extracts_data.mkdir(parents=True, exist_ok=True)
+        for tid in (LOCAL_TABLE_ID, MATERIALIZED_TABLE_ID):
+            parquet_path = extracts_data / f"{tid}.parquet"
+            # COPY ... TO creates a real parquet via DuckDB's writer. The
+            # target is inlined as a SQL string literal, so double any
+            # single quote in the path to keep the statement well-formed.
+            copy_target = str(parquet_path).replace("'", "''")
+            conn.execute(
+                "COPY (SELECT 1 AS id, 'sample' AS label) "
+                f"TO '{copy_target}' (FORMAT PARQUET)"
+            )
+            # Compute MD5 the same way `app/api/sync.py:_file_hash` and
+            # `cli/lib/pull.py:_file_md5` do — chunked 8k reads.
+            h = hashlib.md5()
+            with open(parquet_path, "rb") as fh:
+                for chunk in iter(lambda: fh.read(8192), b""):
+                    h.update(chunk)
+            sync_repo.update_sync(
+                table_id=tid,
+                rows=1,
+                file_size_bytes=parquet_path.stat().st_size,
+                hash=h.hexdigest(),
+            )
     finally:
         conn.close()
 
diff --git a/tests/test_clean_install_integration.py b/tests/test_clean_install_integration.py
new file mode 100644
index 0000000..1d31808
--- /dev/null
+++ b/tests/test_clean_install_integration.py
@@ -0,0 +1,181 @@
+"""End-to-end clean-install integration tests for `agnes init`."""
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+
+AGNES = [sys.executable, "-m", "cli.main"]
+
+# Repository root (this file lives in `<repo>/tests/`). Exported to the CLI
+# subprocesses via PYTHONPATH so `python -m cli.main` stays importable even
+# when a test runs the CLI with `cwd` outside the checkout.
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+# Upper bound for one `agnes init` run: a hung subprocess should fail the
+# test instead of stalling the whole suite.
+INIT_TIMEOUT_S = 120
+
+
+def _isolated_env(tmp_path: Path) -> dict:
+    """Env with `AGNES_CONFIG_DIR` pointing into tmp_path.
+
+    `cli.config.get_token()` reads `~/.config/agnes/token.json` first and
+    only falls back to `AGNES_TOKEN`. Without this isolation a stale token
+    on the developer's machine would override the test_pat passed via
+    `--token`. Same shape as Task 20's `zero_grants_workspace` fixture.
+
+    The repository root is also prepended to `PYTHONPATH`: `python -m`
+    puts the subprocess's cwd (not this checkout) at the front of
+    `sys.path`, so a run with `cwd` elsewhere would otherwise only find
+    `cli.main` if the package happens to be installed.
+    """
+    env = os.environ.copy()
+    config_dir = tmp_path / "agnes-config"
+    config_dir.mkdir(parents=True, exist_ok=True)
+    env["AGNES_CONFIG_DIR"] = str(config_dir)
+    inherited = env.get("PYTHONPATH")
+    env["PYTHONPATH"] = (
+        f"{REPO_ROOT}{os.pathsep}{inherited}" if inherited else str(REPO_ROOT)
+    )
+    return env
+
+
+def _run_init(server_url, token, workspace, env, *extra_args):
+    """Run `agnes init` for `workspace` and return the CompletedProcess.
+
+    `extra_args` are appended after the standard flags (e.g. `--force`).
+    Raises `subprocess.TimeoutExpired` after `INIT_TIMEOUT_S` seconds.
+    """
+    return subprocess.run(
+        AGNES + [
+            "init",
+            "--server-url", server_url,
+            "--token", token,
+            "--workspace", str(workspace),
+            *extra_args,
+        ],
+        env=env, capture_output=True, text=True, timeout=INIT_TIMEOUT_S,
+    )
+
+
+def _hook_commands(settings: dict, event: str) -> list:
+    """Return every hook command registered for `event` in settings.json data."""
+    return [
+        hook.get("command", "")
+        for entry in settings.get("hooks", {}).get(event, [])
+        for hook in entry.get("hooks", [])
+    ]
+
+
+def assert_no_dead_dirs(workspace: Path):
+    """Lazy-mkdir contract: forbidden dirs absent; conditionally-empty dirs only when populated."""
+    forbidden_unconditional = ["data/parquet", "data/duckdb", "data/metadata",
+                               "user/artifacts", ".agnes"]
+    for d in forbidden_unconditional:
+        assert not (workspace / d).exists(), f"forbidden dir created: {d}"
+    for d in [".claude/rules", "server/parquet", "user/sessions", "user/snapshots"]:
+        path = workspace / d
+        if path.exists():
+            assert any(path.iterdir()), f"{d} exists but is empty"
+
+
+def test_clean_install_minimal_grants(fastapi_test_server, tmp_path, test_pat):
+    """`agnes init` with grants → CLAUDE.md, AGNES_WORKSPACE.md, hooks, parquets, DuckDB."""
+    workspace = tmp_path / "ws"
+    workspace.mkdir()
+    result = _run_init(fastapi_test_server.url, test_pat, workspace,
+                       _isolated_env(tmp_path))
+    assert result.returncode == 0, f"init failed: {result.stderr}"
+
+    # Required files
+    for must in ["CLAUDE.md", "AGNES_WORKSPACE.md",
+                 ".claude/settings.json", ".claude/CLAUDE.local.md",
+                 "user/duckdb/analytics.duckdb"]:
+        assert (workspace / must).exists(), f"missing: {must}"
+
+    # The fixture seeds parquets for the local and materialized tables
+    # (remote is skipped per query_mode); require that at least one landed.
+    parquets = list((workspace / "server" / "parquet").glob("*.parquet"))
+    assert len(parquets) >= 1, f"expected >=1 parquet, got {len(parquets)}: {parquets}"
+
+    # No dead dirs
+    assert_no_dead_dirs(workspace)
+
+    # Hooks installed (any hook of any entry registered for the event counts)
+    settings = json.loads(
+        (workspace / ".claude" / "settings.json").read_text(encoding="utf-8")
+    )
+    assert any("agnes pull" in cmd
+               for cmd in _hook_commands(settings, "SessionStart"))
+    assert any("agnes push" in cmd
+               for cmd in _hook_commands(settings, "SessionEnd"))
+
+    # CLAUDE.md was fetched from /api/welcome (not from local template)
+    claude_md = (workspace / "CLAUDE.md").read_text(encoding="utf-8")
+    assert "agnes pull" in claude_md
+    assert "da sync" not in claude_md  # post-rewrite content
+
+    # AGNES_WORKSPACE.md content
+    workspace_md = (workspace / "AGNES_WORKSPACE.md").read_text(encoding="utf-8")
+    assert test_pat not in workspace_md, "PAT must not leak into AGNES_WORKSPACE.md"
+    for placeholder in ["{created_at}", "{server_url}", "{workspace_path}"]:
+        assert placeholder not in workspace_md, f"placeholder leaked: {placeholder}"
+    assert fastapi_test_server.url in workspace_md
+    assert str(workspace) in workspace_md
+    assert "agnes pull" in workspace_md  # cheat sheet uses new verb
+
+
+def test_clean_install_zero_grants(fastapi_test_server, tmp_path, test_pat_no_grants):
+    """Zero grants → minimal workspace; no parquets, no rules, no dead dirs."""
+    workspace = tmp_path / "ws"
+    workspace.mkdir()
+    result = _run_init(fastapi_test_server.url, test_pat_no_grants, workspace,
+                       _isolated_env(tmp_path))
+    assert result.returncode == 0, f"init failed: {result.stderr}"
+
+    must_exist = {"CLAUDE.md", "AGNES_WORKSPACE.md",
+                  ".claude/settings.json", ".claude/CLAUDE.local.md",
+                  "user/duckdb/analytics.duckdb"}
+    must_not_exist = {".claude/rules", "server/parquet", "data/parquet",
+                      "data/duckdb", "data/metadata", "user/artifacts",
+                      "user/sessions", "user/snapshots", ".agnes"}
+    for p in must_exist:
+        assert (workspace / p).exists(), f"missing: {p}"
+    for p in must_not_exist:
+        assert not (workspace / p).exists(), f"unexpected: {p}"
+    assert_no_dead_dirs(workspace)
+
+
+def test_init_force_preserves_local_md(fastapi_test_server, tmp_path, test_pat):
+    """`agnes init --force` regenerates CLAUDE.md but never touches CLAUDE.local.md."""
+    workspace = tmp_path / "ws"
+    workspace.mkdir()
+    env = _isolated_env(tmp_path)
+    result1 = _run_init(fastapi_test_server.url, test_pat, workspace, env)
+    assert result1.returncode == 0, f"first init failed: {result1.stderr}"
+    local_md = workspace / ".claude" / "CLAUDE.local.md"
+    local_md.write_text("# my private notes\n", encoding="utf-8")
+
+    result2 = _run_init(fastapi_test_server.url, test_pat, workspace, env,
+                        "--force")
+    assert result2.returncode == 0, f"force init failed: {result2.stderr}"
+    assert "my private notes" in local_md.read_text(encoding="utf-8")
+
+
+def test_readers_in_pre_init_dir(tmp_path):
+    """Reader commands in a folder that never had `agnes init`. Friendly hints, no tracebacks."""
+    env = _isolated_env(tmp_path)
+    # Run from a dedicated empty directory: `_isolated_env` creates
+    # `agnes-config/` under tmp_path, which would otherwise sit in the cwd.
+    pre_init_dir = tmp_path / "pre-init"
+    pre_init_dir.mkdir()
+    for cmd in [AGNES + ["query", "SELECT 1"],
+                AGNES + ["snapshot", "create", "x", "--as", "y", "--estimate"],
+                AGNES + ["explore", "x"],
+                AGNES + ["snapshot", "list"]]:
+        result = subprocess.run(cmd, cwd=pre_init_dir, env=env,
+                                capture_output=True, text=True, timeout=15)
+        assert "Traceback" not in result.stderr, f"{cmd} threw: {result.stderr}"