test: clean-install integration suite (minimal/zero grants, force, pre-init)

This commit is contained in:
ZdenekSrotyr 2026-05-04 19:22:24 +02:00
parent 42e108ae5e
commit 8403529fcd
2 changed files with 175 additions and 0 deletions

View file

@ -176,6 +176,39 @@ def _seed_db(data_dir: Path) -> dict:
source_type="bigquery", bucket="test",
source_table=REMOTE_TABLE_ID, query_mode="remote",
)
# --- Parquet files + sync_state for non-remote tables -----------
# The manifest builder iterates `sync_state` (not table_registry) and
# `/api/data/{tid}/download` looks up parquet files under
# `data_dir/extracts/.../data/`. Seeding both lets `agnes init`
# exercise the full download path, not just the registry-only stub.
# Each parquet is a single-row DuckDB COPY — minimal but valid (PAR1
# magic + metadata) so client-side `_is_valid_parquet` passes.
from src.repositories.sync_state import SyncStateRepository
from datetime import datetime, timezone
sync_repo = SyncStateRepository(conn)
extracts_data = data_dir / "extracts" / "test" / "data"
extracts_data.mkdir(parents=True, exist_ok=True)
for tid in (LOCAL_TABLE_ID, MATERIALIZED_TABLE_ID):
parquet_path = extracts_data / f"{tid}.parquet"
# COPY ... TO creates a real parquet via DuckDB's writer.
conn.execute(
f"COPY (SELECT 1 AS id, 'sample' AS label) "
f"TO '{parquet_path}' (FORMAT PARQUET)"
)
# Compute MD5 the same way `app/api/sync.py:_file_hash` and
# `cli/lib/pull.py:_file_md5` do — chunked 8k reads.
import hashlib
h = hashlib.md5()
with open(parquet_path, "rb") as fh:
for chunk in iter(lambda: fh.read(8192), b""):
h.update(chunk)
sync_repo.update_sync(
table_id=tid,
rows=1,
file_size_bytes=parquet_path.stat().st_size,
hash=h.hexdigest(),
)
finally:
conn.close()

View file

@ -0,0 +1,142 @@
"""End-to-end clean-install integration tests for `agnes init`."""
import json
import os
import subprocess
import sys
from pathlib import Path
# Base argv for every CLI invocation in this module: runs `cli.main` as a
# module under the same interpreter that is executing the tests.
AGNES = [sys.executable, "-m", "cli.main"]
def _isolated_env(tmp_path: Path) -> dict:
    """Return a copy of the process environment with an isolated Agnes config dir.

    `AGNES_CONFIG_DIR` is set to `<tmp_path>/agnes-config`, which is created
    (parents included) if it does not exist yet. All other variables are
    inherited unchanged from the current process.

    Why: `cli.config.get_token()` reads `~/.config/agnes/token.json` first and
    only falls back to `AGNES_TOKEN`. Without this isolation a stale token on
    the developer's machine would override the test_pat passed via `--token`.
    Same shape as Task 20's `zero_grants_workspace` fixture.
    """
    isolated_config = tmp_path / "agnes-config"
    isolated_config.mkdir(parents=True, exist_ok=True)
    return {**os.environ, "AGNES_CONFIG_DIR": str(isolated_config)}
def assert_no_dead_dirs(workspace: Path):
    """Assert the lazy-mkdir contract on a workspace directory.

    Two groups of workspace-relative directories are checked:

    * always forbidden: must not exist at all;
    * conditional: may exist, but only when they contain at least one entry.

    Raises:
        AssertionError: naming the first offending directory.
    """
    never_allowed = (
        "data/parquet",
        "data/duckdb",
        "data/metadata",
        "user/artifacts",
        ".agnes",
    )
    only_if_populated = (
        ".claude/rules",
        "server/parquet",
        "user/sessions",
        "user/snapshots",
    )

    for rel in never_allowed:
        assert not (workspace / rel).exists(), f"forbidden dir created: {rel}"

    for rel in only_if_populated:
        candidate = workspace / rel
        if not candidate.exists():
            # Absent is fine; only an existing-but-empty directory is a violation.
            continue
        assert any(candidate.iterdir()), f"{rel} exists but is empty"
def test_clean_install_minimal_grants(fastapi_test_server, tmp_path, test_pat):
    """`agnes init` with grants → CLAUDE.md, AGNES_WORKSPACE.md, hooks, parquets, DuckDB.

    Runs `agnes init` in a subprocess against `fastapi_test_server.url` with
    `test_pat`, targeting a fresh `<tmp_path>/ws` workspace, then checks:

    * the command exits 0;
    * the required workspace files exist;
    * at least one parquet landed under `server/parquet`;
    * no dead (forbidden or empty) directories were created;
    * SessionStart / SessionEnd hooks reference `agnes pull` / `agnes push`;
    * CLAUDE.md and AGNES_WORKSPACE.md carry the expected content and do not
      leak the PAT or unrendered placeholders.
    """
    workspace = tmp_path / "ws"
    workspace.mkdir()

    # Bounded run: a hung CLI must fail this test rather than stall the suite.
    result = subprocess.run(
        AGNES + [
            "init",
            "--server-url", fastapi_test_server.url,
            "--token", test_pat,
            "--workspace", str(workspace),
        ],
        env=_isolated_env(tmp_path),
        capture_output=True,
        text=True,
        timeout=120,
    )
    assert result.returncode == 0, f"init failed: {result.stderr}"

    # Required files
    for must in ["CLAUDE.md", "AGNES_WORKSPACE.md",
                 ".claude/settings.json", ".claude/CLAUDE.local.md",
                 "user/duckdb/analytics.duckdb"]:
        assert (workspace / must).exists(), f"missing: {must}"

    # Grants → at least one parquet is downloaded. The original note expected
    # two (local + materialized; remote is skipped per query_mode), but the
    # assertion is intentionally kept at the looser ">= 1" bound.
    parquets = list((workspace / "server" / "parquet").glob("*.parquet"))
    assert len(parquets) >= 1, f"expected >=1 parquet, got {len(parquets)}: {parquets}"

    # No dead dirs
    assert_no_dead_dirs(workspace)

    # Hooks installed
    settings = json.loads(
        (workspace / ".claude" / "settings.json").read_text(encoding="utf-8")
    )

    def _hook_commands(event):
        """Collect every command string registered under one hook event.

        Walks all entries and all hooks within each entry, so the check does
        not depend on the Agnes hook being first, and an empty `hooks` list
        produces a clean assertion failure instead of an IndexError.
        """
        return [
            hook.get("command", "")
            for entry in settings.get("hooks", {}).get(event, [])
            for hook in entry.get("hooks", [])
        ]

    start_commands = _hook_commands("SessionStart")
    end_commands = _hook_commands("SessionEnd")
    assert any("agnes pull" in cmd for cmd in start_commands), (
        f"no SessionStart hook runs 'agnes pull': {start_commands}"
    )
    assert any("agnes push" in cmd for cmd in end_commands), (
        f"no SessionEnd hook runs 'agnes push': {end_commands}"
    )

    # CLAUDE.md was fetched from /api/welcome (not from local template)
    claude_md = (workspace / "CLAUDE.md").read_text(encoding="utf-8")
    assert "agnes pull" in claude_md, "CLAUDE.md does not mention 'agnes pull'"
    # post-rewrite content
    assert "da sync" not in claude_md, "CLAUDE.md still mentions legacy 'da sync'"

    # AGNES_WORKSPACE.md content
    workspace_md = (workspace / "AGNES_WORKSPACE.md").read_text(encoding="utf-8")
    assert test_pat not in workspace_md, "PAT must not leak into AGNES_WORKSPACE.md"
    for placeholder in ["{created_at}", "{server_url}", "{workspace_path}"]:
        assert placeholder not in workspace_md, f"placeholder leaked: {placeholder}"
    assert fastapi_test_server.url in workspace_md, (
        "server URL missing from AGNES_WORKSPACE.md"
    )
    assert str(workspace) in workspace_md, (
        "workspace path missing from AGNES_WORKSPACE.md"
    )
    # cheat sheet uses new verb
    assert "agnes pull" in workspace_md, (
        "AGNES_WORKSPACE.md does not mention 'agnes pull'"
    )
def test_clean_install_zero_grants(fastapi_test_server, tmp_path, test_pat_no_grants):
    """Zero grants → minimal workspace; no parquets, no rules, no dead dirs.

    Runs `agnes init` in a subprocess with `test_pat_no_grants` against a
    fresh `<tmp_path>/ws` workspace and checks that the command exits 0, the
    baseline files exist, and none of the data/rules/session directories
    were created.
    """
    workspace = tmp_path / "ws"
    workspace.mkdir()

    # Bounded run: a hung CLI must fail this test rather than stall the suite.
    result = subprocess.run(
        AGNES + [
            "init",
            "--server-url", fastapi_test_server.url,
            "--token", test_pat_no_grants,
            "--workspace", str(workspace),
        ],
        env=_isolated_env(tmp_path),
        capture_output=True,
        text=True,
        timeout=120,
    )
    assert result.returncode == 0, f"init failed: {result.stderr}"

    must_exist = {"CLAUDE.md", "AGNES_WORKSPACE.md",
                  ".claude/settings.json", ".claude/CLAUDE.local.md",
                  "user/duckdb/analytics.duckdb"}
    must_not_exist = {".claude/rules", "server/parquet", "data/parquet",
                      "data/duckdb", "data/metadata", "user/artifacts",
                      "user/sessions", "user/snapshots", ".agnes"}

    # Iterate in sorted order so the first reported failure is the same on
    # every run (set iteration order for strings varies between processes).
    for p in sorted(must_exist):
        assert (workspace / p).exists(), f"missing: {p}"
    for p in sorted(must_not_exist):
        assert not (workspace / p).exists(), f"unexpected: {p}"

    assert_no_dead_dirs(workspace)
def test_init_force_preserves_local_md(fastapi_test_server, tmp_path, test_pat):
    """`agnes init --force` regenerates CLAUDE.md but never touches CLAUDE.local.md.

    Flow: run `agnes init`, overwrite `.claude/CLAUDE.local.md` with a private
    note, run `agnes init --force`, then check the note is still present.

    NOTE(review): only the CLAUDE.local.md half of the contract is asserted
    here; regeneration of CLAUDE.md is not checked by this test.
    """
    workspace = tmp_path / "ws"
    workspace.mkdir()
    env = _isolated_env(tmp_path)
    local_md = workspace / ".claude" / "CLAUDE.local.md"

    def _run_init(*extra_args):
        """Invoke `agnes init` for this workspace, appending any extra CLI flags.

        The run is bounded so a hung CLI fails the test instead of stalling
        the suite.
        """
        return subprocess.run(
            AGNES + [
                "init",
                "--server-url", fastapi_test_server.url,
                "--token", test_pat,
                "--workspace", str(workspace),
                *extra_args,
            ],
            env=env,
            capture_output=True,
            text=True,
            timeout=120,
        )

    result1 = _run_init()
    assert result1.returncode == 0, f"first init failed: {result1.stderr}"

    # Simulate user edits that a forced re-init must leave in place.
    local_md.write_text("# my private notes\n", encoding="utf-8")

    result2 = _run_init("--force")
    assert result2.returncode == 0, f"force init failed: {result2.stderr}"
    assert "my private notes" in local_md.read_text(encoding="utf-8"), (
        "CLAUDE.local.md was overwritten by `agnes init --force`"
    )
def test_readers_in_pre_init_dir(tmp_path):
    """Reader commands in a folder that never had `agnes init`. Friendly hints, no tracebacks.

    Each reader command is run with `cwd=tmp_path` (a directory with no Agnes
    workspace) and an isolated config dir. The test asserts that neither
    output stream contains a Python traceback, and that the CLI module was
    actually importable so the check is not satisfied vacuously.

    NOTE(review): the wording of the "friendly hints" is not asserted here.
    """
    env = _isolated_env(tmp_path)
    reader_invocations = [
        ["query", "SELECT 1"],
        ["snapshot", "create", "x", "--as", "y", "--estimate"],
        ["explore", "x"],
        ["snapshot", "list"],
    ]
    for args in reader_invocations:
        cmd = AGNES + args
        result = subprocess.run(cmd, cwd=tmp_path, env=env,
                                capture_output=True, text=True, timeout=15)
        # When `python -m cli.main` cannot import the package (cwd is not the
        # repo root here), the interpreter prints "No module named ..." with
        # no "Traceback" line, so the check below would pass without ever
        # running the command. Fail loudly in that case.
        assert "No module named" not in result.stderr, (
            f"{cmd} could not import the CLI: {result.stderr}"
        )
        assert "Traceback" not in result.stderr, f"{cmd} threw: {result.stderr}"
        # A traceback printed to stdout is just as unfriendly as one on stderr.
        assert "Traceback" not in result.stdout, f"{cmd} threw: {result.stdout}"