test: clean-bootstrap fixtures (fastapi_test_server, test_pat, zero_grants_workspace)

Task 20: reusable pytest fixtures for the clean-bootstrap test suite.
Tasks 21 and 22 (reader smoke matrix + init smoke matrix) consume them.

- fastapi_test_server boots a real uvicorn subprocess against a tmp DATA_DIR,
  pre-seeded with admin@example.com (Admin group), analyst@example.com
  (Everyone group), and three tables (one per query_mode: local /
  materialized / remote).
- web_session: cookie-authenticated httpx.Client for the admin user.
- test_pat: minted JWT for the analyst with table grants on local +
  materialized.
- test_pat_no_grants: same shape, zero resource_grants.
- zero_grants_workspace: subprocess invocation of `agnes init` against the
  no-grants PAT; returns the bootstrapped workspace path.
- NONEXISTENT_TABLE: module-level sentinel for the upcoming reader matrix.

Subprocess uvicorn (mirrors tests/test_e2e_corporate_memory.py) instead of
in-thread so DATA_DIR + module-level singletons in src.db don't bleed
across tests. agnes CLI invoked via `python -m cli.main` instead of the
.venv/bin/agnes shim, which depends on .pth file visibility that iCloud
Drive intermittently re-hides on macOS.
This commit is contained in:
ZdenekSrotyr 2026-05-04 19:11:54 +02:00
parent 8d9323c99e
commit a47c2be282
4 changed files with 578 additions and 0 deletions

View file

@ -301,3 +301,21 @@ def bq_access():
yield _build yield _build
from app.main import app as _app from app.main import app as _app
_app.dependency_overrides.pop(get_bq_access, None) _app.dependency_overrides.pop(get_bq_access, None)
# ---------------------------------------------------------------------------
# Clean-bootstrap test suite (Task 20).
#
# Re-export the analyst-bootstrap fixtures so individual test modules can
# request them by name without an explicit import. Imported at module level
# so pytest collection sees the names; the fixtures themselves don't run
# until a test pulls them in.
# ---------------------------------------------------------------------------
from tests.fixtures.analyst_bootstrap import ( # noqa: E402,F401
NONEXISTENT_TABLE,
fastapi_test_server,
test_pat,
test_pat_no_grants,
web_session,
zero_grants_workspace,
)

1
tests/fixtures/__init__.py vendored Normal file
View file

@ -0,0 +1 @@
"""Reusable test fixtures (clean-bootstrap test suite)."""

460
tests/fixtures/analyst_bootstrap.py vendored Normal file
View file

@ -0,0 +1,460 @@
"""Test fixtures for the clean-bootstrap test suite (Task 20).
Boots a real FastAPI server via uvicorn subprocess so end-to-end paths
exercise the same wsgi/asgi stack as production (cookie sessions, PAT
verify, JWT, DB locks). Pre-seeds two users + three tables in the system
DB before the subprocess starts so the test can authenticate immediately.
Subprocess (not in-thread uvicorn): isolates the test's `DATA_DIR` and
the load-bearing module-level singletons in `src.db` (cached system DB
connection) and `app.instance_config` from the parent test runner. The
existing E2E pattern in tests/test_e2e_corporate_memory.py uses the same
shape; we reuse it here for the same reasons.
Public API:
- `fastapi_test_server` yields `_ServerHandle` with `.url` + `.shutdown()`.
- `web_session` `httpx.Client` authenticated as admin via the form-login
endpoint (cookie session).
- `test_pat` string PAT for analyst with grants to the `local` and
`materialized` test tables.
- `test_pat_no_grants` string PAT for analyst with zero grants.
- `zero_grants_workspace` `Path` to a workspace where `agnes init` has
run with `test_pat_no_grants` (no parquets, no rules).
- `NONEXISTENT_TABLE` module constant for Task 21's smoke matrix.
"""
from __future__ import annotations
import os
import socket
import subprocess
import sys
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Optional
import httpx
import pytest
NONEXISTENT_TABLE = "__nonexistent__"
ADMIN_EMAIL = "admin@example.com"
ADMIN_PASSWORD = "test-admin-password-123"
ANALYST_EMAIL = "analyst@example.com"
ANALYST_PASSWORD = "test-analyst-password-123"
# Test table fixtures — one per query_mode the manifest filter cares about.
LOCAL_TABLE_ID = "local_tbl"
MATERIALIZED_TABLE_ID = "materialized_tbl"
REMOTE_TABLE_ID = "remote_tbl"
# ---------------------------------------------------------------------------
# Server handle
# ---------------------------------------------------------------------------
@dataclass
class _ServerHandle:
url: str
data_dir: Path
proc: subprocess.Popen
admin_user_id: str
analyst_user_id: str
everyone_group_id: str
admin_group_id: str
def shutdown(self) -> None:
if self.proc.poll() is None:
self.proc.terminate()
try:
self.proc.wait(timeout=5)
except subprocess.TimeoutExpired:
self.proc.kill()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _find_free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("127.0.0.1", 0))
return s.getsockname()[1]
def _seed_db(data_dir: Path) -> dict:
"""Open system.duckdb under DATA_DIR, run migrations, seed users + tables.
Done before the uvicorn subprocess boots so:
1. The test can immediately log in as admin/analyst (passwords set up).
2. The subprocess inherits a ready DB on first request no race between
startup migrations and the test's first HTTP call.
3. We close the parent's connection at the end so the subprocess can
acquire DuckDB's file lock.
"""
# Restrict the system-db path resolution to our tmp path. _seed_db
# mutates module state in src.db; the caller's get_system_db() cache
# is reset by tests/conftest._reset_module_caches but we still want
# the subprocess child to see the same DATA_DIR.
os.environ["DATA_DIR"] = str(data_dir)
(data_dir / "state").mkdir(parents=True, exist_ok=True)
(data_dir / "analytics").mkdir(parents=True, exist_ok=True)
(data_dir / "extracts").mkdir(parents=True, exist_ok=True)
# Defer imports until after DATA_DIR is set so any module that reads
# the env at import time picks up our path.
import uuid
from argon2 import PasswordHasher
from src.db import (
SYSTEM_ADMIN_GROUP,
SYSTEM_EVERYONE_GROUP,
close_system_db,
get_system_db,
)
from src.repositories.table_registry import TableRegistryRepository
from src.repositories.user_group_members import UserGroupMembersRepository
from src.repositories.users import UserRepository
conn = get_system_db()
try:
ph = PasswordHasher()
# --- Users -----------------------------------------------------
users = UserRepository(conn)
admin_id = str(uuid.uuid4())
analyst_id = str(uuid.uuid4())
users.create(
id=admin_id, email=ADMIN_EMAIL, name="Admin Tester",
password_hash=ph.hash(ADMIN_PASSWORD),
)
users.create(
id=analyst_id, email=ANALYST_EMAIL, name="Analyst Tester",
password_hash=ph.hash(ANALYST_PASSWORD),
)
# --- System groups (Admin / Everyone are seeded by _ensure_schema).
admin_group_row = conn.execute(
"SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP],
).fetchone()
everyone_group_row = conn.execute(
"SELECT id FROM user_groups WHERE name = ?",
[SYSTEM_EVERYONE_GROUP],
).fetchone()
assert admin_group_row, "Admin system group not seeded"
assert everyone_group_row, "Everyone system group not seeded"
admin_group_id = admin_group_row[0]
everyone_group_id = everyone_group_row[0]
members = UserGroupMembersRepository(conn)
members.add_member(admin_id, admin_group_id, source="system_seed")
# The analyst is in Everyone so RBAC checks against the Everyone
# group resolve cleanly. Admin is implicitly in Everyone for
# historical reasons but the marketplace filter treats Admin as a
# regular group, so we add it explicitly to be unambiguous.
members.add_member(admin_id, everyone_group_id, source="system_seed")
members.add_member(analyst_id, everyone_group_id, source="system_seed")
# --- Tables (one per query_mode) -------------------------------
tables = TableRegistryRepository(conn)
tables.register(
id=LOCAL_TABLE_ID, name=LOCAL_TABLE_ID,
source_type="keboola", bucket="test",
source_table=LOCAL_TABLE_ID, query_mode="local",
)
tables.register(
id=MATERIALIZED_TABLE_ID, name=MATERIALIZED_TABLE_ID,
source_type="bigquery", bucket="test",
source_table=MATERIALIZED_TABLE_ID, query_mode="materialized",
)
tables.register(
id=REMOTE_TABLE_ID, name=REMOTE_TABLE_ID,
source_type="bigquery", bucket="test",
source_table=REMOTE_TABLE_ID, query_mode="remote",
)
finally:
conn.close()
# CRITICAL: release DuckDB's file lock so the uvicorn subprocess can
# open the DB. The parent's cached connection is held by src.db at
# module level; without close_system_db() the child blocks forever
# on its first get_system_db() call.
close_system_db()
return {
"admin_user_id": admin_id,
"analyst_user_id": analyst_id,
"admin_group_id": admin_group_id,
"everyone_group_id": everyone_group_id,
}
def _wait_for_server(url: str, timeout_s: float = 30.0) -> None:
"""Poll /api/health until the server answers 200 or timeout."""
deadline = time.monotonic() + timeout_s
while time.monotonic() < deadline:
try:
resp = httpx.get(f"{url}/api/health", timeout=1.0)
if resp.status_code == 200:
return
except Exception:
pass
time.sleep(0.2)
raise RuntimeError(f"Server at {url} not ready within {timeout_s}s")
def _login_session(server_url: str, email: str, password: str) -> httpx.Client:
"""Form-login and return an httpx.Client with the access cookie set.
The /auth/password/login/web endpoint sets `access_token` as an
HttpOnly cookie and 302s to /dashboard. Using `follow_redirects=False`
means we capture the cookie without chasing the redirect chain.
"""
client = httpx.Client(base_url=server_url, follow_redirects=False, timeout=10.0)
resp = client.post(
"/auth/password/login/web",
data={"email": email, "password": password},
)
assert resp.status_code == 302, (
f"login expected 302 redirect, got {resp.status_code}: {resp.text[:300]}"
)
# Sanity: the redirect Location must NOT carry ?error=invalid (form-login
# bounces back to /login/password on bad creds with status 302 too).
target = resp.headers.get("location", "")
assert "error=" not in target, f"login failed: redirected to {target}"
return client
def _mint_pat(server_url: str, email: str, password: str, *, name: str) -> str:
"""Log in as the user via web-form, then POST /auth/tokens.
Returns the raw JWT (returned exactly once by the create endpoint).
PATs cannot mint other PATs (require_session_token), so we must use a
cookie session, not a previously-minted PAT.
"""
session = _login_session(server_url, email, password)
try:
resp = session.post(
"/auth/tokens",
json={"name": name, "ttl_seconds": 3600},
)
assert resp.status_code == 201, (
f"PAT mint failed: {resp.status_code} {resp.text[:300]}"
)
token = resp.json().get("token")
assert token and isinstance(token, str), f"no token in response: {resp.text}"
return token
finally:
session.close()
def _grant_table_access(web_session: httpx.Client, group_id: str, table_id: str) -> None:
"""POST /api/admin/grants for `(group, "table", table_id)`.
Idempotent: a 409 from the unique constraint is swallowed so the
fixture can be reused with a pre-existing grant.
"""
resp = web_session.post(
"/api/admin/grants",
json={
"group_id": group_id,
"resource_type": "table",
"resource_id": table_id,
},
)
if resp.status_code not in (201, 409):
raise AssertionError(
f"grant create failed: {resp.status_code} {resp.text[:300]}"
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def fastapi_test_server(tmp_path: Path) -> Iterator[_ServerHandle]:
"""Boot a real FastAPI server in a uvicorn subprocess against tmp_path DATA_DIR.
Pre-seeds:
- Two users: admin@example.com (Admin group) and analyst@example.com
(Everyone group only). Both have argon2-hashed passwords usable via
`/auth/password/login/web`.
- Two system groups (Admin, Everyone) created by `_ensure_schema`.
- Three tables in `table_registry`, one per query_mode (local,
materialized, remote).
The subprocess inherits `DATA_DIR=tmp_path/agnes-data` plus whatever
`JWT_SECRET_KEY` / `TESTING` is in the environment, so the parent
process can verify JWTs against the same secret it issued.
Port is allocated via `_find_free_port` so xdist workers don't
collide. Server is shut down via SIGTERM in the fixture teardown.
"""
data_dir = tmp_path / "agnes-data"
data_dir.mkdir(parents=True, exist_ok=True)
seeded = _seed_db(data_dir)
port = _find_free_port()
url = f"http://127.0.0.1:{port}"
env = os.environ.copy()
env["DATA_DIR"] = str(data_dir)
env["TESTING"] = "1"
env["JWT_SECRET_KEY"] = os.environ.get(
"JWT_SECRET_KEY", "test-secret-key-minimum-32-characters!!"
)
# Disable LOCAL_DEV_MODE — the smoke test must exercise real auth.
env.pop("LOCAL_DEV_MODE", None)
proc = subprocess.Popen(
[sys.executable, "-m", "uvicorn", "app.main:app",
"--host", "127.0.0.1", "--port", str(port), "--log-level", "warning"],
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
try:
_wait_for_server(url)
except RuntimeError:
proc.terminate()
try:
stdout = proc.stdout.read().decode("utf-8", errors="replace") if proc.stdout else ""
except Exception:
stdout = "<unreadable>"
proc.wait(timeout=2)
pytest.fail(f"fastapi_test_server failed to start on {url}\nstdout:\n{stdout[:3000]}")
handle = _ServerHandle(
url=url,
data_dir=data_dir,
proc=proc,
admin_user_id=seeded["admin_user_id"],
analyst_user_id=seeded["analyst_user_id"],
admin_group_id=seeded["admin_group_id"],
everyone_group_id=seeded["everyone_group_id"],
)
try:
yield handle
finally:
handle.shutdown()
@pytest.fixture
def web_session(fastapi_test_server: _ServerHandle) -> Iterator[httpx.Client]:
"""Authenticated httpx.Client (cookie session) for admin@example.com.
Cookies persist across requests on the same client, so subsequent
requests against admin-gated endpoints (e.g. POST /api/admin/grants)
succeed without re-attaching the JWT.
"""
client = _login_session(
fastapi_test_server.url, ADMIN_EMAIL, ADMIN_PASSWORD,
)
try:
yield client
finally:
client.close()
@pytest.fixture
def test_pat(
fastapi_test_server: _ServerHandle,
web_session: httpx.Client,
) -> str:
"""Mint a PAT for analyst@example.com with two table grants.
Grants applied (to the Everyone group, which the analyst is a member
of): `local_tbl` and `materialized_tbl`. The third seeded table
(`remote_tbl`) is intentionally left ungranted so smoke matrices can
distinguish "remote skip" from "no access".
Memory items / mandatory rules: not seeded by this fixture; Tasks 21
and 22 will add them when needed via the same `web_session` admin
client. Keeping memory off the critical path makes the fixture
cheaper and the failure surface smaller.
"""
everyone_id = fastapi_test_server.everyone_group_id
_grant_table_access(web_session, everyone_id, LOCAL_TABLE_ID)
_grant_table_access(web_session, everyone_id, MATERIALIZED_TABLE_ID)
return _mint_pat(
fastapi_test_server.url,
ANALYST_EMAIL, ANALYST_PASSWORD,
name="test-pat-with-grants",
)
@pytest.fixture
def test_pat_no_grants(fastapi_test_server: _ServerHandle) -> str:
"""Mint a PAT for analyst@example.com with no resource_grants.
The analyst is still in the Everyone group (so they can authenticate
and call /api/sync/manifest), but no group they belong to has any
table grants. The manifest will return zero tables; `agnes init`
completes (no manifest_unauthorized error) with an empty workspace.
"""
return _mint_pat(
fastapi_test_server.url,
ANALYST_EMAIL, ANALYST_PASSWORD,
name="test-pat-no-grants",
)
@pytest.fixture
def zero_grants_workspace(
tmp_path: Path,
fastapi_test_server: _ServerHandle,
test_pat_no_grants: str,
) -> Path:
"""Run `agnes init` with the no-grants PAT; return the workspace path.
Subprocess invocation (not in-process Typer call) so the test
exercises the same path the paste-prompt installer uses. The CLI
binary is the editable install at `.venv/bin/agnes`; we pass
`AGNES_CONFIG_DIR=<tmp>/agnes-config` so this test does not stomp on
the developer's `~/.config/agnes/`.
"""
workspace = tmp_path / "workspace"
workspace.mkdir()
config_dir = tmp_path / "agnes-config"
config_dir.mkdir()
# Always invoke via `python -m cli.main` rather than the .venv/bin/agnes
# console-script shim. The shim reads `from cli.main import app`, which
# depends on `_editable_impl_agnes_the_ai_analyst.pth` being on disk and
# discoverable. On macOS + iCloud Drive, the leading-underscore .pth
# files get re-hidden by the system between unrelated tasks and the
# shim then fails with `ModuleNotFoundError: No module named 'cli'`.
# `python -m` uses the same interpreter without depending on .pth-file
# visibility for CLI dispatch, so it is robust against that race.
cmd: list[str] = [sys.executable, "-m", "cli.main"]
env = os.environ.copy()
env["AGNES_CONFIG_DIR"] = str(config_dir)
env["AGNES_LOCAL_DIR"] = str(workspace)
result = subprocess.run(
cmd + [
"init",
"--server-url", fastapi_test_server.url,
"--token", test_pat_no_grants,
"--workspace", str(workspace),
],
env=env,
capture_output=True,
text=True,
timeout=60,
)
assert result.returncode == 0, (
f"agnes init failed (exit={result.returncode}):\n"
f"--- stdout ---\n{result.stdout}\n"
f"--- stderr ---\n{result.stderr}"
)
return workspace

View file

@ -0,0 +1,99 @@
"""Smoke tests for the clean-bootstrap fixtures.
Verifies the fixtures defined in `tests/fixtures/analyst_bootstrap.py`
actually boot a FastAPI server, authenticate sessions, mint usable PATs,
and run `agnes init` end-to-end. Tasks 21 and 22 layer their reader/init
matrices on top of these primitives.
"""
from __future__ import annotations
from pathlib import Path
import httpx
def test_server_boots(fastapi_test_server):
"""The subprocess uvicorn answers /api/health with 200."""
resp = httpx.get(f"{fastapi_test_server.url}/api/health")
assert resp.status_code == 200, resp.text
def test_web_session_authenticates(web_session, fastapi_test_server):
"""Admin cookie session can hit an admin-only endpoint.
GET /api/users requires `require_admin`. A 200 here proves the
session cookie carries through; a 401/403 would mean the form-login
fixture is broken.
"""
resp = web_session.get(f"{fastapi_test_server.url}/api/users")
assert resp.status_code == 200, (
f"expected 200, got {resp.status_code}: {resp.text[:300]}"
)
payload = resp.json()
assert isinstance(payload, list)
emails = {u.get("email") for u in payload}
# Both seeded users appear in the admin list.
assert "admin@example.com" in emails
assert "analyst@example.com" in emails
def test_test_pat_minted(test_pat):
"""test_pat is a non-empty JWT-looking string."""
assert isinstance(test_pat, str)
assert len(test_pat) > 20
# JWT (3 dot-separated base64 segments) — we issue a `typ=pat` JWT.
assert test_pat.count(".") == 2, "PAT does not look like a JWT"
def test_test_pat_no_grants_minted(test_pat_no_grants):
"""test_pat_no_grants also returns a usable JWT string."""
assert isinstance(test_pat_no_grants, str)
assert len(test_pat_no_grants) > 20
assert test_pat_no_grants.count(".") == 2
def test_test_pat_authenticates_against_server(fastapi_test_server, test_pat):
"""The minted PAT successfully authorizes a /api/catalog/tables call.
/api/catalog/tables is the same endpoint `agnes init` step 2 hits to
verify the token, so this is the exact contract the bootstrap path
needs.
"""
resp = httpx.get(
f"{fastapi_test_server.url}/api/catalog/tables",
headers={"Authorization": f"Bearer {test_pat}"},
)
assert resp.status_code == 200, resp.text
def test_zero_grants_workspace_minimal(zero_grants_workspace):
"""`agnes init` with a no-grants PAT produces a minimal workspace.
Expected files (always written):
- CLAUDE.md (rendered from /api/welcome)
- AGNES_WORKSPACE.md (client-side template)
- .claude/settings.json (model + permissions seed)
- user/duckdb/analytics.duckdb (load-bearing artifact for downstream
readers, even with zero parquets)
Expected absences (no grants empty manifest):
- server/parquet/ lazy mkdir, only created when a parquet is
written (none with zero grants).
- .claude/rules/ lazy mkdir, only created when the memory bundle
has at least one mandatory item or non-empty approved list.
"""
ws = Path(zero_grants_workspace)
assert (ws / "CLAUDE.md").exists(), "CLAUDE.md missing"
assert (ws / "AGNES_WORKSPACE.md").exists(), "AGNES_WORKSPACE.md missing"
assert (ws / ".claude" / "settings.json").exists(), "settings.json missing"
assert (ws / "user" / "duckdb" / "analytics.duckdb").exists(), (
"analytics.duckdb missing — downstream `agnes query` won't work"
)
assert not (ws / "server" / "parquet").exists(), (
"zero grants should produce no parquets"
)
assert not (ws / ".claude" / "rules").exists(), (
"zero rules should leave the rules directory absent"
)