diff --git a/app/api/claude_md.py b/app/api/claude_md.py
index dcb9d82..25e6934 100644
--- a/app/api/claude_md.py
+++ b/app/api/claude_md.py
@@ -65,6 +65,25 @@ _VALIDATION_STUB_CONTEXT_ANON = {
 }
 
 
+# Substrings that, when found in an admin-saved CLAUDE.md override, signal
+# the override is stale relative to the post-clean-bootstrap CLI surface.
+# Surfaced via TemplateGetResponse.legacy_strings_detected so the admin UI
+# can render a yellow banner prompting re-authoring.
+_LEGACY_STRINGS = (
+    "data/parquet",
+    "da sync",
+    "da fetch",
+    "da analyst setup",
+    "da metrics list",
+    "da metrics show",
+)
+
+
+def _scan_legacy_strings(text: str) -> list[str]:
+    """Return sorted unique substrings from _LEGACY_STRINGS present in text."""
+    return sorted({s for s in _LEGACY_STRINGS if s in text})
+
+
 class ClaudeMdResponse(BaseModel):
     content: str
 
@@ -74,6 +93,10 @@ class TemplateGetResponse(BaseModel):
     default: str  # live default rendered with calling admin's context
     updated_at: Optional[str] = None
     updated_by: Optional[str] = None
+    # Substrings from _LEGACY_STRINGS detected in the saved override (if any).
+    # Empty when no override is set or when the override is clean. Surfaced
+    # so the admin UI can prompt re-authoring after a CLI surface rename.
+    legacy_strings_detected: list[str] = []
 
 
 class TemplatePutRequest(BaseModel):
@@ -130,11 +153,13 @@ async def admin_get_workspace_template(
     row = ClaudeMdTemplateRepository(conn).get()
     server_url = str(request.base_url).rstrip("/")
     live_default = compute_default_claude_md(conn, user=user, server_url=server_url)
+    legacy_hits = _scan_legacy_strings(row["content"] or "")
     return TemplateGetResponse(
         content=row["content"],
         default=live_default,
         updated_at=row["updated_at"].isoformat() if row["updated_at"] else None,
         updated_by=row["updated_by"],
+        legacy_strings_detected=legacy_hits,
     )
 
 
diff --git a/tests/test_legacy_strings_scan.py b/tests/test_legacy_strings_scan.py
new file mode 100644
index 0000000..73937b9
--- /dev/null
+++ b/tests/test_legacy_strings_scan.py
@@ -0,0 +1,125 @@
+"""Tests for legacy-string scan in admin CLAUDE.md template endpoint.
+
+The scanner flags admin-saved CLAUDE.md overrides that still reference the
+pre-clean-bootstrap CLI surface (`da sync`, `da fetch`, `data/parquet/`,
+`da analyst setup`, `da metrics list/show`). The admin UI surfaces the hits
+as a yellow banner so operators know to re-author the override; the scanner
+itself is informational only — saves with legacy strings are still accepted.
+"""
+
+from __future__ import annotations
+
+import tempfile
+import uuid
+
+import pytest
+from fastapi.testclient import TestClient
+
+from app.api.claude_md import _LEGACY_STRINGS, _scan_legacy_strings
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — pure-function behaviour
+# ---------------------------------------------------------------------------
+
+
+def test_scan_finds_all_known_legacy_strings():
+    text = """
+    Run `da sync` then `da fetch web_sessions --where ...`.
+    Old workspace at data/parquet/ — see `da analyst setup`.
+    Use `da metrics list` and `da metrics show `.
+    """
+    hits = _scan_legacy_strings(text)
+    assert "da sync" in hits
+    assert "da fetch" in hits
+    assert "data/parquet" in hits
+    assert "da analyst setup" in hits
+    assert "da metrics list" in hits
+    assert "da metrics show" in hits
+
+
+def test_scan_returns_empty_for_clean_text():
+    text = "Use `agnes pull` to refresh, `agnes snapshot create` for ad-hoc, `server/parquet/`."
+    assert _scan_legacy_strings(text) == []
+
+
+def test_scan_returns_unique_sorted_hits():
+    text = "da sync da sync data/parquet/ data/parquet/foo"
+    hits = _scan_legacy_strings(text)
+    # Pin the exact list: a self-referential `hits == sorted(set(hits))`
+    # check would also pass if the scanner returned nothing at all.
+    assert hits == ["da sync", "data/parquet"]
+
+
+def test_legacy_strings_constant_shape():
+    assert isinstance(_LEGACY_STRINGS, tuple)
+    assert all(isinstance(s, str) for s in _LEGACY_STRINGS)
+    assert "da sync" in _LEGACY_STRINGS
+    assert "data/parquet" in _LEGACY_STRINGS
+
+
+# ---------------------------------------------------------------------------
+# HTTP-level tests — admin GET surfaces detected hits
+#
+# Lifts the Bearer-session pattern from tests/test_tokens_bootstrap_scope.py
+# (Task 1) — Task 20's shared `web_session` cookie fixture isn't built yet,
+# but the endpoint surface we're exercising is identical either way.
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fresh_db(monkeypatch):
+    with tempfile.TemporaryDirectory() as tmp:
+        monkeypatch.setenv("DATA_DIR", tmp)
+        monkeypatch.setenv("TESTING", "1")
+        monkeypatch.setenv("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!")
+        yield tmp
+
+
+@pytest.fixture
+def web_session(fresh_db):
+    """TestClient authenticated as an admin user via a Bearer session JWT."""
+    from app.auth.jwt import create_access_token
+    from app.main import app
+    from src.db import close_system_db, get_system_db
+    from src.repositories.users import UserRepository
+    from tests.helpers.auth import grant_admin
+
+    conn = get_system_db()
+    try:
+        uid = str(uuid.uuid4())
+        UserRepository(conn).create(id=uid, email="admin@example.com", name="Admin")
+        grant_admin(conn, uid)
+        sess_token = create_access_token(user_id=uid, email="admin@example.com")
+    finally:
+        conn.close()
+        close_system_db()
+
+    client = TestClient(app)
+    client.headers.update({"Authorization": f"Bearer {sess_token}"})
+    return client
+
+
+def test_admin_get_template_returns_legacy_strings_when_override_dirty(web_session):
+    """Setting an override containing legacy strings populates the field."""
+    put = web_session.put(
+        "/api/admin/workspace-prompt-template",
+        json={"content": "Run `da sync` and check data/parquet/."},
+    )
+    assert put.status_code == 200, put.text
+    resp = web_session.get("/api/admin/workspace-prompt-template")
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    assert "da sync" in body["legacy_strings_detected"]
+    assert "data/parquet" in body["legacy_strings_detected"]
+
+
+def test_admin_get_template_returns_empty_when_clean(web_session):
+    put = web_session.put(
+        "/api/admin/workspace-prompt-template",
+        json={"content": "Use `agnes pull` and check `server/parquet/`."},
+    )
+    assert put.status_code == 200, put.text
+    resp = web_session.get("/api/admin/workspace-prompt-template")
+    assert resp.status_code == 200, resp.text
+    assert resp.json()["legacy_strings_detected"] == []