feat(db,repo,renderer): schema v23 + claude_md_template + ClaudeMd renderer

- Bump SCHEMA_VERSION 22 → 23; add claude_md_template singleton table to _SYSTEM_SCHEMA and _V22_TO_V23_MIGRATIONS; wire migration + fresh-install seed
- src/repositories/claude_md_template.py: ClaudeMdTemplateRepository (get/set/reset) mirroring WelcomeTemplateRepository; defensive re-seed in get()
- src/claude_md.py: compute_default_claude_md / render_claude_md / build_claude_md_context — rich renderer with tables, metrics, and RBAC-filtered marketplaces; reads override from claude_md_template or falls back to config/claude_md_template.txt; raises TemplateError on broken override
- config/claude_md_template.txt: default Jinja2 markdown template restored from PR #167 history (tables, metrics, marketplaces, BQ guidance, corporate memory, directory structure, per-user footer)
2026-05-03 22:43:56 +02:00 · 2026-05-03 22:43:56 +02:00 · f01eb4143d
commit f01eb4143d
parent 53f841f244
7 changed files with 671 additions and 5 deletions
--- a/config/claude_md_template.txt
+++ b/config/claude_md_template.txt
@ -0,0 +1,152 @@
+{# Default analyst-onboarding workspace prompt for "da analyst setup".
+   Rendered server-side by src/claude_md.py. Edit this file to change
+   the OSS default; admins override per-instance via /admin/workspace-prompt.
+
+   Available context (see docs/agent-workspace-prompt.md for the full reference):
+     instance.name, instance.subtitle
+     server.url, server.hostname
+     sync_interval                — string from instance.yaml
+     data_source.type             — keboola | bigquery | local
+     tables                       — list of {name, description, query_mode}
+     metrics.count, metrics.categories
+     marketplaces                 — list of {slug, name, plugins:[{name}]}
+     user.id, user.email, user.name, user.is_admin, user.groups
+     now, today                   — datetime / date string
+#}
+# {{ instance.name }} — AI Data Analyst
+
+This workspace is connected to {{ server.url }}.
+{% if instance.subtitle %}Operated by **{{ instance.subtitle }}**.{% endif %}
+
+## Rules
+- Before computing any business metric: run `da metrics show <category>/<name>`
+- **For canonical table list with query modes: `da catalog`.** `data/metadata/schema.json` covers `query_mode: "local"` tables only — for remote/hybrid tables it's incomplete. Treat `da catalog` as source of truth.
+- Do not use DESCRIBE/SHOW COLUMNS — use `da schema <table>` instead
+- Save work output to `user/artifacts/`
+- Sync data regularly with `da sync`
+- **Personal customizations go in `.claude/CLAUDE.local.md`, NOT here.** This file is regenerated by `da analyst setup --force`; edits here will be lost. CLAUDE.local.md is preserved across regeneration and uploaded on `da sync --upload-only`.
+
+## Metrics Workflow
+1. `da metrics list` — find the relevant metric ({{ metrics.count }} available, categories: {{ metrics.categories | join(", ") or "none yet" }})
+2. `da metrics show <category>/<name>` — read SQL and business rules
+3. Use the canonical SQL from the metric definition, adapt to the question
+4. Never invent metric calculations — always check existing definitions first
+
+## Data Sync
+- `da sync` — download current data from server
+- `da sync --docs-only` — just metadata and metrics (fast refresh)
+- `da sync --upload-only` — upload sessions and local notes to server
+- Data on the server refreshes every {{ sync_interval }}
+
+## Available Datasets
+{% for t in tables -%}
+- `{{ t.name }}`{% if t.description %} — {{ t.description }}{% endif %}{% if t.query_mode == "remote" %} *(remote, queried on demand)*{% endif %}
+{% else -%}
+- _No tables registered yet — ask an admin to register tables in the dashboard._
+{% endfor %}
+
+{% if marketplaces -%}
+## Plugins available to you
+{% for mp in marketplaces -%}
+- **{{ mp.name }}** ({{ mp.slug }}): {{ mp.plugins | map(attribute="name") | join(", ") }}
+{% endfor %}
+{% endif -%}
+
+## Remote Queries (BigQuery) — when data isn't on the laptop
+
+Not every table is synced. Tables registered with `query_mode: "remote"` live in
+BigQuery, accessed server-side via DuckDB's BQ extension — no parquet on disk.
+Tables you don't see in `data/parquet/` may still be queryable.
+
+### Discovery first
+
+```
+da catalog --json | jq '.[] | {name, source_type, query_mode}'   # see all tables + their modes
+da schema <table>                                                # columns + types
+da describe <table> -n 5                                         # sample rows
+```
+
+For local-mode tables, query directly with `da query "SELECT … FROM <table>"`.
+
+### Three patterns for `query_mode: "remote"` tables
+
+| Pattern | Tool | Use when |
+|---------|------|----------|
+| **`da fetch`** (preferred) | materializes a filtered subset locally → query the snapshot | repeated questions on same slice |
+| **`da query --remote`** | one-shot, server-side execution against BigQuery | single aggregate / cheap probe |
+| **`da query --register-bq`** | hybrid joins between local snapshots and ad-hoc BQ subqueries | crossing local + remote |
+
+### Permission model + cost — important
+
+- BQ access goes through the **agnes server's GCE service account**, not your personal Google credentials. If a query fails with a permission error, the table is in a project the server SA cannot read — escalate to admin, do NOT try to authenticate yourself.
+- Every BQ query bills the SA's GCP project for **bytes scanned**. A naive `SELECT * FROM <large_table>` can cost real money. ALWAYS:
+  - filter via `--where` on the partition column (typically a date)
+  - list specific columns in `--select` — column-store BQ skips the rest, cheaper
+  - run `--estimate` first when unsure of the table size or partitioning
+
+### `da fetch` discipline
+
+```
+# 1. ESTIMATE first — refuses to fetch without knowing the cost
+da fetch <table> --select col1,col2 --where "date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)" --estimate
+
+# 2. If reasonable, fetch as a named snapshot
+da fetch <table> --select col1,col2 --where "..." --as my_recent
+
+# 3. Query the local snapshot
+da query "SELECT col1, COUNT(*) FROM my_recent GROUP BY 1"
+
+# 4. List + drop snapshots when done
+da snapshot list
+da snapshot drop my_recent
+```
+
+Rules of thumb:
+- ALWAYS list specific columns in `--select`. Avoid implicit SELECT *.
+- ALWAYS include a `--where` for remote tables; otherwise add `--limit`.
+- ALWAYS run `--estimate` first when `da schema` shows the table is partitioned or
+  clustered (`partition_by` / `clustered_by`), or when the fetch could plausibly exceed 1 GB of local data.
+- Reuse snapshots across questions in the same conversation — `da snapshot list`
+  before fetching.
+
+### Snapshot freshness — when to refresh
+
+Snapshots are point-in-time copies. They go stale as the source data updates. For each new conversation:
+
+```
+da snapshot list                            # see existing snapshots + their ages
+da snapshot drop my_recent                  # drop stale ones
+da fetch <table> --select ... --where ... --as my_recent   # re-fetch
+```
+
+### BigQuery SQL flavor for `--where`
+
+Source-typed `bigquery` tables use BigQuery dialect, not DuckDB:
+
+- Date literal: `DATE '2026-01-01'`
+- Timestamp literal: `TIMESTAMP '2026-01-01 00:00:00 UTC'`
+- Now: `CURRENT_DATE()`, `CURRENT_TIMESTAMP()`
+- Date arithmetic: `DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)`
+- Regex: `REGEXP_CONTAINS(col, r'pattern')` (raw string!)
+- Cast: `CAST(x AS INT64)` (NOT `INT`)
+
+## Corporate Memory
+
+Rules injected by `da sync` from the server's corporate knowledge base live in `.claude/rules/km_*.md`. They are automatically loaded by Claude Code on every session start.
+
+- `km_<id>.md` — mandatory rules (always enforced)
+- `km_approved.md` — approved guidance (confidence × recency ranked)
+
+Run `da sync` to refresh. Rules are pruned automatically when items are revoked.
+
+## Directory Structure
+- `data/` — read-only data downloaded from server
+  - `data/parquet/` — table data in Parquet format
+  - `data/duckdb/` — local analytics DuckDB database
+  - `data/metadata/` — profiles, schema, metrics cache
+- `user/` — your workspace (persistent across syncs)
+  - `user/artifacts/` — analysis outputs, reports, charts
+  - `user/sessions/` — Claude Code session logs
+- `.claude/CLAUDE.local.md` — your personal notes + workspace customizations. **Never overwritten by `da analyst setup --force`.** Uploaded to the server on `da sync --upload-only`. Put any local-only Claude instructions, project-specific reminders, or temporary notes here — NOT in CLAUDE.md (this file is regenerated from a template).
+
+_Hello {{ user.name or user.email }} — generated {{ today }}._
--- a/src/claude_md.py
+++ b/src/claude_md.py
@ -0,0 +1,200 @@
+"""Render the analyst-workspace CLAUDE.md prompt.
+
+The template source is admin-editable at /admin/workspace-prompt.  When no
+override is set, the default content is the Jinja2 markdown template shipped
+at config/claude_md_template.txt.  When an override is saved, it replaces the
+default for every call to render_claude_md().
+
+Override content is a Jinja2 template (autoescape=False, StrictUndefined).
+Available placeholders: instance.{name,subtitle}, server.{url,hostname},
+sync_interval, data_source.type, tables (list), metrics.{count,categories},
+marketplaces (RBAC-filtered list), user.{id,email,name,is_admin,groups},
+now, today.
+
+The template is surfaced to admins as the "Agent Workspace Prompt" editor at
+/admin/workspace-prompt.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import date, datetime, timezone
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+import duckdb
+from jinja2 import Environment, StrictUndefined, TemplateError
+
+from app.instance_config import (
+    get_data_source_type,
+    get_instance_name,
+    get_instance_subtitle,
+    get_sync_interval,
+)
+from src.repositories.claude_md_template import ClaudeMdTemplateRepository
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_TEMPLATE_PATH = (
+    Path(__file__).resolve().parent.parent / "config" / "claude_md_template.txt"
+)
+
+
+def _load_default_template() -> str:
+    """Return the Jinja2 source of the default CLAUDE.md template.
+
+    Reads the file at ``_DEFAULT_TEMPLATE_PATH``. When that file is absent a
+    minimal embedded template is returned instead, and a warning is logged so
+    the incomplete install is visible rather than silently degraded.
+    """
+    try:
+        # Read directly instead of exists()-then-read: the file could vanish
+        # between the two calls, and one syscall is enough.
+        return _DEFAULT_TEMPLATE_PATH.read_text(encoding="utf-8")
+    except (FileNotFoundError, NotADirectoryError):
+        # Last-resort embedded fallback if the OSS template file is missing
+        # from the install (e.g., partial Docker COPY).
+        logger.warning(
+            "Default CLAUDE.md template missing at %s; using embedded fallback",
+            _DEFAULT_TEMPLATE_PATH,
+        )
+        return (
+            "# {{ instance.name }} — AI Data Analyst\n\n"
+            "This workspace is connected to {{ server.url }}.\n"
+            "Data refreshes every {{ sync_interval }}.\n"
+        )
+
+
+def _list_tables(conn: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
+    """Fetch every row of table_registry, ordered by name, as template dicts.
+
+    Each dict has ``name``, ``description`` and ``query_mode``. A falsy
+    description becomes "" and a falsy query_mode becomes "local". Returns an
+    empty list when the table_registry table does not exist.
+
+    NOTE(review): the query takes no user argument, so the list is not
+    filtered per user — confirm that is intended.
+    """
+    try:
+        cursor = conn.execute(
+            """SELECT name, description, query_mode
+               FROM table_registry
+               ORDER BY name"""
+        )
+        records = cursor.fetchall()
+    except duckdb.CatalogException:
+        return []
+
+    tables: list[dict[str, Any]] = []
+    for name, description, query_mode in records:
+        tables.append(
+            {
+                "name": name,
+                "description": description or "",
+                "query_mode": query_mode or "local",
+            }
+        )
+    return tables
+
+
+def _metrics_summary(conn: duckdb.DuckDBPyConnection) -> dict[str, Any]:
+    """Summarise metric_definitions as ``{"count": int, "categories": list}``.
+
+    ``count`` is the total number of metric rows, including rows whose
+    category is empty; ``categories`` is the sorted list of distinct
+    non-empty category names. Returns a zero/empty summary when the
+    metric_definitions table does not exist.
+    """
+    try:
+        per_category = conn.execute(
+            "SELECT category, COUNT(*) FROM metric_definitions GROUP BY category"
+        ).fetchall()
+    except duckdb.CatalogException:
+        return {"count": 0, "categories": []}
+
+    total = 0
+    names = set()
+    for category, n_metrics in per_category:
+        total += n_metrics
+        if category:
+            names.add(category)
+    return {"count": total, "categories": sorted(names)}
+
+
+def _marketplaces_for_user(
+    conn: duckdb.DuckDBPyConnection, user: dict[str, Any]
+) -> list[dict[str, Any]]:
+    """Group the plugins visible to ``user`` by marketplace.
+
+    Visibility is decided solely by ``resolve_allowed_plugins``; each entry it
+    returns is read here for its ``marketplace_slug`` and ``original_name``.
+    Display names come from one marketplace_registry lookup, and a slug with
+    no registry row falls back to the slug itself.
+
+    Returns a list of ``{"slug", "name", "plugins": [{"name"}]}`` dicts in
+    order of first appearance, or an empty list when resolution fails (the
+    failure is logged) or nothing is allowed.
+    """
+    try:
+        from src.marketplace_filter import resolve_allowed_plugins
+        visible_plugins = resolve_allowed_plugins(conn, user)
+    except Exception:
+        logger.exception("_marketplaces_for_user: marketplace plugin resolution failed")
+        return []
+    if not visible_plugins:
+        return []
+
+    # One registry query for all distinct slugs.
+    unique_slugs = list({entry["marketplace_slug"] for entry in visible_plugins})
+    markers = ",".join("?" for _ in unique_slugs)
+    try:
+        registry_rows = conn.execute(
+            f"SELECT id, name FROM marketplace_registry WHERE id IN ({markers})",
+            unique_slugs,
+        ).fetchall()
+    except duckdb.CatalogException:
+        registry_rows = []
+    display_names: dict[str, str] = {
+        registry_id: label for registry_id, label in registry_rows
+    }
+
+    by_slug: dict[str, dict[str, Any]] = {}
+    for entry in visible_plugins:
+        slug = entry["marketplace_slug"]
+        if slug not in by_slug:
+            by_slug[slug] = {
+                "slug": slug,
+                "name": display_names.get(slug, slug),
+                "plugins": [],
+            }
+        by_slug[slug]["plugins"].append({"name": entry["original_name"]})
+
+    return list(by_slug.values())
+
+
+def build_claude_md_context(
+    conn: duckdb.DuckDBPyConnection,
+    *,
+    user: dict[str, Any],
+    server_url: str,
+) -> dict[str, Any]:
+    """Compose the Jinja2 render context for the CLAUDE.md template.
+
+    Not a pure function: it queries ``conn`` for tables, metrics and the
+    user's marketplaces, reads instance configuration, and samples the
+    current time.
+
+    Args:
+        conn: Connection used for the table, metric and marketplace lookups.
+        user: User dict; ``id``, ``email``, ``name``, ``is_admin`` and
+            ``groups`` are read with ``.get()``, so any may be absent.
+        server_url: URL exposed as ``server.url``; its hostname part is
+            exposed as ``server.hostname`` ("" when it cannot be parsed).
+
+    Returns:
+        Dict with the keys ``instance``, ``server``, ``sync_interval``,
+        ``data_source``, ``tables``, ``metrics``, ``marketplaces``, ``user``,
+        ``now`` (timezone-aware UTC datetime) and ``today`` (ISO date string).
+    """
+    now = datetime.now(timezone.utc)
+    parsed = urlparse(server_url)
+    return {
+        "instance": {
+            "name": get_instance_name(),
+            "subtitle": get_instance_subtitle(),
+        },
+        "server": {
+            "url": server_url,
+            "hostname": parsed.hostname or "",
+        },
+        "sync_interval": get_sync_interval(),
+        "data_source": {"type": get_data_source_type()},
+        "tables": _list_tables(conn),
+        "metrics": _metrics_summary(conn),
+        "marketplaces": _marketplaces_for_user(conn, user),
+        "user": {
+            "id": user.get("id", ""),
+            "email": user.get("email", ""),
+            "name": user.get("name") or "",
+            "is_admin": bool(user.get("is_admin")),
+            "groups": user.get("groups") or [],
+        },
+        "now": now,
+        # Derive the date from the same UTC instant as ``now`` so the two can
+        # never disagree around midnight on a server whose local zone is not UTC.
+        "today": now.date().isoformat(),
+    }
+
+
+def _render_source(
+    source: str,
+    conn: duckdb.DuckDBPyConnection,
+    *,
+    user: dict[str, Any],
+    server_url: str,
+) -> str:
+    """Render Jinja2 ``source`` against the context built for ``user``.
+
+    The template is parsed before the context is built, so a syntax error
+    surfaces without running any context queries. StrictUndefined makes a
+    reference to an unknown placeholder raise instead of rendering blank.
+    """
+    env = Environment(undefined=StrictUndefined, autoescape=False)
+    template = env.from_string(source)
+    context = build_claude_md_context(conn, user=user, server_url=server_url)
+    return template.render(**context)
+
+
+def compute_default_claude_md(
+    conn: duckdb.DuckDBPyConnection,
+    *,
+    user: dict[str, Any],
+    server_url: str,
+) -> str:
+    """Return the rendered default CLAUDE.md from config/claude_md_template.txt.
+
+    Ignores any stored override and renders the shipped Jinja2 template with
+    the context built for the given user.
+    On TemplateError, raises — callers that want graceful fallback should catch.
+    """
+    return _render_source(
+        _load_default_template(), conn, user=user, server_url=server_url
+    )
+
+
+def render_claude_md(
+    conn: duckdb.DuckDBPyConnection,
+    *,
+    user: dict[str, Any],
+    server_url: str,
+) -> str:
+    """Resolve the active template (override or default) and render it for the given user.
+
+    When the stored override content is non-empty, it is rendered via Jinja2
+    (StrictUndefined, autoescape=False). When it is NULL or empty, the shipped
+    default template is rendered instead.
+
+    On TemplateError, raises — the caller is expected to handle it.
+    """
+    row = ClaudeMdTemplateRepository(conn).get()
+    source = row.get("content") or _load_default_template()
+    return _render_source(source, conn, user=user, server_url=server_url)
--- a/src/db.py
+++ b/src/db.py
@ -39,7 +39,7 @@ def _maybe_instrument(con, db_tag: str):

 _SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")

-SCHEMA_VERSION = 22
+SCHEMA_VERSION = 23

 _SYSTEM_SCHEMA = """
 CREATE TABLE IF NOT EXISTS schema_version (
@ -427,6 +427,18 @@ CREATE TABLE IF NOT EXISTS setup_banner (
    updated_by VARCHAR,
    CONSTRAINT singleton CHECK (id = 1)
 );
+
+-- v23: customizable analyst-workspace CLAUDE.md template.
+-- Singleton row (id=1). NULL content means "use the default template
+-- shipped at config/claude_md_template.txt" (Jinja2 markdown). Admin override
+-- stores the raw Jinja2 source string.
+CREATE TABLE IF NOT EXISTS claude_md_template (
+    id INTEGER PRIMARY KEY DEFAULT 1,
+    content TEXT,
+    updated_at TIMESTAMP,
+    updated_by VARCHAR,
+    CONSTRAINT singleton CHECK (id = 1)
+);
 """


@ -1658,6 +1670,17 @@ _V21_TO_V22_MIGRATIONS = [
    "INSERT INTO setup_banner (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
 ]

+# v22 → v23: create the claude_md_template singleton table and seed its one
+# row (id = 1) with NULL content. Both statements are safe to re-run
+# (CREATE ... IF NOT EXISTS / ON CONFLICT DO NOTHING).
+_V22_TO_V23_MIGRATIONS = [
+    """CREATE TABLE IF NOT EXISTS claude_md_template (
+        id INTEGER PRIMARY KEY DEFAULT 1,
+        content TEXT,
+        updated_at TIMESTAMP,
+        updated_by VARCHAR,
+        CONSTRAINT singleton CHECK (id = 1)
+    )""",
+    "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
+]
+

 def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Create tables if they don't exist. Apply migrations if schema version changed.
@ -1724,6 +1747,10 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
                "INSERT INTO setup_banner (id, content) VALUES (1, NULL) "
                "ON CONFLICT (id) DO NOTHING"
            )
+            conn.execute(
+                "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
+                "ON CONFLICT (id) DO NOTHING"
+            )
            # Fresh-install seed is handled by the unconditional
            # _seed_core_roles call at the bottom of _ensure_schema —
            # left as a no-op branch here so the migration ladder still
@ -1807,6 +1834,9 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
            if current < 22:
                for sql in _V21_TO_V22_MIGRATIONS:
                    conn.execute(sql)
+            if current < 23:
+                for sql in _V22_TO_V23_MIGRATIONS:
+                    conn.execute(sql)
            conn.execute(
                "UPDATE schema_version SET version = ?, applied_at = current_timestamp",
                [SCHEMA_VERSION],
--- a/src/repositories/claude_md_template.py
+++ b/src/repositories/claude_md_template.py
@ -0,0 +1,53 @@
+"""Repository for the per-instance CLAUDE.md template override (singleton row)."""
+
+from datetime import datetime, timezone
+from typing import Any
+
+import duckdb
+
+
+class ClaudeMdTemplateRepository:
+    """Read and write the single claude_md_template row (id = 1).
+
+    ``content`` holds the override template source; NULL means no override.
+    """
+
+    def __init__(self, conn: duckdb.DuckDBPyConnection):
+        self.conn = conn
+
+    def get(self) -> dict[str, Any]:
+        """Return the singleton row as a dict.
+
+        Keys are ``id``, ``content``, ``updated_at`` and ``updated_by``.
+        ``content`` is None when no override is set (= use shipped default
+        template). If the row is missing it is re-seeded and an all-None row
+        is returned.
+        """
+        row = self.conn.execute(
+            "SELECT id, content, updated_at, updated_by FROM claude_md_template WHERE id = 1"
+        ).fetchone()
+        if row is None:
+            # Defensive: re-seed if a previous admin manually deleted it.
+            self.conn.execute(
+                "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
+                "ON CONFLICT (id) DO NOTHING"
+            )
+            return {"id": 1, "content": None, "updated_at": None, "updated_by": None}
+        return {
+            "id": row[0],
+            "content": row[1],
+            "updated_at": row[2],
+            "updated_by": row[3],
+        }
+
+    def set(self, content: str, *, updated_by: str) -> None:
+        """Store ``content`` as the override, stamping the UTC time and ``updated_by``.
+
+        Upserts, so it works whether or not the singleton row exists.
+        """
+        now = datetime.now(timezone.utc)
+        self.conn.execute(
+            """INSERT INTO claude_md_template (id, content, updated_at, updated_by)
+               VALUES (1, ?, ?, ?)
+               ON CONFLICT (id) DO UPDATE SET
+                   content = excluded.content,
+                   updated_at = excluded.updated_at,
+                   updated_by = excluded.updated_by""",
+            [content, now, updated_by],
+        )
+
+    def reset(self, *, updated_by: str) -> None:
+        """Clear override; renderer falls back to shipped default template.
+
+        Upserts like ``set()``: a plain UPDATE would silently do nothing and
+        drop the ``updated_at`` / ``updated_by`` stamp when the singleton row
+        is missing.
+        """
+        now = datetime.now(timezone.utc)
+        self.conn.execute(
+            """INSERT INTO claude_md_template (id, content, updated_at, updated_by)
+               VALUES (1, NULL, ?, ?)
+               ON CONFLICT (id) DO UPDATE SET
+                   content = NULL,
+                   updated_at = excluded.updated_at,
+                   updated_by = excluded.updated_by""",
+            [now, updated_by],
+        )
--- a/tests/test_claude_md_renderer.py
+++ b/tests/test_claude_md_renderer.py
@ -0,0 +1,169 @@
+"""Unit tests for the analyst-workspace CLAUDE.md renderer (src/claude_md.py)."""
+
+import duckdb
+import pytest
+from jinja2 import TemplateError
+
+from src.db import _ensure_schema
+from src.repositories.claude_md_template import ClaudeMdTemplateRepository
+from src.claude_md import (
+    build_claude_md_context,
+    compute_default_claude_md,
+    render_claude_md,
+)
+
+
+@pytest.fixture
+def conn(tmp_path, monkeypatch):
+    """Yield a connection to a fresh on-disk DuckDB with the schema applied.
+
+    DATA_DIR is pointed at the per-test temp directory; the connection is
+    closed after the test.
+    """
+    monkeypatch.setenv("DATA_DIR", str(tmp_path))
+    connection = duckdb.connect(str(tmp_path / "system.duckdb"))
+    _ensure_schema(connection)
+    yield connection
+    connection.close()
+
+
+def _user(email="alice@example.com", is_admin=False):
+    """Build a fully-populated user dict for the renderer tests."""
+    profile = dict(id="u1", email=email, name="Alice")
+    profile["is_admin"] = is_admin
+    profile["groups"] = ["Everyone"]
+    return profile
+
+
+# ---------------------------------------------------------------------------
+# Default (no override) — renders a non-empty markdown string
+# ---------------------------------------------------------------------------
+
+def test_compute_default_returns_non_empty(conn):
+    """The shipped default template renders to non-blank text."""
+    rendered = compute_default_claude_md(
+        conn, user=_user(), server_url="https://example.com"
+    )
+    assert rendered.strip()
+
+
+def test_default_contains_server_url(conn):
+    """The server URL passed in appears in the default render."""
+    url = "https://myagnes.example.com"
+    rendered = compute_default_claude_md(conn, user=_user(), server_url=url)
+    assert url in rendered
+
+
+def test_default_contains_user_reference(conn):
+    """A user without a name is addressed by email in the default render."""
+    # The footer uses `user.name or user.email` — a user with no name falls back to email.
+    nameless = {
+        "id": "u1",
+        "email": "bob@example.com",
+        "name": "",
+        "is_admin": False,
+        "groups": [],
+    }
+    rendered = compute_default_claude_md(
+        conn, user=nameless, server_url="https://example.com"
+    )
+    assert "bob@example.com" in rendered
+
+
+def test_render_uses_default_when_no_override(conn):
+    """With no override stored, render_claude_md still yields non-blank text."""
+    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
+    assert rendered.strip()
+
+
+# ---------------------------------------------------------------------------
+# Override renders correctly
+# ---------------------------------------------------------------------------
+
+def test_render_uses_override_when_set(conn):
+    """A stored override is the template that gets rendered."""
+    repo = ClaudeMdTemplateRepository(conn)
+    repo.set(
+        "# {{ instance.name }} Workspace\n\nHello {{ user.email }}.",
+        updated_by="admin@example.com",
+    )
+    viewer = _user("charlie@example.com")
+    rendered = render_claude_md(conn, user=viewer, server_url="https://example.com")
+    assert "charlie@example.com" in rendered
+
+
+def test_render_override_tables_list(conn):
+    """Rows in table_registry are exposed to templates through ``tables``."""
+    # Seed a table registry entry
+    conn.execute(
+        "INSERT INTO table_registry (id, name, description, query_mode, source_type) "
+        "VALUES ('t1', 'orders', 'All orders', 'local', 'keboola')"
+    )
+    override = "{% for t in tables %}- {{ t.name }}: {{ t.description }}{% endfor %}"
+    ClaudeMdTemplateRepository(conn).set(override, updated_by="admin@example.com")
+    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
+    for expected in ("orders", "All orders"):
+        assert expected in rendered
+
+
+def test_render_override_metrics_summary(conn):
+    """One seeded metric shows up as count 1 with its category listed."""
+    # Seed a metric definition — must include NOT NULL columns: display_name, sql
+    conn.execute(
+        "INSERT INTO metric_definitions (id, name, display_name, category, sql) "
+        "VALUES ('m1', 'mrr', 'MRR', 'revenue', 'SELECT SUM(amount)')"
+    )
+    ClaudeMdTemplateRepository(conn).set(
+        "Metrics: {{ metrics.count }}, cats: {{ metrics.categories | join(', ') }}",
+        updated_by="admin@example.com",
+    )
+    out = render_claude_md(conn, user=_user(), server_url="https://example.com")
+    # Anchor the count to its label: a bare `"1" in out` would pass on any
+    # output that happens to contain the character "1".
+    assert "Metrics: 1" in out
+    assert "revenue" in out
+
+
+# ---------------------------------------------------------------------------
+# RBAC-filtered marketplaces — a user with no grants sees no plugins
+# ---------------------------------------------------------------------------
+
+def test_marketplaces_empty_for_user_with_no_grants(conn):
+    """With no grants seeded, ``marketplaces`` is falsy in the template."""
+    # No grants seeded — _marketplaces_for_user returns []
+    probe = "{% if marketplaces %}HAS_PLUGINS{% else %}NO_PLUGINS{% endif %}"
+    ClaudeMdTemplateRepository(conn).set(probe, updated_by="admin@example.com")
+    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
+    assert "NO_PLUGINS" in rendered
+
+
+# ---------------------------------------------------------------------------
+# Anonymous / minimal user context
+# ---------------------------------------------------------------------------
+
+def test_render_with_minimal_user_context(conn):
+    """Templates referencing user fields must work with minimal user dict."""
+    ClaudeMdTemplateRepository(conn).set(
+        "User: {{ user.email }}, admin: {{ user.is_admin }}",
+        updated_by="admin@example.com",
+    )
+    # Pass only an email so the defaults for the absent fields are actually
+    # exercised; the full _user() fixture would not test a minimal context.
+    minimal_user = {"email": "alice@example.com"}
+    out = render_claude_md(conn, user=minimal_user, server_url="https://example.com")
+    assert "alice@example.com" in out
+    assert "admin: False" in out
+
+
+# ---------------------------------------------------------------------------
+# Build context shape
+# ---------------------------------------------------------------------------
+
+def test_context_exposes_all_documented_keys(conn):
+    """Every documented top-level placeholder is present in the context."""
+    documented = (
+        "instance",
+        "server",
+        "sync_interval",
+        "data_source",
+        "tables",
+        "metrics",
+        "marketplaces",
+        "user",
+        "now",
+        "today",
+    )
+    ctx = build_claude_md_context(conn, user=_user(), server_url="https://example.com")
+    for key in documented:
+        assert key in ctx, f"missing context key: {key}"
+
+
+def test_context_tables_is_list(conn):
+    """``tables`` is a list even when nothing is registered."""
+    context = build_claude_md_context(
+        conn, user=_user(), server_url="https://example.com"
+    )
+    tables = context["tables"]
+    assert isinstance(tables, list)
+
+
+def test_context_metrics_shape(conn):
+    """``metrics`` carries both a ``count`` and a ``categories`` entry."""
+    metrics = build_claude_md_context(
+        conn, user=_user(), server_url="https://example.com"
+    )["metrics"]
+    for field in ("count", "categories"):
+        assert field in metrics
+
+
+def test_context_marketplaces_is_list(conn):
+    """``marketplaces`` is a list even when the user has no plugins."""
+    context = build_claude_md_context(
+        conn, user=_user(), server_url="https://example.com"
+    )
+    marketplaces = context["marketplaces"]
+    assert isinstance(marketplaces, list)
+
+
+# ---------------------------------------------------------------------------
+# Render failure raises (caller handles)
+# ---------------------------------------------------------------------------
+
+def test_render_raises_on_template_error(conn):
+    """An override that references an unknown placeholder raises TemplateError."""
+    broken_override = "{{ does_not_exist }}"
+    ClaudeMdTemplateRepository(conn).set(
+        broken_override, updated_by="admin@example.com"
+    )
+    with pytest.raises(TemplateError):
+        render_claude_md(conn, user=_user(), server_url="https://example.com")
--- a/tests/test_claude_md_template_repo.py
+++ b/tests/test_claude_md_template_repo.py
@ -0,0 +1,40 @@
+"""Unit tests for ClaudeMdTemplateRepository."""
+
+import duckdb
+import pytest
+
+from src.db import _ensure_schema
+from src.repositories.claude_md_template import ClaudeMdTemplateRepository
+
+
+@pytest.fixture
+def conn(tmp_path):
+    """Yield a connection to a fresh on-disk DuckDB with the schema applied."""
+    connection = duckdb.connect(str(tmp_path / "system.duckdb"))
+    _ensure_schema(connection)
+    yield connection
+    connection.close()
+
+
+def test_get_returns_none_on_fresh_install(conn):
+    """On a fresh schema the row exists and its content is None (no override)."""
+    fetched = ClaudeMdTemplateRepository(conn).get()
+    assert fetched is not None
+    assert fetched["content"] is None  # default sentinel
+
+
+def test_set_stores_content(conn):
+    """set() persists the content, the author and a timestamp."""
+    template_source = "# {{ instance.name }}"
+    author = "admin@example.com"
+    repo = ClaudeMdTemplateRepository(conn)
+    repo.set(template_source, updated_by=author)
+    stored = repo.get()
+    assert stored["content"] == template_source
+    assert stored["updated_by"] == author
+    assert stored["updated_at"] is not None
+
+
+def test_reset_clears_content(conn):
+    """reset() after set() leaves the content as None."""
+    author = "admin@example.com"
+    repo = ClaudeMdTemplateRepository(conn)
+    repo.set("custom template", updated_by=author)
+    repo.reset(updated_by=author)
+    assert repo.get()["content"] is None
--- a/tests/test_db_schema_version.py
+++ b/tests/test_db_schema_version.py
@ -13,8 +13,8 @@ import duckdb
 from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version


-def test_schema_version_is_22():
-    assert SCHEMA_VERSION == 22
+def test_schema_version_is_23():
+    assert SCHEMA_VERSION == 23


 def test_v20_adds_source_query(tmp_path):
@ -29,7 +29,29 @@ def test_v20_adds_source_query(tmp_path):
        ).fetchall()
    }
    assert "source_query" in cols, f"source_query missing from {cols}"
-    assert get_schema_version(conn) == 22
+    assert get_schema_version(conn) == 23
+    conn.close()
+
+
+def test_v23_adds_claude_md_template(tmp_path):
+    """v23 must create the claude_md_template singleton table."""
+    # Run _ensure_schema against a brand-new database file in the temp dir.
+    db_path = tmp_path / "system.duckdb"
+    conn = duckdb.connect(str(db_path))
+    _ensure_schema(conn)
+
+    # Collect the names of all tables in the main schema.
+    tables = {
+        r[0] for r in conn.execute(
+            "SELECT table_name FROM information_schema.tables "
+            "WHERE table_schema = 'main'"
+        ).fetchall()
+    }
+    assert "claude_md_template" in tables, f"claude_md_template missing from {tables}"
+
+    # Singleton row seeded
+    row = conn.execute("SELECT id, content FROM claude_md_template WHERE id = 1").fetchone()
+    assert row is not None
+    assert row[0] == 1
+    assert row[1] is None  # default = no override
     conn.close()


@ -61,7 +83,7 @@ def test_v19_db_migrates_to_v20(tmp_path):

    _ensure_schema(conn)

-    assert get_schema_version(conn) == 22
+    assert get_schema_version(conn) == 23
    cols = {
        r[0] for r in conn.execute(
            "SELECT column_name FROM information_schema.columns "