feat(db,repo,renderer): schema v23 + claude_md_template + ClaudeMd renderer
- Bump SCHEMA_VERSION 22 → 23; add claude_md_template singleton table to _SYSTEM_SCHEMA and _V22_TO_V23_MIGRATIONS; wire migration + fresh-install seed
- src/repositories/claude_md_template.py: ClaudeMdTemplateRepository (get/set/reset) mirroring WelcomeTemplateRepository; defensive re-seed in get()
- src/claude_md.py: compute_default_claude_md / render_claude_md / build_claude_md_context — rich renderer with RBAC-filtered tables, metrics, and marketplaces; reads override from claude_md_template or falls back to config/claude_md_template.txt; raises TemplateError on broken override
- config/claude_md_template.txt: default Jinja2 markdown template restored from PR #167 history (tables, metrics, marketplaces, BQ guidance, corporate memory, directory structure, per-user footer)
This commit is contained in:
parent
53f841f244
commit
f01eb4143d
7 changed files with 671 additions and 5 deletions
152
config/claude_md_template.txt
Normal file
152
config/claude_md_template.txt
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
{# Default analyst-onboarding workspace prompt for "da analyst setup".
|
||||
Rendered server-side by src/claude_md.py. Edit this file to change
|
||||
the OSS default; admins override per-instance via /admin/workspace-prompt.
|
||||
|
||||
Available context (see docs/agent-workspace-prompt.md for the full reference):
|
||||
instance.name, instance.subtitle
|
||||
server.url, server.hostname
|
||||
sync_interval — string from instance.yaml
|
||||
data_source.type — keboola | bigquery | local
|
||||
tables — list of {name, description, query_mode}
|
||||
metrics.count, metrics.categories
|
||||
marketplaces — list of {slug, name, plugins:[{name}]}
|
||||
user.id, user.email, user.name, user.is_admin, user.groups
|
||||
now, today — datetime / date string
|
||||
#}
|
||||
# {{ instance.name }} — AI Data Analyst
|
||||
|
||||
This workspace is connected to {{ server.url }}.
|
||||
{% if instance.subtitle %}Operated by **{{ instance.subtitle }}**.{% endif %}
|
||||
|
||||
## Rules
|
||||
- Before computing any business metric: run `da metrics show <category>/<name>`
|
||||
- **For canonical table list with query modes: `da catalog`.** `data/metadata/schema.json` covers `query_mode: "local"` tables only — for remote/hybrid tables it's incomplete. Treat `da catalog` as source of truth.
|
||||
- Do not use DESCRIBE/SHOW COLUMNS — use `da schema <table>` instead
|
||||
- Save work output to `user/artifacts/`
|
||||
- Sync data regularly with `da sync`
|
||||
- **Personal customizations go in `.claude/CLAUDE.local.md`, NOT here.** This file is regenerated by `da analyst setup --force`; edits here will be lost. CLAUDE.local.md is preserved across regeneration and uploaded on `da sync --upload-only`.
|
||||
|
||||
## Metrics Workflow
|
||||
1. `da metrics list` — find the relevant metric ({{ metrics.count }} available, categories: {{ metrics.categories | join(", ") or "none yet" }})
|
||||
2. `da metrics show <category>/<name>` — read SQL and business rules
|
||||
3. Use the canonical SQL from the metric definition, adapt to the question
|
||||
4. Never invent metric calculations — always check existing definitions first
|
||||
|
||||
## Data Sync
|
||||
- `da sync` — download current data from server
|
||||
- `da sync --docs-only` — just metadata and metrics (fast refresh)
|
||||
- `da sync --upload-only` — upload sessions and local notes to server
|
||||
- Data on the server refreshes every {{ sync_interval }}
|
||||
|
||||
## Available Datasets
|
||||
{% for t in tables -%}
|
||||
- `{{ t.name }}`{% if t.description %} — {{ t.description }}{% endif %}{% if t.query_mode == "remote" %} *(remote, queried on demand)*{% endif %}
|
||||
{% else -%}
|
||||
- _No tables registered yet — ask an admin to register tables in the dashboard._
|
||||
{% endfor %}
|
||||
|
||||
{% if marketplaces -%}
|
||||
## Plugins available to you
|
||||
{% for mp in marketplaces -%}
|
||||
- **{{ mp.name }}** ({{ mp.slug }}): {{ mp.plugins | map(attribute="name") | join(", ") }}
|
||||
{% endfor %}
|
||||
{% endif -%}
|
||||
|
||||
## Remote Queries (BigQuery) — when data isn't on the laptop
|
||||
|
||||
Not every table is synced. Tables registered with `query_mode: "remote"` live in
|
||||
BigQuery, accessed server-side via DuckDB's BQ extension — no parquet on disk.
|
||||
Tables you don't see in `data/parquet/` may still be queryable.
|
||||
|
||||
### Discovery first
|
||||
|
||||
```
|
||||
da catalog --json | jq '.[] | {name, source_type, query_mode}' # see all tables + their modes
|
||||
da schema <table> # columns + types
|
||||
da describe <table> -n 5 # sample rows
|
||||
```
|
||||
|
||||
For local-mode tables, query directly with `da query "SELECT … FROM <table>"`.
|
||||
|
||||
### Three patterns for `query_mode: "remote"` tables
|
||||
|
||||
| Pattern | Tool | Use when |
|
||||
|---------|------|----------|
|
||||
| **`da fetch`** (preferred) | materializes a filtered subset locally → query the snapshot | repeated questions on same slice |
|
||||
| **`da query --remote`** | one-shot, server-side execution against BigQuery | single aggregate / cheap probe |
|
||||
| **`da query --register-bq`** | hybrid joins between local snapshots and ad-hoc BQ subqueries | crossing local + remote |
|
||||
|
||||
### Permission model + cost — important
|
||||
|
||||
- BQ access goes through the **agnes server's GCE service account**, not your personal Google credentials. If a query fails with a permission error, the table is in a project the server SA cannot read — escalate to admin, do NOT try to authenticate yourself.
|
||||
- Every BQ query bills the SA's GCP project for **bytes scanned**. A naive `SELECT * FROM <large_table>` can cost real money. ALWAYS:
|
||||
- filter via `--where` on the partition column (typically a date)
|
||||
- list specific columns in `--select` — column-store BQ skips the rest, cheaper
|
||||
- run `--estimate` first when unsure of the table size or partitioning
|
||||
|
||||
### `da fetch` discipline
|
||||
|
||||
```
|
||||
# 1. ESTIMATE first — refuses to fetch without knowing the cost
|
||||
da fetch <table> --select col1,col2 --where "date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)" --estimate
|
||||
|
||||
# 2. If reasonable, fetch as a named snapshot
|
||||
da fetch <table> --select col1,col2 --where "..." --as my_recent
|
||||
|
||||
# 3. Query the local snapshot
|
||||
da query "SELECT col1, COUNT(*) FROM my_recent GROUP BY 1"
|
||||
|
||||
# 4. List + drop snapshots when done
|
||||
da snapshot list
|
||||
da snapshot drop my_recent
|
||||
```
|
||||
|
||||
Rules of thumb:
|
||||
- ALWAYS list specific columns in `--select`. Avoid implicit SELECT *.
|
||||
- ALWAYS include a `--where` for remote tables; otherwise add `--limit`.
|
||||
- ALWAYS run `--estimate` first when the table has `partition_by` / `clustered_by` set
|
||||
per `da schema`, or could plausibly exceed 1 GB local bytes.
|
||||
- Reuse snapshots across questions in the same conversation — `da snapshot list`
|
||||
before fetching.
|
||||
|
||||
### Snapshot freshness — when to refresh
|
||||
|
||||
Snapshots are point-in-time copies. They go stale as the source data updates. For each new conversation:
|
||||
|
||||
```
|
||||
da snapshot list # see existing snapshots + their ages
|
||||
da snapshot drop my_recent # drop stale ones
|
||||
da fetch <table> --select ... --where ... --as my_recent # re-fetch
|
||||
```
|
||||
|
||||
### BigQuery SQL flavor for `--where`
|
||||
|
||||
Source-typed `bigquery` tables use BigQuery dialect, not DuckDB:
|
||||
|
||||
- Date literal: `DATE '2026-01-01'`
|
||||
- Timestamp literal: `TIMESTAMP '2026-01-01 00:00:00 UTC'`
|
||||
- Now: `CURRENT_DATE()`, `CURRENT_TIMESTAMP()`
|
||||
- Date arithmetic: `DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)`
|
||||
- Regex: `REGEXP_CONTAINS(col, r'pattern')` (raw string!)
|
||||
- Cast: `CAST(x AS INT64)` (NOT `INT`)
|
||||
|
||||
## Corporate Memory
|
||||
|
||||
Rules injected by `da sync` from the server's corporate knowledge base live in `.claude/rules/km_*.md`. They are automatically loaded by Claude Code on every session start.
|
||||
|
||||
- `km_<id>.md` — mandatory rules (always enforced)
|
||||
- `km_approved.md` — approved guidance (confidence × recency ranked)
|
||||
|
||||
Run `da sync` to refresh. Rules are pruned automatically when items are revoked.
|
||||
|
||||
## Directory Structure
|
||||
- `data/` — read-only data downloaded from server
|
||||
- `data/parquet/` — table data in Parquet format
|
||||
- `data/duckdb/` — local analytics DuckDB database
|
||||
- `data/metadata/` — profiles, schema, metrics cache
|
||||
- `user/` — your workspace (persistent across syncs)
|
||||
- `user/artifacts/` — analysis outputs, reports, charts
|
||||
- `user/sessions/` — Claude Code session logs
|
||||
- `.claude/CLAUDE.local.md` — your personal notes + workspace customizations. **Never overwritten by `da analyst setup --force`.** Uploaded to the server on `da sync --upload-only`. Put any local-only Claude instructions, project-specific reminders, or temporary notes here — NOT in CLAUDE.md (this file is regenerated from a template).
|
||||
|
||||
_Hello {{ user.name or user.email }} — generated {{ today }}._
|
||||
200
src/claude_md.py
Normal file
200
src/claude_md.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
"""Render the analyst-workspace CLAUDE.md prompt.
|
||||
|
||||
The template source is admin-editable at /admin/workspace-prompt. When no
|
||||
override is set, the default content is the Jinja2 markdown template shipped
|
||||
at config/claude_md_template.txt. When an override is saved, it replaces the
|
||||
default for every call to render_claude_md().
|
||||
|
||||
Override content is a Jinja2 template (autoescape=False, StrictUndefined).
|
||||
Available placeholders: instance.{name,subtitle}, server.{url,hostname},
|
||||
sync_interval, data_source.type, tables (list), metrics.{count,categories},
|
||||
marketplaces (RBAC-filtered list), user.{id,email,name,is_admin,groups},
|
||||
now, today.
|
||||
|
||||
The template is surfaced as the "Agent Workspace Prompt" admin editor at
|
||||
/admin/workspace-prompt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import duckdb
|
||||
from jinja2 import Environment, StrictUndefined, TemplateError
|
||||
|
||||
from app.instance_config import (
|
||||
get_data_source_type,
|
||||
get_instance_name,
|
||||
get_instance_subtitle,
|
||||
get_sync_interval,
|
||||
)
|
||||
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_TEMPLATE_PATH = (
|
||||
Path(__file__).resolve().parent.parent / "config" / "claude_md_template.txt"
|
||||
)
|
||||
|
||||
|
||||
def _load_default_template() -> str:
    """Return the source of the shipped default CLAUDE.md template.

    Reads ``_DEFAULT_TEMPLATE_PATH`` as UTF-8. When that file is absent, a
    minimal embedded template is returned instead so rendering still works.

    Returns:
        The Jinja2 template source as a string.
    """
    try:
        # Read first and handle the miss, rather than exists() + read_text():
        # the file can disappear between the check and the read.
        return _DEFAULT_TEMPLATE_PATH.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Last-resort embedded fallback if the OSS template file is missing
        # from the install (e.g., partial Docker COPY).
        return (
            "# {{ instance.name }} — AI Data Analyst\n\n"
            "This workspace is connected to {{ server.url }}.\n"
            "Data refreshes every {{ sync_interval }}.\n"
        )
|
||||
|
||||
|
||||
def _list_tables(conn: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
    """Return every row of ``table_registry`` as a small dict, ordered by name.

    Each dict has ``name``, ``description`` and ``query_mode``. A NULL
    description becomes ``""`` and a NULL query mode becomes ``"local"``.
    If the query raises ``duckdb.CatalogException`` an empty list is returned.
    """
    try:
        cursor = conn.execute(
            """SELECT name, description, query_mode
               FROM table_registry
               ORDER BY name"""
        )
        registry_rows = cursor.fetchall()
    except duckdb.CatalogException:
        return []

    tables: list[dict[str, Any]] = []
    for record in registry_rows:
        tables.append(
            {
                "name": record[0],
                "description": record[1] or "",
                "query_mode": record[2] or "local",
            }
        )
    return tables
|
||||
|
||||
|
||||
def _metrics_summary(conn: duckdb.DuckDBPyConnection) -> dict[str, Any]:
    """Summarise ``metric_definitions`` as ``{"count": int, "categories": list}``.

    ``count`` is the total number of metric rows; ``categories`` is the sorted
    list of distinct non-empty category values. If the query raises
    ``duckdb.CatalogException`` the summary is zero metrics and no categories.
    """
    query = "SELECT category, COUNT(*) FROM metric_definitions GROUP BY category"
    try:
        per_category = conn.execute(query).fetchall()
    except duckdb.CatalogException:
        return {"count": 0, "categories": []}

    total = 0
    seen_categories = set()
    for grouped in per_category:
        total += grouped[1]
        # Rows with a NULL/empty category still count but are not listed.
        if grouped[0]:
            seen_categories.add(grouped[0])
    return {"count": total, "categories": sorted(seen_categories)}
|
||||
|
||||
|
||||
def _marketplaces_for_user(
    conn: duckdb.DuckDBPyConnection, user: dict[str, Any]
) -> list[dict[str, Any]]:
    """Return marketplaces with the plugins the user is allowed to see.

    Delegates RBAC filtering entirely to resolve_allowed_plugins; this
    function reads ``marketplace_slug`` and ``original_name`` from each
    returned entry. Results are grouped by marketplace slug; display names
    are fetched from marketplace_registry in a single query.

    Args:
        conn: Connection used for plugin resolution and the registry lookup.
        user: User dict passed through unchanged to resolve_allowed_plugins.

    Returns:
        A list of ``{"slug", "name", "plugins": [{"name"}]}`` dicts. Empty
        when resolution fails (the failure is logged) or nothing is allowed.
    """
    try:
        from src.marketplace_filter import resolve_allowed_plugins

        allowed = resolve_allowed_plugins(conn, user)
    except Exception:
        # Deliberate best-effort: a broken plugin resolver must not prevent
        # the rest of the prompt from rendering.
        logger.exception("_marketplaces_for_user: marketplace plugin resolution failed")
        return []
    if not allowed:
        return []

    # Build slug → display name lookup from registry
    slugs = list({p["marketplace_slug"] for p in allowed})
    placeholders = ",".join(["?"] * len(slugs))
    try:
        name_rows = conn.execute(
            f"SELECT id, name FROM marketplace_registry WHERE id IN ({placeholders})",
            slugs,
        ).fetchall()
    except duckdb.CatalogException:
        name_rows = []
    slug_to_name: dict[str, str] = {r[0]: r[1] for r in name_rows}

    grouped: dict[str, dict[str, Any]] = {}
    for plugin in allowed:
        slug = plugin["marketplace_slug"]
        bucket = grouped.setdefault(
            slug,
            {
                "slug": slug,
                # ``or slug`` also covers a registry row whose name is NULL or
                # empty, which would otherwise render as "None" / blank.
                "name": slug_to_name.get(slug) or slug,
                "plugins": [],
            },
        )
        bucket["plugins"].append({"name": plugin["original_name"]})

    return list(grouped.values())
|
||||
|
||||
|
||||
def build_claude_md_context(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> dict[str, Any]:
    """Compose the Jinja2 render context for the CLAUDE.md template.

    Reads instance settings, queries ``conn`` for tables, metrics and the
    user's marketplaces, and samples the current UTC time.

    Args:
        conn: Connection used for the table, metric and marketplace lookups.
        user: User dict; ``id``, ``email``, ``name``, ``is_admin`` and
            ``groups`` are read with ``.get`` so missing keys are tolerated.
        server_url: Server URL exposed as ``server.url``; its hostname part
            is exposed as ``server.hostname``.

    Returns:
        A dict with the keys ``instance``, ``server``, ``sync_interval``,
        ``data_source``, ``tables``, ``metrics``, ``marketplaces``, ``user``,
        ``now`` and ``today``.
    """
    now = datetime.now(timezone.utc)
    parsed = urlparse(server_url)
    return {
        "instance": {
            "name": get_instance_name(),
            "subtitle": get_instance_subtitle(),
        },
        "server": {
            "url": server_url,
            "hostname": parsed.hostname or "",
        },
        "sync_interval": get_sync_interval(),
        "data_source": {"type": get_data_source_type()},
        "tables": _list_tables(conn),
        "metrics": _metrics_summary(conn),
        "marketplaces": _marketplaces_for_user(conn, user),
        "user": {
            # ``or ""`` (rather than a .get default) also normalises keys that
            # are present but None, so templates never print "None".
            "id": user.get("id") or "",
            "email": user.get("email") or "",
            "name": user.get("name") or "",
            "is_admin": bool(user.get("is_admin")),
            "groups": user.get("groups") or [],
        },
        "now": now,
        # Derived from the same UTC instant as ``now`` so the two values can
        # never disagree on the calendar day when the host is not on UTC.
        "today": now.date().isoformat(),
    }
|
||||
|
||||
|
||||
def compute_default_claude_md(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> str:
    """Render the shipped default template, ignoring any stored override.

    The template source comes from ``_load_default_template()`` and is
    rendered with the context built for ``user`` and ``server_url``.
    Jinja2 errors are not caught here; callers wanting a graceful fallback
    must handle ``TemplateError`` themselves.
    """
    default_source = _load_default_template()
    jinja_env = Environment(undefined=StrictUndefined, autoescape=False)
    compiled = jinja_env.from_string(default_source)
    # The context is built only after the template compiled, as before.
    context = build_claude_md_context(conn, user=user, server_url=server_url)
    return compiled.render(**context)
|
||||
|
||||
|
||||
def render_claude_md(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> str:
    """Resolve the active template (override or default) and render it for the given user.

    When the stored override has non-empty content it is used as the template
    source; otherwise the shipped default template is used. Rendering uses
    StrictUndefined and autoescape=False.

    Args:
        conn: Connection used to read the override and build the context.
        user: User dict the context is built for.
        server_url: Server URL exposed to the template.

    Returns:
        The rendered markdown.

    Raises:
        jinja2.TemplateError: On a syntax error, an undefined variable, or a
            sandbox violation (``SecurityError`` is a ``TemplateError``
            subclass). Callers are expected to handle it.
    """
    # Function-scope import: this is the only entry point that renders
    # admin-authored template source.
    from jinja2.sandbox import SandboxedEnvironment

    row = ClaudeMdTemplateRepository(conn).get()
    override = row.get("content")
    source = override if override else _load_default_template()
    # The override is editable through the admin UI, so render it in Jinja2's
    # sandbox: unsafe attribute access (e.g. ``__class__``) is rejected instead
    # of giving the template access to Python internals.
    env = SandboxedEnvironment(undefined=StrictUndefined, autoescape=False)
    template = env.from_string(source)
    return template.render(**build_claude_md_context(conn, user=user, server_url=server_url))
|
||||
32
src/db.py
32
src/db.py
|
|
@ -39,7 +39,7 @@ def _maybe_instrument(con, db_tag: str):
|
|||
|
||||
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||
|
||||
SCHEMA_VERSION = 22
|
||||
SCHEMA_VERSION = 23
|
||||
|
||||
_SYSTEM_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
|
|
@ -427,6 +427,18 @@ CREATE TABLE IF NOT EXISTS setup_banner (
|
|||
updated_by VARCHAR,
|
||||
CONSTRAINT singleton CHECK (id = 1)
|
||||
);
|
||||
|
||||
-- v23: customizable analyst-workspace CLAUDE.md template.
|
||||
-- Singleton row (id=1). NULL content means "use the default template
|
||||
-- shipped at config/claude_md_template.txt" (Jinja2 markdown). Admin override
|
||||
-- stores the raw Jinja2 source string.
|
||||
CREATE TABLE IF NOT EXISTS claude_md_template (
|
||||
id INTEGER PRIMARY KEY DEFAULT 1,
|
||||
content TEXT,
|
||||
updated_at TIMESTAMP,
|
||||
updated_by VARCHAR,
|
||||
CONSTRAINT singleton CHECK (id = 1)
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -1658,6 +1670,17 @@ _V21_TO_V22_MIGRATIONS = [
|
|||
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
|
||||
]
|
||||
|
||||
_V22_TO_V23_MIGRATIONS = [
|
||||
"""CREATE TABLE IF NOT EXISTS claude_md_template (
|
||||
id INTEGER PRIMARY KEY DEFAULT 1,
|
||||
content TEXT,
|
||||
updated_at TIMESTAMP,
|
||||
updated_by VARCHAR,
|
||||
CONSTRAINT singleton CHECK (id = 1)
|
||||
)""",
|
||||
"INSERT INTO claude_md_template (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
|
||||
]
|
||||
|
||||
|
||||
def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
||||
"""Create tables if they don't exist. Apply migrations if schema version changed.
|
||||
|
|
@ -1724,6 +1747,10 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
|||
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) "
|
||||
"ON CONFLICT (id) DO NOTHING"
|
||||
)
|
||||
conn.execute(
|
||||
"INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
|
||||
"ON CONFLICT (id) DO NOTHING"
|
||||
)
|
||||
# Fresh-install seed is handled by the unconditional
|
||||
# _seed_core_roles call at the bottom of _ensure_schema —
|
||||
# left as a no-op branch here so the migration ladder still
|
||||
|
|
@ -1807,6 +1834,9 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
|||
if current < 22:
|
||||
for sql in _V21_TO_V22_MIGRATIONS:
|
||||
conn.execute(sql)
|
||||
if current < 23:
|
||||
for sql in _V22_TO_V23_MIGRATIONS:
|
||||
conn.execute(sql)
|
||||
conn.execute(
|
||||
"UPDATE schema_version SET version = ?, applied_at = current_timestamp",
|
||||
[SCHEMA_VERSION],
|
||||
|
|
|
|||
53
src/repositories/claude_md_template.py
Normal file
53
src/repositories/claude_md_template.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
"""Repository for the per-instance CLAUDE.md template override (singleton row)."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import duckdb
|
||||
|
||||
|
||||
class ClaudeMdTemplateRepository:
    """Data access for the singleton ``claude_md_template`` row (``id = 1``).

    ``content`` holds the override template source; ``None`` means no
    override is set.
    """

    def __init__(self, conn: duckdb.DuckDBPyConnection):
        self.conn = conn

    def get(self) -> dict[str, Any]:
        """Return the singleton row. Always exists post-migration; content
        is None when no override is set (= use shipped default template).

        Returns:
            A dict with ``id``, ``content``, ``updated_at`` and ``updated_by``.
        """
        row = self.conn.execute(
            "SELECT id, content, updated_at, updated_by FROM claude_md_template WHERE id = 1"
        ).fetchone()
        if row is None:
            # Defensive: re-seed if a previous admin manually deleted it.
            self.conn.execute(
                "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
                "ON CONFLICT (id) DO NOTHING"
            )
            return {"id": 1, "content": None, "updated_at": None, "updated_by": None}
        return {
            "id": row[0],
            "content": row[1],
            "updated_at": row[2],
            "updated_by": row[3],
        }

    def set(self, content: str, *, updated_by: str) -> None:
        """Store ``content`` as the override and record who saved it and when."""
        self._upsert(content, updated_by)

    def reset(self, *, updated_by: str) -> None:
        """Clear override; renderer falls back to shipped default template.

        Written as an upsert like ``set()`` so the reset (and who performed
        it) is still recorded when the singleton row has gone missing.
        """
        self._upsert(None, updated_by)

    def _upsert(self, content: "str | None", updated_by: str) -> None:
        """Write ``content`` plus audit columns to the singleton row, creating it if absent."""
        now = datetime.now(timezone.utc)
        self.conn.execute(
            """INSERT INTO claude_md_template (id, content, updated_at, updated_by)
               VALUES (1, ?, ?, ?)
               ON CONFLICT (id) DO UPDATE SET
                   content = excluded.content,
                   updated_at = excluded.updated_at,
                   updated_by = excluded.updated_by""",
            [content, now, updated_by],
        )
|
||||
169
tests/test_claude_md_renderer.py
Normal file
169
tests/test_claude_md_renderer.py
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
"""Unit tests for the analyst-workspace CLAUDE.md renderer (src/claude_md.py)."""
|
||||
|
||||
import duckdb
|
||||
import pytest
|
||||
from jinja2 import TemplateError
|
||||
|
||||
from src.db import _ensure_schema
|
||||
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||
from src.claude_md import (
|
||||
build_claude_md_context,
|
||||
compute_default_claude_md,
|
||||
render_claude_md,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield a DuckDB connection to a fresh ``system.duckdb`` under ``tmp_path``.

    ``DATA_DIR`` is pointed at the temp directory before the schema is
    created; the connection is closed on teardown.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    connection = duckdb.connect(str(tmp_path / "system.duckdb"))
    _ensure_schema(connection)
    yield connection
    connection.close()
|
||||
|
||||
|
||||
def _user(email="alice@example.com", is_admin=False):
    """Build a test user dict; only the email and admin flag vary."""
    return dict(
        id="u1",
        email=email,
        name="Alice",
        is_admin=is_admin,
        groups=["Everyone"],
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Default (no override) — renders a non-empty markdown string
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_compute_default_returns_non_empty(conn):
    """The shipped default template renders to something other than whitespace."""
    rendered = compute_default_claude_md(
        conn, user=_user(), server_url="https://example.com"
    )
    assert rendered.strip()
|
||||
|
||||
|
||||
def test_default_contains_server_url(conn):
    """The server URL passed in appears verbatim in the default render."""
    url = "https://myagnes.example.com"
    rendered = compute_default_claude_md(conn, user=_user(), server_url=url)
    assert url in rendered
|
||||
|
||||
|
||||
def test_default_contains_user_reference(conn):
    """A user with an empty name is addressed by email in the default render."""
    # The footer uses `user.name or user.email`, so an empty name must fall
    # through to the email address.
    nameless = dict(id="u1", email="bob@example.com", name="", is_admin=False, groups=[])
    rendered = compute_default_claude_md(
        conn, user=nameless, server_url="https://example.com"
    )
    assert "bob@example.com" in rendered
|
||||
|
||||
|
||||
def test_render_uses_default_when_no_override(conn):
    """With no override stored, render_claude_md still yields non-blank output."""
    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
    assert rendered.strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Override renders correctly
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_render_uses_override_when_set(conn):
    """A stored override is rendered with the requesting user's fields."""
    override = "# {{ instance.name }} Workspace\n\nHello {{ user.email }}."
    repo = ClaudeMdTemplateRepository(conn)
    repo.set(override, updated_by="admin@example.com")

    viewer = _user("charlie@example.com")
    rendered = render_claude_md(conn, user=viewer, server_url="https://example.com")
    assert "charlie@example.com" in rendered
|
||||
|
||||
|
||||
def test_render_override_tables_list(conn):
    """An override that loops over ``tables`` sees rows from table_registry."""
    # Seed one registry row for the template to iterate over.
    seed_sql = (
        "INSERT INTO table_registry (id, name, description, query_mode, source_type) "
        "VALUES ('t1', 'orders', 'All orders', 'local', 'keboola')"
    )
    conn.execute(seed_sql)

    repo = ClaudeMdTemplateRepository(conn)
    repo.set(
        "{% for t in tables %}- {{ t.name }}: {{ t.description }}{% endfor %}",
        updated_by="admin@example.com",
    )

    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
    for expected in ("orders", "All orders"):
        assert expected in rendered
|
||||
|
||||
|
||||
def test_render_override_metrics_summary(conn):
    """An override can read the metric count and category list from ``metrics``."""
    # Seed a metric definition — must include NOT NULL columns: display_name, sql
    conn.execute(
        "INSERT INTO metric_definitions (id, name, display_name, category, sql) "
        "VALUES ('m1', 'mrr', 'MRR', 'revenue', 'SELECT SUM(amount)')"
    )
    ClaudeMdTemplateRepository(conn).set(
        "Metrics: {{ metrics.count }}, cats: {{ metrics.categories | join(', ') }}",
        updated_by="admin@example.com",
    )
    out = render_claude_md(conn, user=_user(), server_url="https://example.com")
    # Pin the count to its label: a bare "1" would also match any other digit
    # run in the output (e.g. a count of 10 or 21).
    assert "Metrics: 1," in out
    assert "cats: revenue" in out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RBAC-filtered marketplaces — a user with no grants sees no plugins
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_marketplaces_empty_for_user_with_no_grants(conn):
    """With nothing granted, ``marketplaces`` is falsy inside the template."""
    # No grants are seeded, so the template must take its else-branch.
    probe = "{% if marketplaces %}HAS_PLUGINS{% else %}NO_PLUGINS{% endif %}"
    repo = ClaudeMdTemplateRepository(conn)
    repo.set(probe, updated_by="admin@example.com")

    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")
    assert "NO_PLUGINS" in rendered
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anonymous / minimal user context
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_render_with_minimal_user_context(conn):
    """Templates referencing user fields must work with minimal user dict."""
    ClaudeMdTemplateRepository(conn).set(
        "User: {{ user.email }}, admin: {{ user.is_admin }}",
        updated_by="admin@example.com",
    )
    # Only id + email are supplied; name, is_admin and groups are deliberately
    # omitted so the context builder's defaults are what gets rendered.
    minimal_user = {"id": "u1", "email": "alice@example.com"}
    out = render_claude_md(conn, user=minimal_user, server_url="https://example.com")
    assert "alice@example.com" in out
    assert "admin: False" in out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build context shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_context_exposes_all_documented_keys(conn):
    """Every documented top-level placeholder is present in the context."""
    documented = (
        "instance",
        "server",
        "sync_interval",
        "data_source",
        "tables",
        "metrics",
        "marketplaces",
        "user",
        "now",
        "today",
    )
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    for key in documented:
        assert key in context, f"missing context key: {key}"
|
||||
|
||||
|
||||
def test_context_tables_is_list(conn):
    """``tables`` in the context is a list."""
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    tables = context["tables"]
    assert isinstance(tables, list)
|
||||
|
||||
|
||||
def test_context_metrics_shape(conn):
    """``metrics`` in the context carries both ``count`` and ``categories``."""
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    metrics = context["metrics"]
    assert "count" in metrics
    assert "categories" in metrics
|
||||
|
||||
|
||||
def test_context_marketplaces_is_list(conn):
    """``marketplaces`` in the context is a list."""
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    marketplaces = context["marketplaces"]
    assert isinstance(marketplaces, list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Render failure raises (caller handles)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_render_raises_on_template_error(conn):
    """An override referencing an unknown variable makes rendering raise TemplateError."""
    broken = "{{ does_not_exist }}"
    repo = ClaudeMdTemplateRepository(conn)
    repo.set(broken, updated_by="admin@example.com")

    with pytest.raises(TemplateError):
        render_claude_md(conn, user=_user(), server_url="https://example.com")
|
||||
40
tests/test_claude_md_template_repo.py
Normal file
40
tests/test_claude_md_template_repo.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""Unit tests for ClaudeMdTemplateRepository."""
|
||||
|
||||
import duckdb
|
||||
import pytest
|
||||
|
||||
from src.db import _ensure_schema
|
||||
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||
|
||||
|
||||
@pytest.fixture
def conn(tmp_path):
    """Yield a DuckDB connection to a fresh ``system.duckdb`` with the schema applied.

    The connection is closed on teardown.
    """
    connection = duckdb.connect(str(tmp_path / "system.duckdb"))
    _ensure_schema(connection)
    yield connection
    connection.close()
|
||||
|
||||
|
||||
def test_get_returns_none_on_fresh_install(conn):
    """On a fresh schema the row exists and its content is None."""
    fetched = ClaudeMdTemplateRepository(conn).get()
    assert fetched is not None
    # None content is the "no override" sentinel.
    assert fetched["content"] is None
|
||||
|
||||
|
||||
def test_set_stores_content(conn):
    """set() persists the content and fills in both audit columns."""
    template = "# {{ instance.name }}"
    editor = "admin@example.com"
    repo = ClaudeMdTemplateRepository(conn)
    repo.set(template, updated_by=editor)

    stored = repo.get()
    assert stored["content"] == template
    assert stored["updated_by"] == editor
    assert stored["updated_at"] is not None
|
||||
|
||||
|
||||
def test_reset_clears_content(conn):
    """reset() after set() leaves the stored content as None."""
    editor = "admin@example.com"
    repo = ClaudeMdTemplateRepository(conn)
    repo.set("custom template", updated_by=editor)
    repo.reset(updated_by=editor)

    assert repo.get()["content"] is None
|
||||
|
|
@ -13,8 +13,8 @@ import duckdb
|
|||
from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version
|
||||
|
||||
|
||||
def test_schema_version_is_22():
|
||||
assert SCHEMA_VERSION == 22
|
||||
def test_schema_version_is_23():
|
||||
assert SCHEMA_VERSION == 23
|
||||
|
||||
|
||||
def test_v20_adds_source_query(tmp_path):
|
||||
|
|
@ -29,7 +29,29 @@ def test_v20_adds_source_query(tmp_path):
|
|||
).fetchall()
|
||||
}
|
||||
assert "source_query" in cols, f"source_query missing from {cols}"
|
||||
assert get_schema_version(conn) == 22
|
||||
assert get_schema_version(conn) == 23
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_v23_adds_claude_md_template(tmp_path):
    """v23 must create the claude_md_template singleton table."""
    conn = duckdb.connect(str(tmp_path / "system.duckdb"))
    _ensure_schema(conn)

    listing = conn.execute(
        "SELECT table_name FROM information_schema.tables "
        "WHERE table_schema = 'main'"
    ).fetchall()
    tables = {entry[0] for entry in listing}
    assert "claude_md_template" in tables, f"claude_md_template missing from {tables}"

    # The singleton row is seeded with no override.
    seeded = conn.execute(
        "SELECT id, content FROM claude_md_template WHERE id = 1"
    ).fetchone()
    assert seeded is not None
    assert seeded[0] == 1
    assert seeded[1] is None  # default = no override
    conn.close()
|
||||
|
||||
|
||||
|
|
@ -61,7 +83,7 @@ def test_v19_db_migrates_to_v20(tmp_path):
|
|||
|
||||
_ensure_schema(conn)
|
||||
|
||||
assert get_schema_version(conn) == 22
|
||||
assert get_schema_version(conn) == 23
|
||||
cols = {
|
||||
r[0] for r in conn.execute(
|
||||
"SELECT column_name FROM information_schema.columns "
|
||||
|
|
|
|||
Loading…
Reference in a new issue