feat(db,repo,renderer): schema v23 + claude_md_template + ClaudeMd renderer
- Bump SCHEMA_VERSION 22 → 23; add claude_md_template singleton table to _SYSTEM_SCHEMA and _V22_TO_V23_MIGRATIONS; wire migration + fresh-install seed - src/repositories/claude_md_template.py: ClaudeMdTemplateRepository (get/set/reset) mirroring WelcomeTemplateRepository; defensive re-seed in get() - src/claude_md.py: compute_default_claude_md / render_claude_md / build_claude_md_context — rich renderer with RBAC-filtered tables, metrics, and marketplaces; reads override from claude_md_template or falls back to config/claude_md_template.txt; raises TemplateError on broken override - config/claude_md_template.txt: default Jinja2 markdown template restored from PR #167 history (tables, metrics, marketplaces, BQ guidance, corporate memory, directory structure, per-user footer)
This commit is contained in:
parent
53f841f244
commit
f01eb4143d
7 changed files with 671 additions and 5 deletions
152
config/claude_md_template.txt
Normal file
152
config/claude_md_template.txt
Normal file
|
|
@ -0,0 +1,152 @@
|
||||||
|
{# Default analyst-onboarding workspace prompt for "da analyst setup".
|
||||||
|
Rendered server-side by src/claude_md.py. Edit this file to change
|
||||||
|
the OSS default; admins override per-instance via /admin/workspace-prompt.
|
||||||
|
|
||||||
|
Available context (see docs/agent-workspace-prompt.md for the full reference):
|
||||||
|
instance.name, instance.subtitle
|
||||||
|
server.url, server.hostname
|
||||||
|
sync_interval — string from instance.yaml
|
||||||
|
data_source.type — keboola | bigquery | local
|
||||||
|
tables — list of {name, description, query_mode}
|
||||||
|
metrics.count, metrics.categories
|
||||||
|
marketplaces — list of {slug, name, plugins:[{name}]}
|
||||||
|
user.id, user.email, user.name, user.is_admin, user.groups
|
||||||
|
now, today — datetime / date string
|
||||||
|
#}
|
||||||
|
# {{ instance.name }} — AI Data Analyst
|
||||||
|
|
||||||
|
This workspace is connected to {{ server.url }}.
|
||||||
|
{% if instance.subtitle %}Operated by **{{ instance.subtitle }}**.{% endif %}
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
- Before computing any business metric: run `da metrics show <category>/<name>`
|
||||||
|
- **For canonical table list with query modes: `da catalog`.** `data/metadata/schema.json` covers `query_mode: "local"` tables only — for remote/hybrid tables it's incomplete. Treat `da catalog` as source of truth.
|
||||||
|
- Do not use DESCRIBE/SHOW COLUMNS — use `da schema <table>` instead
|
||||||
|
- Save work output to `user/artifacts/`
|
||||||
|
- Sync data regularly with `da sync`
|
||||||
|
- **Personal customizations go in `.claude/CLAUDE.local.md`, NOT here.** This file is regenerated by `da analyst setup --force`; edits here will be lost. CLAUDE.local.md is preserved across regeneration and uploaded on `da sync --upload-only`.
|
||||||
|
|
||||||
|
## Metrics Workflow
|
||||||
|
1. `da metrics list` — find the relevant metric ({{ metrics.count }} available, categories: {{ metrics.categories | join(", ") or "none yet" }})
|
||||||
|
2. `da metrics show <category>/<name>` — read SQL and business rules
|
||||||
|
3. Use the canonical SQL from the metric definition, adapt to the question
|
||||||
|
4. Never invent metric calculations — always check existing definitions first
|
||||||
|
|
||||||
|
## Data Sync
|
||||||
|
- `da sync` — download current data from server
|
||||||
|
- `da sync --docs-only` — just metadata and metrics (fast refresh)
|
||||||
|
- `da sync --upload-only` — upload sessions and local notes to server
|
||||||
|
- Data on the server refreshes every {{ sync_interval }}
|
||||||
|
|
||||||
|
## Available Datasets
|
||||||
|
{% for t in tables -%}
|
||||||
|
- `{{ t.name }}`{% if t.description %} — {{ t.description }}{% endif %}{% if t.query_mode == "remote" %} *(remote, queried on demand)*{% endif %}
|
||||||
|
{% else -%}
|
||||||
|
- _No tables registered yet — ask an admin to register tables in the dashboard._
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
{% if marketplaces -%}
|
||||||
|
## Plugins available to you
|
||||||
|
{% for mp in marketplaces -%}
|
||||||
|
- **{{ mp.name }}** ({{ mp.slug }}): {{ mp.plugins | map(attribute="name") | join(", ") }}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif -%}
|
||||||
|
|
||||||
|
## Remote Queries (BigQuery) — when data isn't on the laptop
|
||||||
|
|
||||||
|
Not every table is synced. Tables registered with `query_mode: "remote"` live in
|
||||||
|
BigQuery, accessed server-side via DuckDB's BQ extension — no parquet on disk.
|
||||||
|
Tables you don't see in `data/parquet/` may still be queryable.
|
||||||
|
|
||||||
|
### Discovery first
|
||||||
|
|
||||||
|
```
|
||||||
|
da catalog --json | jq '.[] | {name, source_type, query_mode}' # see all tables + their modes
|
||||||
|
da schema <table> # columns + types
|
||||||
|
da describe <table> -n 5 # sample rows
|
||||||
|
```
|
||||||
|
|
||||||
|
For local-mode tables, query directly with `da query "SELECT … FROM <table>"`.
|
||||||
|
|
||||||
|
### Three patterns for `query_mode: "remote"` tables
|
||||||
|
|
||||||
|
| Pattern | Tool | Use when |
|
||||||
|
|---------|------|----------|
|
||||||
|
| **`da fetch`** (preferred) | materializes a filtered subset locally → query the snapshot | repeated questions on same slice |
|
||||||
|
| **`da query --remote`** | one-shot, server-side execution against BigQuery | single aggregate / cheap probe |
|
||||||
|
| **`da query --register-bq`** | hybrid joins between local snapshots and ad-hoc BQ subqueries | crossing local + remote |
|
||||||
|
|
||||||
|
### Permission model + cost — important
|
||||||
|
|
||||||
|
- BQ access goes through the **agnes server's GCE service account**, not your personal Google credentials. If a query fails with a permission error, the table is in a project the server SA cannot read — escalate to admin, do NOT try to authenticate yourself.
|
||||||
|
- Every BQ query bills the SA's GCP project for **bytes scanned**. A naive `SELECT * FROM <large_table>` can cost real money. ALWAYS:
|
||||||
|
- filter via `--where` on the partition column (typically a date)
|
||||||
|
- list specific columns in `--select` — column-store BQ skips the rest, cheaper
|
||||||
|
- run `--estimate` first when unsure of the table size or partitioning
|
||||||
|
|
||||||
|
### `da fetch` discipline
|
||||||
|
|
||||||
|
```
|
||||||
|
# 1. ESTIMATE first — refuses to fetch without knowing the cost
|
||||||
|
da fetch <table> --select col1,col2 --where "date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)" --estimate
|
||||||
|
|
||||||
|
# 2. If reasonable, fetch as a named snapshot
|
||||||
|
da fetch <table> --select col1,col2 --where "..." --as my_recent
|
||||||
|
|
||||||
|
# 3. Query the local snapshot
|
||||||
|
da query "SELECT col1, COUNT(*) FROM my_recent GROUP BY 1"
|
||||||
|
|
||||||
|
# 4. List + drop snapshots when done
|
||||||
|
da snapshot list
|
||||||
|
da snapshot drop my_recent
|
||||||
|
```
|
||||||
|
|
||||||
|
Rules of thumb:
|
||||||
|
- ALWAYS list specific columns in `--select`. Avoid implicit SELECT *.
|
||||||
|
- ALWAYS include a `--where` for remote tables; otherwise add `--limit`.
|
||||||
|
- ALWAYS run `--estimate` first when `da schema` shows the table has `partition_by` / `clustered_by` set,
  or when the fetched data could plausibly exceed 1 GB on local disk.
|
||||||
|
- Reuse snapshots across questions in the same conversation — `da snapshot list`
|
||||||
|
before fetching.
|
||||||
|
|
||||||
|
### Snapshot freshness — when to refresh
|
||||||
|
|
||||||
|
Snapshots are point-in-time copies. They go stale as the source data updates. For each new conversation:
|
||||||
|
|
||||||
|
```
|
||||||
|
da snapshot list # see existing snapshots + their ages
|
||||||
|
da snapshot drop my_recent # drop stale ones
|
||||||
|
da fetch <table> --select ... --where ... --as my_recent # re-fetch
|
||||||
|
```
|
||||||
|
|
||||||
|
### BigQuery SQL flavor for `--where`
|
||||||
|
|
||||||
|
Source-typed `bigquery` tables use BigQuery dialect, not DuckDB:
|
||||||
|
|
||||||
|
- Date literal: `DATE '2026-01-01'`
|
||||||
|
- Timestamp literal: `TIMESTAMP '2026-01-01 00:00:00 UTC'`
|
||||||
|
- Now: `CURRENT_DATE()`, `CURRENT_TIMESTAMP()`
|
||||||
|
- Date arithmetic: `DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)`
|
||||||
|
- Regex: `REGEXP_CONTAINS(col, r'pattern')` (raw string!)
|
||||||
|
- Cast: `CAST(x AS INT64)` (NOT `INT`)
|
||||||
|
|
||||||
|
## Corporate Memory
|
||||||
|
|
||||||
|
Rules injected by `da sync` from the server's corporate knowledge base live in `.claude/rules/km_*.md`. They are automatically loaded by Claude Code on every session start.
|
||||||
|
|
||||||
|
- `km_<id>.md` — mandatory rules (always enforced)
|
||||||
|
- `km_approved.md` — approved guidance (confidence × recency ranked)
|
||||||
|
|
||||||
|
Run `da sync` to refresh. Rules are pruned automatically when items are revoked.
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
- `data/` — read-only data downloaded from server
|
||||||
|
- `data/parquet/` — table data in Parquet format
|
||||||
|
- `data/duckdb/` — local analytics DuckDB database
|
||||||
|
- `data/metadata/` — profiles, schema, metrics cache
|
||||||
|
- `user/` — your workspace (persistent across syncs)
|
||||||
|
- `user/artifacts/` — analysis outputs, reports, charts
|
||||||
|
- `user/sessions/` — Claude Code session logs
|
||||||
|
- `.claude/CLAUDE.local.md` — your personal notes + workspace customizations. **Never overwritten by `da analyst setup --force`.** Uploaded to the server on `da sync --upload-only`. Put any local-only Claude instructions, project-specific reminders, or temporary notes here — NOT in CLAUDE.md (this file is regenerated from a template).
|
||||||
|
|
||||||
|
_Hello {{ user.name or user.email }} — generated {{ today }}._
|
||||||
200
src/claude_md.py
Normal file
200
src/claude_md.py
Normal file
|
|
@ -0,0 +1,200 @@
|
||||||
|
"""Render the analyst-workspace CLAUDE.md prompt.
|
||||||
|
|
||||||
|
The template source is admin-editable at /admin/workspace-prompt. When no
|
||||||
|
override is set, the default content is the Jinja2 markdown template shipped
|
||||||
|
at config/claude_md_template.txt. When an override is saved, it replaces the
|
||||||
|
default for every call to render_claude_md().
|
||||||
|
|
||||||
|
Override content is a Jinja2 template (autoescape=False, StrictUndefined).
|
||||||
|
Available placeholders: instance.{name,subtitle}, server.{url,hostname},
|
||||||
|
sync_interval, data_source.type, tables (list), metrics.{count,categories},
|
||||||
|
marketplaces (RBAC-filtered list), user.{id,email,name,is_admin,groups},
|
||||||
|
now, today.
|
||||||
|
|
||||||
|
The template is surfaced to admins as the "Agent Workspace Prompt" editor at
/admin/workspace-prompt.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import date, datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
from jinja2 import Environment, StrictUndefined, TemplateError
|
||||||
|
|
||||||
|
from app.instance_config import (
|
||||||
|
get_data_source_type,
|
||||||
|
get_instance_name,
|
||||||
|
get_instance_subtitle,
|
||||||
|
get_sync_interval,
|
||||||
|
)
|
||||||
|
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DEFAULT_TEMPLATE_PATH = (
|
||||||
|
Path(__file__).resolve().parent.parent / "config" / "claude_md_template.txt"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_default_template() -> str:
    """Return the Jinja2 source of the shipped default CLAUDE.md template.

    Reads the file at ``_DEFAULT_TEMPLATE_PATH``. If that file is missing from
    the install (e.g., partial Docker COPY), logs a warning and returns a
    minimal embedded template instead so rendering still works.
    """
    try:
        # Read directly instead of exists()-then-read: a file that disappears
        # between the two calls can no longer raise out of this function.
        return _DEFAULT_TEMPLATE_PATH.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Make the degraded state visible; the embedded fallback is far
        # shorter than the real template.
        logger.warning(
            "Default CLAUDE.md template not found at %s; using embedded fallback",
            _DEFAULT_TEMPLATE_PATH,
        )
    # Last-resort embedded fallback if the OSS template file is missing
    # from the install (e.g., partial Docker COPY).
    return (
        "# {{ instance.name }} — AI Data Analyst\n\n"
        "This workspace is connected to {{ server.url }}.\n"
        "Data refreshes every {{ sync_interval }}.\n"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _list_tables(conn: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT name, description, query_mode
|
||||||
|
FROM table_registry
|
||||||
|
ORDER BY name"""
|
||||||
|
).fetchall()
|
||||||
|
except duckdb.CatalogException:
|
||||||
|
return []
|
||||||
|
return [
|
||||||
|
{"name": r[0], "description": r[1] or "", "query_mode": r[2] or "local"}
|
||||||
|
for r in rows
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _metrics_summary(conn: duckdb.DuckDBPyConnection) -> dict[str, Any]:
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT category, COUNT(*) FROM metric_definitions GROUP BY category"
|
||||||
|
).fetchall()
|
||||||
|
except duckdb.CatalogException:
|
||||||
|
return {"count": 0, "categories": []}
|
||||||
|
return {
|
||||||
|
"count": sum(r[1] for r in rows),
|
||||||
|
"categories": sorted({r[0] for r in rows if r[0]}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _marketplaces_for_user(
    conn: duckdb.DuckDBPyConnection, user: dict[str, Any]
) -> list[dict[str, Any]]:
    """Group the plugins visible to ``user`` by marketplace.

    Visibility is decided solely by ``resolve_allowed_plugins``; this function
    only reshapes its result into ``{"slug", "name", "plugins": [{"name"}]}``
    entries, one per marketplace, in first-seen order. Display names come from
    a single ``marketplace_registry`` lookup and fall back to the slug when no
    registry row matches. Any failure while resolving plugins is logged and
    yields ``[]``.
    """
    try:
        from src.marketplace_filter import resolve_allowed_plugins

        visible_plugins = resolve_allowed_plugins(conn, user)
    except Exception:
        logger.exception("_marketplaces_for_user: marketplace plugin resolution failed")
        return []
    if not visible_plugins:
        return []

    # One registry query for all distinct slugs → display-name lookup.
    unique_slugs = list({entry["marketplace_slug"] for entry in visible_plugins})
    placeholders = ",".join("?" for _ in unique_slugs)
    try:
        registry_rows = conn.execute(
            f"SELECT id, name FROM marketplace_registry WHERE id IN ({placeholders})",
            unique_slugs,
        ).fetchall()
    except duckdb.CatalogException:
        registry_rows = []
    display_names: dict[str, str] = {record[0]: record[1] for record in registry_rows}

    by_slug: dict[str, dict[str, Any]] = {}
    for entry in visible_plugins:
        slug = entry["marketplace_slug"]
        if slug not in by_slug:
            by_slug[slug] = {
                "slug": slug,
                "name": display_names.get(slug, slug),
                "plugins": [],
            }
        by_slug[slug]["plugins"].append({"name": entry["original_name"]})

    return list(by_slug.values())
|
||||||
|
|
||||||
|
|
||||||
|
def build_claude_md_context(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> dict[str, Any]:
    """Compose the Jinja2 render context for the CLAUDE.md template.

    Queries ``conn`` for the table list, metrics summary and the user's
    visible marketplaces, and reads instance settings through the
    ``app.instance_config`` getters.

    Args:
        conn: Connection to the system database.
        user: User dict; ``id``, ``email``, ``name``, ``is_admin`` and
            ``groups`` are read with ``.get`` so missing keys are tolerated.
        server_url: Public server URL; its hostname is exposed as
            ``server.hostname`` (``""`` when it cannot be parsed).

    Returns:
        A dict with keys ``instance``, ``server``, ``sync_interval``,
        ``data_source``, ``tables``, ``metrics``, ``marketplaces``, ``user``,
        ``now`` (timezone-aware UTC datetime) and ``today`` (ISO date string).
    """
    # Take a single UTC timestamp and derive ``today`` from it so the two
    # values always agree, independent of the server's local timezone.
    now = datetime.now(timezone.utc)
    parsed = urlparse(server_url)
    return {
        "instance": {
            "name": get_instance_name(),
            "subtitle": get_instance_subtitle(),
        },
        "server": {
            "url": server_url,
            "hostname": parsed.hostname or "",
        },
        "sync_interval": get_sync_interval(),
        "data_source": {"type": get_data_source_type()},
        "tables": _list_tables(conn),
        "metrics": _metrics_summary(conn),
        "marketplaces": _marketplaces_for_user(conn, user),
        "user": {
            "id": user.get("id", ""),
            "email": user.get("email", ""),
            "name": user.get("name") or "",
            "is_admin": bool(user.get("is_admin")),
            "groups": user.get("groups") or [],
        },
        "now": now,
        "today": now.date().isoformat(),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def compute_default_claude_md(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> str:
    """Render the shipped default template, ignoring any stored override.

    The template source comes from ``_load_default_template()`` and is
    rendered with the context built for ``user`` (StrictUndefined,
    autoescape off). A Jinja2 ``TemplateError`` propagates to the caller.
    """
    default_source = _load_default_template()
    compiled = Environment(autoescape=False, undefined=StrictUndefined).from_string(
        default_source
    )
    context = build_claude_md_context(conn, user=user, server_url=server_url)
    return compiled.render(**context)
|
||||||
|
|
||||||
|
|
||||||
|
def render_claude_md(
    conn: duckdb.DuckDBPyConnection,
    *,
    user: dict[str, Any],
    server_url: str,
) -> str:
    """Render the active CLAUDE.md template for ``user``.

    The stored override is used when its ``content`` is non-empty; otherwise
    the shipped default template is loaded. Either source is rendered through
    Jinja2 with StrictUndefined and autoescape off, so a broken template
    raises ``TemplateError`` to the caller instead of rendering partially.
    """
    stored = ClaudeMdTemplateRepository(conn).get()
    if stored.get("content"):
        active_source = stored["content"]
    else:
        active_source = _load_default_template()
    compiled = Environment(autoescape=False, undefined=StrictUndefined).from_string(
        active_source
    )
    context = build_claude_md_context(conn, user=user, server_url=server_url)
    return compiled.render(**context)
|
||||||
32
src/db.py
32
src/db.py
|
|
@ -39,7 +39,7 @@ def _maybe_instrument(con, db_tag: str):
|
||||||
|
|
||||||
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||||
|
|
||||||
SCHEMA_VERSION = 22
|
SCHEMA_VERSION = 23
|
||||||
|
|
||||||
_SYSTEM_SCHEMA = """
|
_SYSTEM_SCHEMA = """
|
||||||
CREATE TABLE IF NOT EXISTS schema_version (
|
CREATE TABLE IF NOT EXISTS schema_version (
|
||||||
|
|
@ -427,6 +427,18 @@ CREATE TABLE IF NOT EXISTS setup_banner (
|
||||||
updated_by VARCHAR,
|
updated_by VARCHAR,
|
||||||
CONSTRAINT singleton CHECK (id = 1)
|
CONSTRAINT singleton CHECK (id = 1)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- v23: customizable analyst-workspace CLAUDE.md template.
|
||||||
|
-- Singleton row (id=1). NULL content means "use the default template
|
||||||
|
-- shipped at config/claude_md_template.txt" (Jinja2 markdown). Admin override
|
||||||
|
-- stores the raw Jinja2 source string.
|
||||||
|
CREATE TABLE IF NOT EXISTS claude_md_template (
|
||||||
|
id INTEGER PRIMARY KEY DEFAULT 1,
|
||||||
|
content TEXT,
|
||||||
|
updated_at TIMESTAMP,
|
||||||
|
updated_by VARCHAR,
|
||||||
|
CONSTRAINT singleton CHECK (id = 1)
|
||||||
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1658,6 +1670,17 @@ _V21_TO_V22_MIGRATIONS = [
|
||||||
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
|
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# v22 → v23: create the singleton claude_md_template table and seed its id=1
# row with NULL content (NULL = use the shipped default template). Both
# statements are guarded (IF NOT EXISTS / ON CONFLICT DO NOTHING).
_V22_TO_V23_MIGRATIONS = [
    """CREATE TABLE IF NOT EXISTS claude_md_template (
        id INTEGER PRIMARY KEY DEFAULT 1,
        content TEXT,
        updated_at TIMESTAMP,
        updated_by VARCHAR,
        CONSTRAINT singleton CHECK (id = 1)
    )""",
    "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) ON CONFLICT (id) DO NOTHING",
]
|
||||||
|
|
||||||
|
|
||||||
def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
||||||
"""Create tables if they don't exist. Apply migrations if schema version changed.
|
"""Create tables if they don't exist. Apply migrations if schema version changed.
|
||||||
|
|
@ -1724,6 +1747,10 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
||||||
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) "
|
"INSERT INTO setup_banner (id, content) VALUES (1, NULL) "
|
||||||
"ON CONFLICT (id) DO NOTHING"
|
"ON CONFLICT (id) DO NOTHING"
|
||||||
)
|
)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
|
||||||
|
"ON CONFLICT (id) DO NOTHING"
|
||||||
|
)
|
||||||
# Fresh-install seed is handled by the unconditional
|
# Fresh-install seed is handled by the unconditional
|
||||||
# _seed_core_roles call at the bottom of _ensure_schema —
|
# _seed_core_roles call at the bottom of _ensure_schema —
|
||||||
# left as a no-op branch here so the migration ladder still
|
# left as a no-op branch here so the migration ladder still
|
||||||
|
|
@ -1807,6 +1834,9 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
|
||||||
if current < 22:
|
if current < 22:
|
||||||
for sql in _V21_TO_V22_MIGRATIONS:
|
for sql in _V21_TO_V22_MIGRATIONS:
|
||||||
conn.execute(sql)
|
conn.execute(sql)
|
||||||
|
if current < 23:
|
||||||
|
for sql in _V22_TO_V23_MIGRATIONS:
|
||||||
|
conn.execute(sql)
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE schema_version SET version = ?, applied_at = current_timestamp",
|
"UPDATE schema_version SET version = ?, applied_at = current_timestamp",
|
||||||
[SCHEMA_VERSION],
|
[SCHEMA_VERSION],
|
||||||
|
|
|
||||||
53
src/repositories/claude_md_template.py
Normal file
53
src/repositories/claude_md_template.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
"""Repository for the per-instance CLAUDE.md template override (singleton row)."""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
|
||||||
|
|
||||||
|
class ClaudeMdTemplateRepository:
    """Data access for the singleton ``claude_md_template`` row (``id = 1``).

    ``content`` holds the admin's raw template override; ``None`` means no
    override is set and the shipped default template should be used.
    """

    # Shared by set() and reset(); parameters are (content, updated_at, updated_by).
    _UPSERT_SQL = """INSERT INTO claude_md_template (id, content, updated_at, updated_by)
               VALUES (1, ?, ?, ?)
               ON CONFLICT (id) DO UPDATE SET
                   content = excluded.content,
                   updated_at = excluded.updated_at,
                   updated_by = excluded.updated_by"""

    def __init__(self, conn: duckdb.DuckDBPyConnection):
        self.conn = conn

    def get(self) -> dict[str, Any]:
        """Return the singleton row as a dict.

        Keys are ``id``, ``content``, ``updated_at`` and ``updated_by``.
        ``content`` is None when no override is set (= use shipped default
        template). If the row is missing it is re-inserted with NULL content
        and an all-None row is returned.
        """
        row = self.conn.execute(
            "SELECT id, content, updated_at, updated_by FROM claude_md_template WHERE id = 1"
        ).fetchone()
        if row is None:
            # Defensive: re-seed if a previous admin manually deleted it.
            self.conn.execute(
                "INSERT INTO claude_md_template (id, content) VALUES (1, NULL) "
                "ON CONFLICT (id) DO NOTHING"
            )
            return {"id": 1, "content": None, "updated_at": None, "updated_by": None}
        return {
            "id": row[0],
            "content": row[1],
            "updated_at": row[2],
            "updated_by": row[3],
        }

    def set(self, content: str, *, updated_by: str) -> None:
        """Store ``content`` as the override and stamp who changed it and when (UTC)."""
        self._upsert(content, updated_by)

    def reset(self, *, updated_by: str) -> None:
        """Clear override; renderer falls back to shipped default template.

        Uses the same upsert as ``set`` so ``updated_at`` / ``updated_by`` are
        recorded even if the singleton row was deleted; a plain UPDATE would
        silently affect zero rows in that case.
        """
        self._upsert(None, updated_by)

    def _upsert(self, content: Any, updated_by: str) -> None:
        """Insert or update the id=1 row with ``content`` (str or None)."""
        now = datetime.now(timezone.utc)
        self.conn.execute(self._UPSERT_SQL, [content, now, updated_by])
|
||||||
169
tests/test_claude_md_renderer.py
Normal file
169
tests/test_claude_md_renderer.py
Normal file
|
|
@ -0,0 +1,169 @@
|
||||||
|
"""Unit tests for the analyst-workspace CLAUDE.md renderer (src/claude_md.py)."""
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
import pytest
|
||||||
|
from jinja2 import TemplateError
|
||||||
|
|
||||||
|
from src.db import _ensure_schema
|
||||||
|
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||||
|
from src.claude_md import (
|
||||||
|
build_claude_md_context,
|
||||||
|
compute_default_claude_md,
|
||||||
|
render_claude_md,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield a DuckDB connection to a schema-initialised database under ``tmp_path``.

    ``DATA_DIR`` is pointed at ``tmp_path`` for the duration of the test, and
    the connection is closed on teardown.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    connection = duckdb.connect(str(tmp_path / "system.duckdb"))
    _ensure_schema(connection)
    yield connection
    connection.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _user(email="alice@example.com", is_admin=False):
|
||||||
|
return {
|
||||||
|
"id": "u1",
|
||||||
|
"email": email,
|
||||||
|
"name": "Alice",
|
||||||
|
"is_admin": is_admin,
|
||||||
|
"groups": ["Everyone"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Default (no override) — renders a non-empty markdown string
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_compute_default_returns_non_empty(conn):
    """The shipped default template renders to more than whitespace."""
    rendered = compute_default_claude_md(
        conn,
        user=_user(),
        server_url="https://example.com",
    )
    assert rendered.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_contains_server_url(conn):
    """The default template echoes the server URL it was rendered for."""
    url = "https://myagnes.example.com"
    rendered = compute_default_claude_md(conn, user=_user(), server_url=url)
    assert url in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_contains_user_reference(conn):
    """A user with an empty name is greeted by email in the default footer."""
    # The footer uses `user.name or user.email`, so an empty name falls back to email.
    nameless_user = {
        "id": "u1",
        "email": "bob@example.com",
        "name": "",
        "is_admin": False,
        "groups": [],
    }
    rendered = compute_default_claude_md(
        conn, user=nameless_user, server_url="https://example.com"
    )
    assert "bob@example.com" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_uses_default_when_no_override(conn):
    """With no override stored, ``render_claude_md`` still produces content."""
    rendered = render_claude_md(
        conn,
        user=_user(),
        server_url="https://example.com",
    )
    assert rendered.strip()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Override renders correctly
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_render_uses_override_when_set(conn):
    """A stored override is rendered with the requesting user's context."""
    override = "# {{ instance.name }} Workspace\n\nHello {{ user.email }}."
    ClaudeMdTemplateRepository(conn).set(override, updated_by="admin@example.com")

    rendered = render_claude_md(
        conn, user=_user("charlie@example.com"), server_url="https://example.com"
    )

    assert "charlie@example.com" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_override_tables_list(conn):
    """Rows in ``table_registry`` are exposed to overrides through ``tables``."""
    # One registered table is enough to prove name + description flow through.
    conn.execute(
        "INSERT INTO table_registry (id, name, description, query_mode, source_type) "
        "VALUES ('t1', 'orders', 'All orders', 'local', 'keboola')"
    )
    override = "{% for t in tables %}- {{ t.name }}: {{ t.description }}{% endfor %}"
    ClaudeMdTemplateRepository(conn).set(override, updated_by="admin@example.com")

    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")

    for expected in ("orders", "All orders"):
        assert expected in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_override_metrics_summary(conn):
    """The metrics summary exposes the metric count and category names to overrides."""
    # Seed a metric definition — must include NOT NULL columns: display_name, sql
    conn.execute(
        "INSERT INTO metric_definitions (id, name, display_name, category, sql) "
        "VALUES ('m1', 'mrr', 'MRR', 'revenue', 'SELECT SUM(amount)')"
    )
    ClaudeMdTemplateRepository(conn).set(
        "Metrics: {{ metrics.count }}, cats: {{ metrics.categories | join(', ') }}",
        updated_by="admin@example.com",
    )
    out = render_claude_md(conn, user=_user(), server_url="https://example.com")
    # Match the rendered count fragment rather than a bare "1", which would
    # also pass for counts like 10 or for any other "1" in the output.
    assert "Metrics: 1," in out  # 1 metric
    assert "revenue" in out
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# RBAC-filtered marketplaces — two users with different grants render differently
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_marketplaces_empty_for_user_with_no_grants(conn):
    """With no grants seeded, ``marketplaces`` is falsy inside the template."""
    probe = "{% if marketplaces %}HAS_PLUGINS{% else %}NO_PLUGINS{% endif %}"
    ClaudeMdTemplateRepository(conn).set(probe, updated_by="admin@example.com")

    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")

    assert "NO_PLUGINS" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Anonymous / minimal user context
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_render_with_minimal_user_context(conn):
    """Overrides can read ``user.email`` and ``user.is_admin`` from the context."""
    # NOTE(review): this passes the full `_user()` dict, not a stripped-down
    # one — confirm whether a truly minimal user dict was intended.
    override = "User: {{ user.email }}, admin: {{ user.is_admin }}"
    ClaudeMdTemplateRepository(conn).set(override, updated_by="admin@example.com")

    rendered = render_claude_md(conn, user=_user(), server_url="https://example.com")

    for expected in ("alice@example.com", "False"):
        assert expected in rendered
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Build context shape
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_context_exposes_all_documented_keys(conn):
    """Every top-level placeholder named in the docs is present in the context."""
    documented = (
        "instance",
        "server",
        "sync_interval",
        "data_source",
        "tables",
        "metrics",
        "marketplaces",
        "user",
        "now",
        "today",
    )
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    for key in documented:
        assert key in context, f"missing context key: {key}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_context_tables_is_list(conn):
    """``tables`` is always a list, even with nothing registered."""
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    tables = context["tables"]
    assert isinstance(tables, list)
|
||||||
|
|
||||||
|
|
||||||
|
def test_context_metrics_shape(conn):
    """``metrics`` carries both the ``count`` and ``categories`` keys."""
    metrics = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )["metrics"]
    for key in ("count", "categories"):
        assert key in metrics
|
||||||
|
|
||||||
|
|
||||||
|
def test_context_marketplaces_is_list(conn):
    """``marketplaces`` is always a list, even when the user has no grants."""
    context = build_claude_md_context(
        conn, user=_user(), server_url="https://example.com"
    )
    marketplaces = context["marketplaces"]
    assert isinstance(marketplaces, list)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Render failure raises (caller handles)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_render_raises_on_template_error(conn):
    """A stored override that fails to render surfaces as ``TemplateError``."""
    # Store an override that references a name absent from the context.
    repo = ClaudeMdTemplateRepository(conn)
    repo.set("{{ does_not_exist }}", updated_by="admin@example.com")

    with pytest.raises(TemplateError):
        render_claude_md(conn, user=_user(), server_url="https://example.com")
|
||||||
40
tests/test_claude_md_template_repo.py
Normal file
40
tests/test_claude_md_template_repo.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
"""Unit tests for ClaudeMdTemplateRepository."""
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.db import _ensure_schema
|
||||||
|
from src.repositories.claude_md_template import ClaudeMdTemplateRepository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def conn(tmp_path):
    """Yield a DuckDB connection to a per-test database with the schema applied.

    The database file lives under pytest's ``tmp_path``. The connection is
    closed in ``finally`` so it is released even when ``_ensure_schema``
    raises during setup, not only after a normal teardown.
    """
    db_path = tmp_path / "system.duckdb"
    c = duckdb.connect(str(db_path))
    try:
        _ensure_schema(c)
        yield c
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_returns_none_on_fresh_install(conn):
    """On a fresh install ``get()`` returns a row whose ``content`` is None.

    Despite the test name, the row itself is expected to exist; only its
    ``content`` field is None.
    """
    row = ClaudeMdTemplateRepository(conn).get()
    assert row is not None
    assert row["content"] is None  # default sentinel
|
||||||
|
|
||||||
|
|
||||||
|
def test_set_stores_content(conn):
    """``set`` persists the content, the author, and a non-null ``updated_at``."""
    template = "# {{ instance.name }}"
    author = "admin@example.com"

    repo = ClaudeMdTemplateRepository(conn)
    repo.set(template, updated_by=author)

    stored = repo.get()
    assert stored["content"] == template
    assert stored["updated_by"] == author
    assert stored["updated_at"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_reset_clears_content(conn):
    """``reset`` after ``set`` leaves the row's ``content`` as None."""
    author = "admin@example.com"
    repo = ClaudeMdTemplateRepository(conn)

    repo.set("custom template", updated_by=author)
    repo.reset(updated_by=author)

    assert repo.get()["content"] is None
|
||||||
|
|
@ -13,8 +13,8 @@ import duckdb
|
||||||
from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version
|
from src.db import SCHEMA_VERSION, _ensure_schema, get_schema_version
|
||||||
|
|
||||||
|
|
||||||
def test_schema_version_is_22():
|
def test_schema_version_is_23():
|
||||||
assert SCHEMA_VERSION == 22
|
assert SCHEMA_VERSION == 23
|
||||||
|
|
||||||
|
|
||||||
def test_v20_adds_source_query(tmp_path):
|
def test_v20_adds_source_query(tmp_path):
|
||||||
|
|
@ -29,7 +29,29 @@ def test_v20_adds_source_query(tmp_path):
|
||||||
).fetchall()
|
).fetchall()
|
||||||
}
|
}
|
||||||
assert "source_query" in cols, f"source_query missing from {cols}"
|
assert "source_query" in cols, f"source_query missing from {cols}"
|
||||||
assert get_schema_version(conn) == 22
|
assert get_schema_version(conn) == 23
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_v23_adds_claude_md_template(tmp_path):
    """v23 must create the claude_md_template singleton table.

    After ``_ensure_schema`` runs on a new database, the table must be listed
    in the ``main`` schema and the row with ``id = 1`` must exist with NULL
    ``content``. The connection is closed in ``finally`` so a failing
    assertion does not leave the database file open.
    """
    db_path = tmp_path / "system.duckdb"
    conn = duckdb.connect(str(db_path))
    try:
        _ensure_schema(conn)

        tables = {
            r[0] for r in conn.execute(
                "SELECT table_name FROM information_schema.tables "
                "WHERE table_schema = 'main'"
            ).fetchall()
        }
        assert "claude_md_template" in tables, f"claude_md_template missing from {tables}"

        # Singleton row seeded
        row = conn.execute("SELECT id, content FROM claude_md_template WHERE id = 1").fetchone()
        assert row is not None
        assert row[0] == 1
        assert row[1] is None  # default = no override
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -61,7 +83,7 @@ def test_v19_db_migrates_to_v20(tmp_path):
|
||||||
|
|
||||||
_ensure_schema(conn)
|
_ensure_schema(conn)
|
||||||
|
|
||||||
assert get_schema_version(conn) == 22
|
assert get_schema_version(conn) == 23
|
||||||
cols = {
|
cols = {
|
||||||
r[0] for r in conn.execute(
|
r[0] for r in conn.execute(
|
||||||
"SELECT column_name FROM information_schema.columns "
|
"SELECT column_name FROM information_schema.columns "
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue