"""Corporate memory endpoints — knowledge items, voting, governance admin, contradictions.""" import asyncio import json import logging import uuid from typing import Optional, List from fastapi import APIRouter, Depends, HTTPException, Response from pydantic import BaseModel, ConfigDict, Field import duckdb from app.auth.dependencies import get_current_user, _get_db from app.auth.access import require_admin, is_user_admin, can_access from src.repositories.knowledge import KnowledgeRepository from src.repositories.memory_domains import MemoryDomainsRepository from src.repositories.audit import AuditRepository logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/memory", tags=["memory"]) # v49: ``mandatory`` is no longer a lifecycle status — Required tier rides on # ``knowledge_items.is_required``. ``status`` covers lifecycle only (pending, # approved, rejected, revoked, expired). VALID_STATUSES = ["pending", "approved", "rejected", "revoked", "expired"] BUNDLE_TOKEN_BUDGET = 6000 # Rough chars-per-token estimate (conservative). _CHARS_PER_TOKEN = 4 # v49: domain set is no longer a hardcoded enum — it lives in the # ``memory_domains`` table and is administrable via /admin/memory-domains. # Validation uses ``MemoryDomainsRepository.exists_by_slug``. def _validate_domain_slug(slug: Optional[str], conn: duckdb.DuckDBPyConnection) -> None: """Raise 400 if ``slug`` is truthy but doesn't resolve to a memory_domains row.""" if not slug: return if not MemoryDomainsRepository(conn).exists_by_slug(slug): raise HTTPException( status_code=400, detail=f"Unknown memory domain slug: {slug!r}", ) # API-layer allowlist for ``POST /api/memory/admin/bulk-update``. The repo's # ``_UPDATABLE_FIELDS`` is intentionally broader (``status``, ``sensitivity``, # ``is_personal``, ``confidence``, valid_from/until, supersedes, etc.) so the # narrow per-item ``update`` path can still touch them; bulk-edit must NOT, # because changing status / personal-flag / sensitivity in bulk bypasses the # proper governance flow (``/admin/mandate``, ``/admin/revoke``, # ``/{id}/personal``) and its dedicated audit rows. Callers that need those # fields in bulk should use the per-item endpoints. See PR #126 review. _BULK_UPDATE_ALLOWED = frozenset({ "category", "domain", "tags", "tags_add", "tags_remove", "audience", "title", "content", }) def _is_privileged_viewer(user: dict, conn: duckdb.DuckDBPyConnection) -> bool: """Admins (members of the Admin system group, per RBAC v13) are the privileged viewer tier. Pre-v13 the schema also had a km_admin role; v13 collapsed the role hierarchy into groups, so the corporate-memory admin capability now lives on top of plain admin membership. Module authors needing a finer-grained gate (curator-only, etc.) should add a ``ResourceType.CORPORATE_MEMORY_ADMIN`` resource type and gate with ``require_resource_access`` instead of extending this helper.""" user_id = user.get("id") if not user_id: return False return is_user_admin(user_id, conn) def _effective_groups( user: dict, conn: duckdb.DuckDBPyConnection ) -> Optional[List[str]]: """Audience-filter group list for the caller, or ``None`` for admins (no filter — see all items regardless of audience). Reads from ``user_group_members`` JOIN ``user_groups`` (the v13 model). Pre-v13 this read ``users.groups`` JSON; that column was dropped in v13 and the membership is now materialized in ``user_group_members`` with a ``source`` discriminator (admin / google_sync / system_seed). """ if _is_privileged_viewer(user, conn): return None user_id = user.get("id") if not user_id: return [] rows = conn.execute( """SELECT g.name FROM user_group_members m JOIN user_groups g ON m.group_id = g.id WHERE m.user_id = ?""", [user_id], ).fetchall() return [f"group:{r[0]}" for r in rows] def _caller_granted_memory_domains( user: dict, conn: duckdb.DuckDBPyConnection, ) -> Optional[List[str]]: """Domains the caller has been granted access to via resource_grants. The grant model is generic — admins assign ``MEMORY_DOMAIN`` resources (e.g. ``md_finance``) to ``user_groups`` rows via ``/admin/access``. This helper resolves the caller's group memberships against ``resource_grants`` and returns the union of ``memory_domains.id`` values (v49: the migration re-pointed grants from slug to id). Returns ``None`` for privileged viewers (admins see everything regardless of grants — same convention as ``_effective_groups``). Returns an empty list when the caller has no grants — the SQL EXISTS-join collapses in that case, preserving pre-RBAC behaviour. """ if _is_privileged_viewer(user, conn): return None user_id = user.get("id") if not user_id: return [] rows = conn.execute( """SELECT DISTINCT rg.resource_id FROM resource_grants rg JOIN user_group_members m ON m.group_id = rg.group_id WHERE m.user_id = ? AND rg.resource_type = 'memory_domain'""", [user_id], ).fetchall() return [r[0] for r in rows] def _can_view_item(user: dict, item: dict, is_priv: bool) -> bool: """Personal items are visible only to the contributor and privileged viewers. Non-personal items are visible to any authenticated user. ``is_priv`` is pre-computed by the caller (one DB hit per request) so a per-item loop doesn't re-query ``user_group_members`` for every row. """ if not item.get("is_personal"): return True if is_priv: return True return item.get("source_user") == user.get("email") class CreateKnowledgeRequest(BaseModel): title: str content: str # Allow callers to POST either `domain_slug` (new canonical name, # matching admin/repo/template layers) or `domain` (legacy alias kept # for one release so existing API callers don't break — Pydantic v2 # accepts the alias on input, Python code reads `request.domain_slug`). model_config = ConfigDict(populate_by_name=True) category: str tags: Optional[List[str]] = None domain_slug: Optional[str] = Field(default=None, alias="domain") entities: Optional[List[str]] = None source_type: Optional[str] = None class VoteRequest(BaseModel): vote: int class PersonalFlagRequest(BaseModel): is_personal: bool class AdminActionRequest(BaseModel): reason: Optional[str] = None audience: Optional[str] = None class EditRequest(BaseModel): title: Optional[str] = None content: Optional[str] = None class BatchActionRequest(BaseModel): item_ids: List[str] action: str # approve, reject, mandate, revoke reason: Optional[str] = None audience: Optional[str] = None class ResolveContradictionRequest(BaseModel): resolution: str # kept_a, kept_b, merged, both_valid class CreateContradictionRequest(BaseModel): item_a_id: str item_b_id: str explanation: str severity: Optional[str] = None suggested_resolution: Optional[str] = None class PatchItemRequest(BaseModel): """Partial update for a knowledge item via PATCH /api/memory/admin/{id}. Replaces the narrow ``EditRequest`` (title + content only). Any field left as ``None`` is unchanged. Domain is validated against ``VALID_DOMAINS`` when supplied. ``domain_ids`` is the M:N junction write path (knowledge_item_domains) used by the admin item-edit modal's chip-input — pass a list of memory_domains.id strings and the endpoint replaces the item's full domain membership atomically. Empty list ``[]`` clears all memberships. Supplying both ``domain`` and ``domain_ids`` is allowed (the legacy single ``domain`` write happens first, the junction replace overrides it). """ title: Optional[str] = None content: Optional[str] = None category: Optional[str] = None domain: Optional[str] = None domain_ids: Optional[List[str]] = None tags: Optional[List[str]] = None audience: Optional[str] = None class BulkUpdateRequest(BaseModel): """Apply ``updates`` to every id in ``item_ids``. Issue #62.""" item_ids: List[str] updates: dict class ResolveDuplicateRequest(BaseModel): """Resolve a duplicate-candidate relation row. ``resolution`` is one of ``duplicate`` / ``different`` / ``dismissed`` (decision 2 in issue #62 — no auto-merge action; merging is a separate larger feature). """ resolution: str # ---- Memory domain catalog (v49 — frontend typeahead + admin dropdowns) ---- @router.get("/domains") async def list_memory_domains( user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """List all memory domains for chip-input typeahead + dropdown population. v49: replaces the hardcoded ``VALID_DOMAINS`` constant. Returns every row in ``memory_domains`` (admin-administered + the six canonical seed rows) so the frontend can render the picker without a separate /admin endpoint. Authenticated users only — domain catalog is non-sensitive metadata that powers the item-edit UI. """ domains = MemoryDomainsRepository(conn).list() return { "domains": [ { "id": d["id"], "slug": d["slug"], "name": d["name"], "description": d["description"], "icon": d["icon"], "color": d["color"], } for d in domains ] } # ---- User endpoints ---- @router.get("") async def list_knowledge( status_filter: Optional[str] = None, category: Optional[str] = None, domain: Optional[str] = None, source_type: Optional[str] = None, search: Optional[str] = None, exclude_personal: bool = True, upvoted_by_me: bool = False, hide_dismissed: bool = False, is_required: Optional[bool] = None, page: int = 1, per_page: int = 50, sort: str = "updated_at", user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """List knowledge items with filtering, pagination, search. ``upvoted_by_me=true`` narrows to items the caller upvoted (powers the "My Upvotes" filter on /corporate-memory — replaces the old dead "My Rules" category sentinel). """ repo = KnowledgeRepository(conn) page = max(page, 1) offset = (page - 1) * per_page # Privacy: non-privileged viewers can never opt out of the personal filter. # Their own personal contributions are visible via /my-contributions, not here. effective_exclude_personal = True if not _is_privileged_viewer(user, conn) else exclude_personal effective_groups = _effective_groups(user, conn) granted_domains = _caller_granted_memory_domains(user, conn) statuses = [status_filter] if status_filter else None upvoted_by_user_id = user["id"] if upvoted_by_me else None # v46: caller's id is plumbed to repo filters when hide_dismissed=True so # the SQL can NOT-EXISTS-subquery against knowledge_item_user_dismissed. # Mandatory items are exempted by the subquery's status guard. dismissed_by_user_id = user["id"] if search: items = repo.search( search, exclude_personal=effective_exclude_personal, user_groups=effective_groups, granted_domains=granted_domains, statuses=statuses, category=category, domain=domain, source_type=source_type, is_required=is_required, dismissed_by_user=dismissed_by_user_id, hide_dismissed=hide_dismissed, limit=per_page, offset=offset, ) if upvoted_by_user_id: # Best-effort post-filter for the search() path (which doesn't # plumb the upvote filter into its SQL). Search + "My Upvotes" # is rare enough that a post-filter is fine. upvoted_ids = { r[0] for r in conn.execute( "SELECT item_id FROM knowledge_votes WHERE user_id = ? AND vote > 0", [upvoted_by_user_id], ).fetchall() } items = [it for it in items if it["id"] in upvoted_ids] else: items = repo.list_items( statuses=statuses, category=category, domain=domain, source_type=source_type, is_required=is_required, exclude_personal=effective_exclude_personal, user_groups=effective_groups, granted_domains=granted_domains, upvoted_by_user=upvoted_by_user_id, dismissed_by_user=dismissed_by_user_id, hide_dismissed=hide_dismissed, limit=per_page, offset=offset, ) # Enrich with votes + per-user dismissal flag. The set lookup keeps the # per-item annotation O(1); the frontend uses ``dismissed_by_me`` to # render the gray-out state without a separate roundtrip. dismissed_set = set(repo.list_dismissed_ids(user["id"])) for item in items: votes = repo.get_votes(item["id"]) item["upvotes"] = votes["upvotes"] item["downvotes"] = votes["downvotes"] item["score"] = votes["upvotes"] - votes["downvotes"] item["dismissed_by_me"] = item["id"] in dismissed_set import math total_count = repo.count_items( search=search, statuses=statuses, category=category, domain=domain, source_type=source_type, is_required=is_required, exclude_personal=effective_exclude_personal, user_groups=effective_groups, granted_domains=granted_domains, dismissed_by_user=dismissed_by_user_id, hide_dismissed=hide_dismissed, ) total_pages = math.ceil(total_count / per_page) if per_page > 0 else 1 return { "items": items, "count": len(items), "page": page, "per_page": per_page, "total_count": total_count, "total_pages": total_pages, } @router.get("/stats") async def get_stats( user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get corporate memory statistics. Aggregations exclude personal items for non-privileged callers — otherwise `total` and the `by_*` counts would change in observable ways when a colleague flags or unflags a personal item, leaking existence info per ADR Decision 1. Uses SQL aggregation rather than ``repo.list_items()`` to keep the endpoint cheap on large knowledge bases (the loader path materializes every row + parses JSON tags/contributors per row, which blocks the event loop on N>1k items). Audience filter mirrors what list_items applies: ``audience IS NULL OR audience = 'all'`` plus, for non-admins, membership in any of the caller's group-prefixed audiences. """ is_priv = _is_privileged_viewer(user, conn) groups = _effective_groups(user, conn) granted_domains = _caller_granted_memory_domains(user, conn) where_clauses: List[str] = [] params: list = [] if not is_priv: # Personal-item privacy: non-privileged callers see no personal items # in the aggregate, even their own. /my-contributions is the canonical # surface for a user's personal contributions; including them here # would make /api/memory/stats.total disagree with the count visible # via GET /api/memory (which forces exclude_personal=True for non- # admins regardless of source_user). where_clauses.append("(is_personal IS NULL OR is_personal = FALSE)") if groups is not None: # Mirror the visibility composition KnowledgeRepository.list_items # uses: audience match OR MEMORY_DOMAIN grant. Without this the # stats `total` diverges from the list endpoint's `total_count` for # non-admin users with grants. v49: granted_domains values are # ``memory_domains.id`` and resolve via the junction EXISTS subquery. visibility = ["audience IS NULL", "audience = 'all'"] if groups: placeholders = ",".join(["?"] * len(groups)) visibility.append(f"audience IN ({placeholders})") params.extend(groups) if granted_domains: domain_placeholders = ",".join(["?"] * len(granted_domains)) visibility.append( "EXISTS (SELECT 1 FROM knowledge_item_domains kid " "WHERE kid.item_id = knowledge_items.id " f"AND kid.domain_id IN ({domain_placeholders}))" ) params.extend(granted_domains) where_clauses.append("(" + " OR ".join(visibility) + ")") where_sql = (" WHERE " + " AND ".join(where_clauses)) if where_clauses else "" total = conn.execute( f"SELECT COUNT(*) FROM knowledge_items{where_sql}", params ).fetchone()[0] or 0 by_status_rows = conn.execute( f"SELECT COALESCE(status, 'unknown') AS s, COUNT(*) " f"FROM knowledge_items{where_sql} GROUP BY s", params, ).fetchall() by_status = {r[0]: r[1] for r in by_status_rows} cat_rows = conn.execute( f"SELECT DISTINCT category FROM knowledge_items{where_sql} " f"{'AND' if where_sql else 'WHERE'} category IS NOT NULL", params, ).fetchall() categories = sorted(r[0] for r in cat_rows if r[0]) # v49: domain lives in the junction. LEFT JOIN to surface 'unset' bucket # for items without any domain row, matching the pre-v49 COALESCE behavior. by_domain_rows = conn.execute( "SELECT COALESCE(md.slug, 'unset') AS d, COUNT(*) " "FROM knowledge_items " "LEFT JOIN knowledge_item_domains kid ON kid.item_id = knowledge_items.id " "LEFT JOIN memory_domains md ON md.id = kid.domain_id" + (where_sql or "") + " GROUP BY d", params, ).fetchall() by_domain = {r[0]: r[1] for r in by_domain_rows} by_source_rows = conn.execute( f"SELECT COALESCE(source_type, 'unknown') AS st, COUNT(*) " f"FROM knowledge_items{where_sql} GROUP BY st", params, ).fetchall() by_source_type = {r[0]: r[1] for r in by_source_rows} # by_tag + by_audience extend stats for the chip-filter UI (issue #62). # The repo helpers honor the same audience + personal-item filters this # endpoint applies above. repo = KnowledgeRepository(conn) exclude_personal_for_caller = not is_priv by_tag = repo.count_by_tag( exclude_personal=exclude_personal_for_caller, user_groups=groups, granted_domains=granted_domains, ) by_audience = repo.count_by_audience( exclude_personal=exclude_personal_for_caller, user_groups=groups, granted_domains=granted_domains, ) return { "total": total, "by_status": by_status, "categories": categories, "by_domain": by_domain, "by_source_type": by_source_type, "by_tag": by_tag, "by_audience": by_audience, } @router.post("", status_code=201) async def create_knowledge( request: CreateKnowledgeRequest, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): # Mirror the validation already enforced by PATCH /admin/{id} and bulk-update # so an item can't be created with a domain it can't be patched to. Empty / # missing domain is fine — only reject non-empty values outside the allowlist. # See PR #126 review. _validate_domain_slug(request.domain_slug, conn) repo = KnowledgeRepository(conn) item_id = str(uuid.uuid4()) # Best-effort auto-tagging — runs only when an LLM extractor is configured. tags = list(request.tags) if request.tags else [] try: from config.loader import load_instance_config from connectors.llm import create_extractor from services.corporate_memory.tagger import auto_tag_items cfg = load_instance_config() ai_cfg = cfg.get("ai") if ai_cfg: extractor = create_extractor(ai_cfg) stub = [{"id": item_id, "title": request.title, "content": request.content}] assignments = await asyncio.to_thread(auto_tag_items, stub, extractor) topics = assignments.get(item_id, []) if topics: seen: set[str] = set() merged: list[str] = [] for t in topics + tags: if t not in seen: seen.add(t) merged.append(t) tags = merged except Exception: pass # tagging is non-critical — never block item creation create_kwargs = dict( id=item_id, title=request.title, content=request.content, category=request.category, source_user=user.get("email"), tags=tags or None, domain=request.domain_slug, entities=request.entities, confidence=0.50, ) if request.source_type: create_kwargs["source_type"] = request.source_type repo.create(**create_kwargs) return {"id": item_id, "status": "pending"} @router.post("/{item_id}/vote") async def vote_knowledge( item_id: str, request: VoteRequest, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): if request.vote not in (1, -1, 0): raise HTTPException(status_code=400, detail="Vote must be 1, -1, or 0 (retract)") repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)): raise HTTPException(status_code=404, detail="Knowledge item not found") if request.vote == 0: repo.unvote(item_id, user["id"]) else: repo.vote(item_id, user["id"], request.vote) return repo.get_votes(item_id) @router.get("/my-votes") async def get_my_votes( user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get current user's votes on all items.""" results = conn.execute( "SELECT item_id, vote FROM knowledge_votes WHERE user_id = ?", [user["id"]] ).fetchall() return {row[0]: row[1] for row in results} @router.get("/my-contributions") async def get_my_contributions( user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get knowledge items contributed by the current user.""" repo = KnowledgeRepository(conn) email = user.get("email", "") items = repo.get_user_contributions(email) for item in items: votes = repo.get_votes(item["id"]) item["upvotes"] = votes["upvotes"] item["downvotes"] = votes["downvotes"] item["score"] = votes["upvotes"] - votes["downvotes"] return {"items": items, "count": len(items)} @router.post("/{item_id}/personal") async def toggle_personal_flag( item_id: str, request: PersonalFlagRequest, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Toggle personal/excluded flag on a knowledge item (only by the contributor).""" repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item: raise HTTPException(status_code=404, detail="Knowledge item not found") if item.get("source_user") != user.get("email"): raise HTTPException(status_code=403, detail="Only the contributor can flag personal items") repo.set_personal(item_id, request.is_personal) return {"id": item_id, "is_personal": request.is_personal} @router.post("/{item_id}/dismiss") async def dismiss_item( item_id: str, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Per-user opt-out — remove an item from the caller's AI bundle. Idempotent: re-dismissing an already-dismissed item is a no-op success. Mandatory items can never be dismissed — the governance hard rule — so a POST against one returns 400 with a clear detail message. """ repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)): raise HTTPException(status_code=404, detail="Knowledge item not found") # v49: Required tier rides on ``is_required`` (was status='mandatory'). if item.get("is_required") is True: raise HTTPException(status_code=400, detail="Cannot dismiss a mandatory item") repo.dismiss(user["id"], item_id) # v49 Section 9.2 — telemetry. domain_ids surfaces the per-item domain # membership so /admin/telemetry can correlate dismissals with the # domain they came from. try: from src.repositories.memory_domains import MemoryDomainsRepository from src.repositories.usage import UsageRepository domain_ids = [ d["id"] for d in MemoryDomainsRepository(conn).list_domains_of_item(item_id) ] UsageRepository(conn).emit_server_event( event_type="memory.dismiss", user_id=user["id"], username=user.get("email") or user["id"], props={"item_id": item_id, "domain_ids": domain_ids}, ) except Exception: pass return {"id": item_id, "dismissed": True} @router.delete("/{item_id}/dismiss", status_code=204) async def undismiss_item( item_id: str, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Idempotent un-dismiss — a second DELETE still returns 204. Returns 404 if the item itself doesn't exist (consistent with the rest of the per-item endpoints); the dismissal row's existence is not consulted because absence is the success state. """ repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)): raise HTTPException(status_code=404, detail="Knowledge item not found") repo.undismiss(user["id"], item_id) # v49 Section 9.2 — telemetry. Best-effort fire-and-forget. Endpoint # returns 204 No Content (the decorator status_code overrides any # body), so no return value needed; telemetry is the only side effect # we still want. try: from src.repositories.usage import UsageRepository UsageRepository(conn).emit_server_event( event_type="memory.undismiss", user_id=user["id"], username=user.get("email") or user["id"], props={"item_id": item_id}, ) except Exception: pass @router.get("/{item_id}/provenance") async def get_provenance( item_id: str, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get source provenance for a knowledge item.""" repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)): raise HTTPException(status_code=404, detail="Knowledge item not found") return { "id": item_id, "source_type": item.get("source_type"), "source_ref": item.get("source_ref"), "source_user": item.get("source_user"), "confidence": item.get("confidence"), "domain": item.get("domain"), "entities": item.get("entities"), "valid_from": item.get("valid_from"), "valid_until": item.get("valid_until"), "supersedes": item.get("supersedes"), "created_at": item.get("created_at"), } # ---- Admin governance endpoints ---- def _get_item_or_404(repo: KnowledgeRepository, item_id: str) -> dict: item = repo.get_by_id(item_id) if not item: raise HTTPException(status_code=404, detail="Knowledge item not found") return item def _audit_action(conn, admin_email: str, action: str, item_id: str, details: dict = None): """Write an admin governance audit row. Action names use the ``corporate_memory.`` namespace as advertised in the 0.15.0 CHANGELOG. Pre-#62 the code wrote ``km_`` — the audit-tab filter (see ``admin_audit`` below) accepts both prefixes so historical rows still surface. """ audit = AuditRepository(conn) audit.log( user_id=admin_email, action=f"corporate_memory.{action}", resource=item_id, params=details, ) @router.post("/admin/approve") async def admin_approve( item_id: str, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.update_status(item_id, "approved") _audit_action(conn, user["email"], "approve", item_id) return {"id": item_id, "status": "approved"} @router.post("/admin/reject") async def admin_reject( item_id: str, request: AdminActionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.update_status(item_id, "rejected") _audit_action(conn, user["email"], "reject", item_id, {"reason": request.reason}) return {"id": item_id, "status": "rejected"} @router.post("/admin/mandate") async def admin_mandate( item_id: str, request: AdminActionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """v49: Required tier rides on ``knowledge_items.is_required`` boolean — ``status`` is reserved for lifecycle (pending/approved/rejected/revoked/ expired). This endpoint keeps the path stable for back-compat; response shape now surfaces ``is_required: True`` instead of ``status: 'mandatory'``. """ repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.set_is_required(item_id, True) if request.audience is not None: repo.update(item_id, audience=request.audience) _audit_action(conn, user["email"], "mandate", item_id, { "reason": request.reason, "audience": request.audience, }) # v49 Section 9.1 — spec table maps both mark-mandatory and the legacy # mandate endpoint to the canonical ``memory_item.set_required`` action # with a boolean payload so audit consumers can stop splitting on path. try: AuditRepository(conn).log( user_id=user["email"], action="memory_item.set_required", resource=f"knowledge_item:{item_id}", params={"new_value": True}, ) except Exception: pass return {"id": item_id, "is_required": True, "status": "mandatory"} @router.post("/items/{item_id}/mark-mandatory") async def mark_mandatory( item_id: str, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Promote an item to required (``is_required = TRUE``). v49: explicit path-segment variant of the legacy ``/admin/mandate`` query- param endpoint, matching the spec's Section 6 mapping table. Same audit pattern but no audience / reason fields — those stay on /admin/mandate. """ repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.set_is_required(item_id, True) AuditRepository(conn).log( user_id=user["email"], action="memory_item.set_required", resource=f"knowledge_item:{item_id}", params={"new_value": True}, ) return {"id": item_id, "is_required": True} @router.post("/items/{item_id}/mark-unmandatory") async def mark_unmandatory( item_id: str, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Demote an item from required (``is_required = FALSE``). v49 — inverse of mark-mandatory. The item stays in the catalog with its existing ``status`` (typically ``approved``); only the required-tier flag flips. Audit row writes ``memory_item.set_required`` with ``{new_value: false}``. """ repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.set_is_required(item_id, False) AuditRepository(conn).log( user_id=user["email"], action="memory_item.set_required", resource=f"knowledge_item:{item_id}", params={"new_value": False}, ) return {"id": item_id, "is_required": False} @router.post("/admin/revoke") async def admin_revoke( item_id: str, request: AdminActionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) repo.update_status(item_id, "revoked") _audit_action(conn, user["email"], "revoke", item_id, {"reason": request.reason}) return {"id": item_id, "status": "revoked"} @router.post("/admin/edit") async def admin_edit( item_id: str, request: EditRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) updates = {} if request.title is not None: updates["title"] = request.title if request.content is not None: updates["content"] = request.content if updates: repo.update(item_id, **updates) _audit_action(conn, user["email"], "edit", item_id, updates) return {"id": item_id, "updated": list(updates.keys())} @router.post("/admin/batch") async def admin_batch( request: BatchActionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Batch governance action on multiple items. v49: ``mandate`` flips the new ``is_required`` boolean to TRUE (was ``status='mandatory'`` overload). Other actions still drive ``status``. """ repo = KnowledgeRepository(conn) # mandate is special — it writes is_required, not status. All other # actions stay on the status lifecycle column. status_actions = { "approve": "approved", "reject": "rejected", "revoke": "revoked", } if request.action not in (*status_actions, "mandate"): raise HTTPException(status_code=400, detail=f"Invalid action: {request.action}") results = {"success": [], "not_found": []} for item_id in request.item_ids: item = repo.get_by_id(item_id) if not item: results["not_found"].append(item_id) continue if request.action == "mandate": repo.set_is_required(item_id, True) if request.audience is not None: repo.update(item_id, audience=request.audience) else: repo.update_status(item_id, status_actions[request.action]) _audit_action(conn, user["email"], request.action, item_id, { "reason": request.reason, "audience": request.audience, "batch": True, }) results["success"].append(item_id) return results @router.get("/admin/pending") async def admin_pending( category: Optional[str] = None, page: int = 1, per_page: int = 50, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get pending items queue for admin review.""" repo = KnowledgeRepository(conn) page = max(page, 1) offset = (page - 1) * per_page items = repo.list_items(statuses=["pending"], category=category, limit=per_page, offset=offset) return {"items": items, "count": len(items)} @router.get("/admin/audit") async def admin_audit( page: int = 1, per_page: int = 50, action: Optional[str] = None, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Get governance audit log. Filters ``corporate_memory.`` rows AND legacy ``km_`` rows. The dual prefix is here because rows already in the audit log keep the legacy ``km_*`` action name (no migration of historical audit rows — they are write-once); new rows use the ``corporate_memory.*`` namespace. See issue #62 decision E. """ # Pagination: page is 1-indexed; offset must apply to BOTH branches so the # UI's per-page navigation actually returns subsequent rows. Pre-fix, both # SQL paths had LIMIT only and silently returned page 1 for every page. offset = (max(page, 1) - 1) * per_page if action: # Match the action across both prefixes so the per-action filter still # surfaces historical rows. rows = conn.execute( """SELECT * FROM audit_log WHERE action IN (?, ?) ORDER BY timestamp DESC LIMIT ? OFFSET ?""", [f"corporate_memory.{action}", f"km_{action}", per_page, offset], ).fetchall() else: rows = conn.execute( """SELECT * FROM audit_log WHERE action LIKE 'corporate_memory.%' OR action LIKE 'km_%' ORDER BY timestamp DESC LIMIT ? OFFSET ?""", [per_page, offset], ).fetchall() if rows: columns = [desc[0] for desc in conn.description] entries = [dict(zip(columns, row)) for row in rows] else: entries = [] return {"entries": entries, "count": len(entries)} # ---- Admin contradiction endpoints ---- @router.get("/admin/contradictions") async def admin_contradictions( resolved: Optional[bool] = None, exclude_personal: bool = True, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """List knowledge contradictions for admin review. By default (`exclude_personal=True`), personal items are replaced with {id, hidden: true} so the contradiction record is still visible for governance but personal content is not exposed. Pass exclude_personal=false to opt in to full content (KM_ADMIN only — see ADR Decision 1). """ repo = KnowledgeRepository(conn) contradictions = repo.list_contradictions(resolved=resolved) # Collect all distinct item IDs and fetch in one query (M5 batch optimisation). all_item_ids = list({ id_ for c in contradictions for id_ in (c["item_a_id"], c["item_b_id"]) }) items_by_id = repo.get_by_ids(all_item_ids) for c in contradictions: item_a = items_by_id.get(c["item_a_id"]) item_b = items_by_id.get(c["item_b_id"]) if exclude_personal: c["item_a"] = {"id": c["item_a_id"], "hidden": True} if item_a and item_a.get("is_personal") else item_a c["item_b"] = {"id": c["item_b_id"], "hidden": True} if item_b and item_b.get("is_personal") else item_b else: c["item_a"] = item_a c["item_b"] = item_b return {"contradictions": contradictions, "count": len(contradictions)} @router.post("/admin/contradictions", status_code=201) async def admin_create_contradiction( request: CreateContradictionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Admin endpoint for manually recording a contradiction between two knowledge items.""" repo = KnowledgeRepository(conn) if not repo.get_by_id(request.item_a_id): raise HTTPException(status_code=404, detail=f"Item A not found: {request.item_a_id}") if not repo.get_by_id(request.item_b_id): raise HTTPException(status_code=404, detail=f"Item B not found: {request.item_b_id}") cid = repo.create_contradiction( item_a_id=request.item_a_id, item_b_id=request.item_b_id, explanation=request.explanation, severity=request.severity, suggested_resolution=request.suggested_resolution, ) return {"id": cid} @router.post("/admin/contradictions/{contradiction_id}/resolve") async def admin_resolve_contradiction( contradiction_id: str, request: ResolveContradictionRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Resolve a knowledge contradiction.""" repo = KnowledgeRepository(conn) contradiction = repo.get_contradiction(contradiction_id) if not contradiction: raise HTTPException(status_code=404, detail="Contradiction not found") if contradiction.get("resolved"): raise HTTPException(status_code=400, detail="Contradiction already resolved") valid_resolutions = ["kept_a", "kept_b", "merged", "both_valid"] if request.resolution not in valid_resolutions: raise HTTPException( status_code=400, detail=f"Resolution must be one of: {valid_resolutions}", ) repo.resolve_contradiction(contradiction_id, user["email"], request.resolution) _audit_action(conn, user["email"], "resolve_contradiction", contradiction_id, { "resolution": request.resolution, "item_a_id": contradiction["item_a_id"], "item_b_id": contradiction["item_b_id"], }) return {"id": contradiction_id, "resolved": True, "resolution": request.resolution} # ---- Admin duplicate-candidate endpoints (issue #62) ---- VALID_DUPLICATE_RESOLUTIONS = ["duplicate", "different", "dismissed"] DUPLICATE_RELATION_TYPE = "likely_duplicate" def _strip_personal(item: Optional[dict], hide: bool) -> Optional[dict]: """Return a placeholder dict when ``item`` is personal and ``hide`` is set.""" if item is None: return None if hide and item.get("is_personal"): return {"id": item.get("id"), "hidden": True} return item @router.get("/admin/duplicate-candidates") async def admin_duplicate_candidates( resolved: Optional[bool] = None, exclude_personal: bool = True, limit: int = 100, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """List duplicate-candidate relations for admin review. Pass ``resolved=true`` or ``resolved=false`` to filter; omit both to fetch every state (the original UI default). The web UI keeps surfacing the actionable backlog by passing ``resolved=false`` explicitly. With ``exclude_personal=true`` (default) personal items in the pair are replaced with ``{id, hidden: true}`` — the relation row is still visible so admins can resolve it, but content stays inside the personal-item privacy boundary (ADR Decision 1 precedent). """ repo = KnowledgeRepository(conn) relations = repo.list_relations( relation_type=DUPLICATE_RELATION_TYPE, resolved=resolved, limit=limit, ) item_ids = list({ id_ for r in relations for id_ in (r["item_a_id"], r["item_b_id"]) }) items_by_id = repo.get_by_ids(item_ids) if item_ids else {} for r in relations: r["item_a"] = _strip_personal(items_by_id.get(r["item_a_id"]), exclude_personal) r["item_b"] = _strip_personal(items_by_id.get(r["item_b_id"]), exclude_personal) return {"relations": relations, "count": len(relations)} @router.post("/admin/duplicate-candidates/resolve") async def admin_resolve_duplicate_candidate( item_a_id: str, item_b_id: str, request: ResolveDuplicateRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Resolve a duplicate-candidate relation. Admin chooses: ``duplicate`` (acknowledge), ``different`` (false positive), or ``dismissed`` (don't surface again, but no judgment). Idempotent re-resolve is rejected with 400 — the audit trail wants one decision per pair. """ if request.resolution not in VALID_DUPLICATE_RESOLUTIONS: raise HTTPException( status_code=400, detail=f"resolution must be one of: {VALID_DUPLICATE_RESOLUTIONS}", ) repo = KnowledgeRepository(conn) existing = repo.get_relation(item_a_id, item_b_id, DUPLICATE_RELATION_TYPE) if not existing: raise HTTPException(status_code=404, detail="Duplicate-candidate relation not found") if existing.get("resolved"): raise HTTPException(status_code=400, detail="Relation already resolved") repo.resolve_relation( item_a_id=item_a_id, item_b_id=item_b_id, relation_type=DUPLICATE_RELATION_TYPE, resolved_by=user["email"], resolution=request.resolution, ) # Resource-id of audit row is the canonical (a,b) pair for grep-ability. a, b = sorted([item_a_id, item_b_id]) _audit_action( conn, user["email"], "resolve_duplicate", f"{a}::{b}", {"resolution": request.resolution, "item_a_id": a, "item_b_id": b}, ) return { "item_a_id": a, "item_b_id": b, "resolved": True, "resolution": request.resolution, } # ---- Admin PATCH + bulk-update + tree endpoints (issue #62) ---- @router.get("/admin/{item_id}") async def admin_get_item( item_id: str, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Single-item GET — powers the ``#item-`` deep link from `/memory/d/`'s Edit affordance. The admin page uses this to fetch the row directly (bypassing pagination of the All-Items list) so the edit modal opens reliably regardless of which page the item happens to fall on. Returns the same dict shape as the list rows. Route placement note: declared AFTER all named ``/admin/`` GET routes (pending, audit, contradictions, duplicate-candidates) so the catch-all ``{item_id}`` doesn't shadow them — FastAPI matches in declaration order. """ repo = KnowledgeRepository(conn) item = repo.get_by_id(item_id) if not item: raise HTTPException(status_code=404, detail="item_not_found") return item @router.patch("/admin/{item_id}") async def admin_patch_item( item_id: str, request: PatchItemRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Partial update — accepts category/domain/tags/audience/title/content. Replaces the narrow ``POST /api/memory/admin/edit`` (kept one release as a thin alias). Audit row tagged ``corporate_memory.update_item`` records which fields changed (not the full diff — keep audit rows compact). """ repo = KnowledgeRepository(conn) _get_item_or_404(repo, item_id) # ``exclude_unset=True`` preserves explicit ``null`` values from the request # body so callers can clear previously-set Optional fields (e.g. PATCH # ``{"audience": null}`` resets audience to NULL). With ``exclude_none=True`` # those nulls were silently dropped — the only path to clear was the # empty-string short-circuit on ``domain``, and ``audience`` had no clearing # path at all. See PR #126 round-4 review. updates = request.model_dump(exclude_unset=True) if "domain" in updates and updates["domain"]: _validate_domain_slug(updates["domain"], conn) # ``title`` is NOT NULL in the schema. ``exclude_unset=True`` lets explicit # ``null`` through, which would 500 on a DuckDB constraint violation. Reject # at the boundary so the caller gets a 400 with a clear message instead. if "title" in updates and updates["title"] is None: raise HTTPException(status_code=400, detail="title cannot be null") # M:N domain membership lives in ``knowledge_item_domains`` and is # written via a separate junction repo call — strip from the legacy # ``repo.update(**)`` kwargs since the knowledge_items row has no # ``domain_ids`` column. domain_ids = updates.pop("domain_ids", None) if not updates and domain_ids is None: return {"id": item_id, "updated": []} # tags is a list — JSON-encode to match the column type, mirroring create(). repo_kwargs = dict(updates) if "tags" in repo_kwargs: repo_kwargs["tags"] = ( json.dumps(repo_kwargs["tags"]) if repo_kwargs["tags"] else None ) if repo_kwargs: repo.update(item_id, **repo_kwargs) # Junction write — replace the item's full domain membership atomically. # Resolve ids → slugs because replace_domains_for_item takes slugs; # unknown ids raise 400 (admin's chip-input only picks from # /api/admin/memory-domains so a missing id means a race or an # already-deleted domain). if domain_ids is not None: from src.repositories.memory_domains import MemoryDomainsRepository dom_repo = MemoryDomainsRepository(conn) if domain_ids: placeholders = ",".join(["?"] * len(domain_ids)) rows = conn.execute( f"SELECT id, slug FROM memory_domains WHERE id IN ({placeholders})", domain_ids, ).fetchall() id_to_slug = {r[0]: r[1] for r in rows} missing = [i for i in domain_ids if i not in id_to_slug] if missing: raise HTTPException( status_code=400, detail=f"unknown_domain_ids: {missing}", ) slugs = [id_to_slug[i] for i in domain_ids] else: slugs = [] dom_repo.replace_domains_for_item( item_id, slugs, added_by=user["email"] ) audit_keys = sorted(updates.keys()) if domain_ids is not None: audit_keys.append("domain_ids") _audit_action( conn, user["email"], "update_item", item_id, {"updated_fields": audit_keys}, ) return {"id": item_id, "updated": audit_keys} @router.post("/admin/bulk-update") async def admin_bulk_update( request: BulkUpdateRequest, user: dict = Depends(require_admin), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Apply ``updates`` to every id in ``item_ids``. Per-id audit rows. Returns a per-id status map plus rolled-up convenience lists (200 even on partial failure — the body distinguishes successes from misses). """ repo = KnowledgeRepository(conn) updates = dict(request.updates or {}) # Reject governance-sensitive fields BEFORE hitting the repo. _UPDATABLE_FIELDS # in the repo is broad on purpose; this endpoint is the narrow path. Callers # that need to flip status/sensitivity/is_personal must use the dedicated # governance endpoints so the right audit row is written. See PR #126 review. disallowed = sorted(k for k in updates.keys() if k not in _BULK_UPDATE_ALLOWED) if disallowed: raise HTTPException( status_code=400, detail=( f"updates contains disallowed field(s): {disallowed}. " f"Allowed: {sorted(_BULK_UPDATE_ALLOWED)}" ), ) if "domain" in updates and updates["domain"]: _validate_domain_slug(updates["domain"], conn) # Mirror the PATCH boundary check — title is NOT NULL in the schema, so # an explicit null here would fall through to a per-item Constraint Error # in repo.bulk_update() instead of a clean 400 to the caller. if "title" in updates and updates["title"] is None: raise HTTPException(status_code=400, detail="title cannot be null") if not request.item_ids: return {"updated": [], "not_found": [], "errors": {}} statuses = repo.bulk_update(request.item_ids, updates) # Allowlist already enforced above, so every key in updates is auditable. audited_fields = sorted(updates.keys()) updated: List[str] = [] not_found: List[str] = [] errors: dict = {} for item_id, status in statuses.items(): if status == "updated": updated.append(item_id) _audit_action( conn, user["email"], "bulk_update", item_id, {"updated_fields": audited_fields, "batch": True}, ) elif status == "not_found": not_found.append(item_id) else: errors[item_id] = status return {"updated": updated, "not_found": not_found, "errors": errors} # Axes the tree endpoint groups by. Anything else → 400. Order matters for # the default chip rendering in the UI. _TREE_AXES = ("domain", "category", "tag", "audience") def _label_for_axis(axis: str, key: Optional[str]) -> str: """Pretty bucket label for the tree UI. Falls back to the raw key.""" if key is None or key == "": return { "domain": "(no domain)", "category": "(no category)", "tag": "(no tag)", "audience": "All users", }.get(axis, "(unset)") if axis == "audience" and key == "all": return "All users" if axis == "audience" and key.startswith("group:"): return f"Group: {key[len('group:'):]}" return key def _matches_chip_filters( item: dict, *, status_filter: Optional[str], source_type: Optional[str], audience: Optional[str], has_duplicate_ids: Optional[set], q: Optional[str], ) -> bool: """Apply chip filters to an already-RBAC-filtered item.""" if status_filter and item.get("status") != status_filter: return False if source_type and item.get("source_type") != source_type: return False if audience: # Treat NULL audience as 'all' so chip-filter behavior matches the # SQL audience filter, ``count_by_audience`` (COALESCE→'all'), and the # tree's ``_bucket_key`` (NULL → 'all'). Without this coalesce, # NULL-audience items disappear from the ``audience=all`` chip even # though the rest of the system treats them as visible-to-everyone. item_audience = item.get("audience") or "all" if item_audience != audience: return False if has_duplicate_ids is not None and item.get("id") not in has_duplicate_ids: return False if q: needle = q.lower() title = (item.get("title") or "").lower() content = (item.get("content") or "").lower() if needle not in title and needle not in content: return False return True @router.get("/tree") async def get_tree( axis: str = "domain", status_filter: Optional[str] = None, source_type: Optional[str] = None, audience: Optional[str] = None, q: Optional[str] = None, has_duplicate: bool = False, exclude_personal: bool = True, is_required: Optional[bool] = None, page: int = 1, per_page: int = 50, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Server-side grouping for the Browse / Group-by tree (issue #62). Returns ``{groups: [{key, label, count, items: [...]}]}`` already RBAC-filtered + chip-filtered. The same ``_effective_groups`` and ``_can_view_item`` helpers used by ``GET /api/memory`` apply, so a non-admin caller never sees personal items belonging to others, and audience-restricted items only surface for members of the audience group. On the ``tag`` axis a single item appears once per tag it holds — that is the intended "overlapping bucket" affordance. Every other axis puts each item in its single canonical bucket. """ if axis not in _TREE_AXES: raise HTTPException( status_code=400, detail=f"axis must be one of: {list(_TREE_AXES)}", ) repo = KnowledgeRepository(conn) is_priv = _is_privileged_viewer(user, conn) effective_groups = _effective_groups(user, conn) # Privacy parity with ``GET /api/memory``: non-admin can never opt out. effective_exclude_personal = True if not is_priv else exclude_personal page = max(page, 1) per_page = max(min(per_page, 500), 1) # has_duplicate=true narrows the candidate set to items present in any # unresolved likely_duplicate relation. Computed once; intersected per # item below. has_duplicate_ids: Optional[set] = None if has_duplicate: rels = repo.list_relations( relation_type=DUPLICATE_RELATION_TYPE, resolved=False, limit=10000, ) has_duplicate_ids = { id_ for r in rels for id_ in (r["item_a_id"], r["item_b_id"]) } # Audience-axis privacy (decision 13): non-admins only see their own # group buckets + null/all. Use the audience pre-filter on the SQL side # so non-admins never accidentally see another group's bucket count. granted_domains = _caller_granted_memory_domains(user, conn) statuses = [status_filter] if status_filter else None items = repo.list_items( statuses=statuses, source_type=source_type, is_required=is_required, exclude_personal=effective_exclude_personal, user_groups=effective_groups, granted_domains=granted_domains, limit=10000, offset=0, ) # Apply remaining chip filters that don't have a SQL layer yet. visible: List[dict] = [] for item in items: if not _can_view_item(user, item, is_priv): continue if not _matches_chip_filters( item, status_filter=None, # already in SQL source_type=None, # already in SQL audience=audience, has_duplicate_ids=has_duplicate_ids, q=q, ): continue visible.append(item) # Group items by axis. tag is multi-bucket; everything else is single. groups: dict = {} def _bucket_key(item: dict, axis: str) -> List[str]: if axis == "tag": tags = item.get("tags") if isinstance(tags, str): try: tags = json.loads(tags) except json.JSONDecodeError: tags = [] if not tags: return [""] return [str(t) for t in tags] if axis == "audience": aud = item.get("audience") return [aud if aud else "all"] if axis == "category": return [item.get("category") or ""] # default: domain return [item.get("domain") or ""] for item in visible: for key in _bucket_key(item, axis): bucket = groups.setdefault(key, { "key": key, "label": _label_for_axis(axis, key), "items": [], }) bucket["items"].append(item) # Stable ordering: alphabetic on key with the empty bucket sinking to # the bottom — the UI usually wants the "real" buckets up top. ordered = sorted( groups.values(), key=lambda g: (g["key"] == "", g["key"].lower() if isinstance(g["key"], str) else ""), ) for g in ordered: g["count"] = len(g["items"]) # Page over groups (not items): operators paging through hundreds of # tags want bucket-level pagination. The UI expands a bucket to see all # its items. start = (page - 1) * per_page paged = ordered[start:start + per_page] return { "axis": axis, "groups": paged, "page": page, "per_page": per_page, "total_groups": len(ordered), "total_items": sum(g["count"] for g in ordered), } # ---- Bundle endpoint ---- def _build_per_domain_markdown( slug: str, user: dict, conn: duckdb.DuckDBPyConnection ) -> Response: """Render a deterministic markdown bundle for a single memory domain. Used by ``agnes pull`` to write ``~/.claude/memory//bundle.md``. The bundle includes both ``is_required=TRUE`` and approved items so the per-domain md5 in ``/api/sync/manifest`` (built from the same item set in ``_build_memory_domains_section``) matches the md5 of what the CLI just received. Items are sorted by ``id`` to mirror the manifest's md5 computation byte-for-byte (Section 5.1 of the unified-stack design). RBAC: the caller must have a grant on the domain — admins bypass via ``can_access``'s admin short-circuit. Anonymous or grantless callers get 403. """ repo = MemoryDomainsRepository(conn) dom = repo.get_by_slug(slug) if not dom: raise HTTPException(status_code=404, detail="memory_domain_not_found") if not can_access(user["id"], "memory_domain", dom["id"], conn): raise HTTPException(status_code=403, detail="no_grant") # Pull items the same way the manifest md5 helper does — id order, # full payload (title/status/is_required pulled via the knowledge # repository for content), no token-budget truncation. items_meta = repo.list_items_of_domain(dom["id"], limit=10000) if not items_meta: body = f"# {dom['name']}\n\n_No items in this domain yet._\n" return Response(content=body, media_type="text/markdown; charset=utf-8") # Fetch full bodies — list_items_of_domain only returns id/title/status. knowledge_repo = KnowledgeRepository(conn) full_items: list = [] for meta in sorted(items_meta, key=lambda r: r["id"]): full = knowledge_repo.get_by_id(meta["id"]) if not full: continue full_items.append(full) lines: list = [f"# {dom['name']}", ""] if dom.get("description"): lines.append(dom["description"]) lines.append("") required = [it for it in full_items if it.get("is_required")] approved = [ it for it in full_items if not it.get("is_required") and it.get("status") == "approved" ] if required: lines.append("## Required") lines.append("") for it in required: lines.append(f"### {it.get('title', 'Untitled')}") lines.append("") lines.append(it.get("content", "") or "") lines.append("") if approved: lines.append("## Approved") lines.append("") for it in approved: lines.append(f"### {it.get('title', 'Untitled')}") lines.append("") lines.append(it.get("content", "") or "") lines.append("") body = "\n".join(lines).rstrip() + "\n" return Response(content=body, media_type="text/markdown; charset=utf-8") @router.get("/bundle") async def get_bundle( domain: Optional[str] = None, user: dict = Depends(get_current_user), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Token-budgeted bundle of knowledge items for AI agent injection. Mandatory items are always included regardless of the token budget. Approved items are confidence×recency-ranked and included until the budget is exhausted. Audience-filtered by the caller's group memberships (admins see everything). v49: when ``?domain=`` is supplied the response shape switches to ``text/markdown`` containing a deterministic per-domain bundle — that's what ``agnes pull`` writes to ``~/.claude/memory//bundle.md``. RBAC: the caller must have a ``MEMORY_DOMAIN`` grant on the domain (admins bypass per ``can_access``). The markdown body sorts items alphabetically by title and includes both required and approved items (required first, with a marker) so the bundle md5 in the manifest matches what the CLI re-renders. """ from datetime import datetime, timezone # ----- Per-domain markdown variant (v49) ----- if domain: return _build_per_domain_markdown(domain, user, conn) repo = KnowledgeRepository(conn) effective_groups = _effective_groups(user, conn) granted_domains = _caller_granted_memory_domains(user, conn) # v46: the bundle is what AI agents inject as context, so the opt-out # has real effect here — it's always-on for the calling user. Mandatory # items are exempted by the EXISTS subquery's status guard inside # ``list_items``; the user's dismissal row for a then-approved item is # silently ignored if/when the item is later mandated. dismissed_by_user_id = user["id"] # v49: Required tier rides on is_required boolean. Was statuses=['mandatory']. mandatory = repo.list_items( is_required=True, exclude_personal=True, user_groups=effective_groups, granted_domains=granted_domains, dismissed_by_user=dismissed_by_user_id, hide_dismissed=True, limit=1000, offset=0, ) approved = repo.list_items( statuses=["approved"], is_required=False, exclude_personal=True, user_groups=effective_groups, granted_domains=granted_domains, dismissed_by_user=dismissed_by_user_id, hide_dismissed=True, limit=1000, offset=0, ) # Rank approved by confidence × recency (days since updated_at, max 365). # updated_at is intentional: a recently admin-edited item reflects a human # who just reviewed and corrected it, making it more trustworthy than an # older untouched item. This differs from confidence.py which decays from # created_at — the two scores serve different purposes (credibility vs freshness). now = datetime.now(timezone.utc) def _rank(item: dict) -> float: confidence = float(item["confidence"]) if item.get("confidence") is not None else 0.5 updated_raw = item.get("updated_at") if updated_raw: try: if isinstance(updated_raw, str): from datetime import datetime as dt updated = dt.fromisoformat(updated_raw.replace("Z", "+00:00")) else: updated = updated_raw if updated.tzinfo is None: from datetime import timezone as tz updated = updated.replace(tzinfo=tz.utc) age_days = max((now - updated).days, 0) except Exception: age_days = 365 else: age_days = 365 recency = max(0.0, 1.0 - age_days / 365.0) return confidence * recency approved_ranked = sorted(approved, key=_rank, reverse=True) def _token_est(item: dict) -> int: return len((item.get("title", "") + " " + item.get("content", ""))) // _CHARS_PER_TOKEN budget_remaining = BUNDLE_TOKEN_BUDGET - sum(_token_est(i) for i in mandatory) approved_included = [] for item in approved_ranked: cost = _token_est(item) if budget_remaining - cost < 0: break approved_included.append(item) budget_remaining -= cost return { "mandatory": mandatory, "approved": approved_included, "token_estimate": BUNDLE_TOKEN_BUDGET - budget_remaining, "token_budget": BUNDLE_TOKEN_BUDGET, }