* feat(api): enforce API design rules via pytest + fix DELETE/status-code violations
Adds tests/test_api_design_rules.py with four forward-only design guardrails
that prevent new endpoints from accumulating REST debt:
Rule 1 — No new verbs in URL paths (existing 28 grandfathered via allowlist)
Rule 2 — DELETE must declare 204 No Content (zero allowlist entries)
Rule 3 — Creator POSTs (path has GET counterpart) must declare 201/202
Rule 4 — All protected /api/* routes must declare 401 and 403
Fixes found by running the rules:
- DELETE /api/admin/metrics/{metric_id}: return 204, drop redundant body
- DELETE /api/memory/{item_id}/dismiss (undismiss): return 204, drop body
- POST /api/memory/admin/contradictions: add status_code=201 (creates a resource)
- app/main.py: _add_auth_error_responses() injected into app.openapi() at startup;
declares 401/403 on all protected /api/* operations centrally, fixing the 120
routes that previously omitted these response codes from the spec.
Closes #337
* fix(api): resolve CI failures — extend 204 fixes + complete allowlists
- Fix remaining 6 DELETE endpoints to return 204: store entities,
store entity install, marketplace curated install, marketplace plugin
system flag, admin store submission, and observability view
- Update all affected tests to expect 204 (removed body assertions)
- Add 4 missing verb paths to _VERB_PATH_ALLOWLIST in test_api_design_rules.py
- Add 2 upsert endpoints to _CREATOR_POST_ALLOWLIST
- Update admin_marketplaces.html to not call r.json() on 204 DELETE
* fix(tests): align 2 DELETE-asserting tests with 204 contract (post-#339 rebase)
CI's test-shard (1) and (4) failures on this PR were caused by
Vojta's second commit (`fix(api): resolve CI failures — extend 204
fixes`) flipping more DELETE endpoints to status_code=204 than just
the two mentioned in the PR body. Two tests assert status_code==200
on the DELETE response and broke:
- tests/test_admin_store_submissions.py::TestQuarantineGates::test_admin_can_delete_quarantined
(DELETE /api/store/entities/{entity_id})
- tests/test_store_api.py::TestInstallCycle::test_admin_hard_delete_cascades_installs
(DELETE /api/store/entities/{entity_id}?hard=true)
Updated both to assert 204 with a comment pointing at
tests/test_api_design_rules.py rule 2 so future reviewers can
trace the contract. Verified via broader scan that no other test
asserts == 200 on a .delete() response directly (4 other sites do
.delete() then check 200 on a subsequent GET — those are fine).
* release: 0.54.26 — API design rules (test_api_design_rules.py) + 8 DELETE endpoints flip to 204
---------
Co-authored-by: ZdenekSrotyr <zdenek.srotyr@keboola.com>
1428 lines
52 KiB
Python
1428 lines
52 KiB
Python
"""Corporate memory endpoints — knowledge items, voting, governance admin, contradictions."""
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import uuid
|
||
from typing import Optional, List
|
||
|
||
from fastapi import APIRouter, Depends, HTTPException
|
||
from pydantic import BaseModel
|
||
import duckdb
|
||
|
||
from app.auth.dependencies import get_current_user, _get_db
|
||
from app.auth.access import require_admin, is_user_admin
|
||
from src.repositories.knowledge import KnowledgeRepository
|
||
from src.repositories.audit import AuditRepository
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
router = APIRouter(prefix="/api/memory", tags=["memory"])
|
||
|
||
VALID_STATUSES = ["pending", "approved", "mandatory", "rejected", "revoked", "expired"]
|
||
|
||
BUNDLE_TOKEN_BUDGET = 6000
|
||
# Rough chars-per-token estimate (conservative).
|
||
_CHARS_PER_TOKEN = 4
|
||
VALID_DOMAINS = ["finance", "engineering", "product", "data", "operations", "infrastructure"]
|
||
|
||
# API-layer allowlist for ``POST /api/memory/admin/bulk-update``. The repo's
|
||
# ``_UPDATABLE_FIELDS`` is intentionally broader (``status``, ``sensitivity``,
|
||
# ``is_personal``, ``confidence``, valid_from/until, supersedes, etc.) so the
|
||
# narrow per-item ``update`` path can still touch them; bulk-edit must NOT,
|
||
# because changing status / personal-flag / sensitivity in bulk bypasses the
|
||
# proper governance flow (``/admin/mandate``, ``/admin/revoke``,
|
||
# ``/{id}/personal``) and its dedicated audit rows. Callers that need those
|
||
# fields in bulk should use the per-item endpoints. See PR #126 review.
|
||
_BULK_UPDATE_ALLOWED = frozenset({
|
||
"category", "domain", "tags", "tags_add", "tags_remove",
|
||
"audience", "title", "content",
|
||
})
|
||
|
||
|
||
def _is_privileged_viewer(user: dict, conn: duckdb.DuckDBPyConnection) -> bool:
|
||
"""Admins (members of the Admin system group, per RBAC v13) are the
|
||
privileged viewer tier. Pre-v13 the schema also had a km_admin role; v13
|
||
collapsed the role hierarchy into groups, so the corporate-memory admin
|
||
capability now lives on top of plain admin membership. Module authors
|
||
needing a finer-grained gate (curator-only, etc.) should add a
|
||
``ResourceType.CORPORATE_MEMORY_ADMIN`` resource type and gate with
|
||
``require_resource_access`` instead of extending this helper."""
|
||
user_id = user.get("id")
|
||
if not user_id:
|
||
return False
|
||
return is_user_admin(user_id, conn)
|
||
|
||
|
||
def _effective_groups(
|
||
user: dict, conn: duckdb.DuckDBPyConnection
|
||
) -> Optional[List[str]]:
|
||
"""Audience-filter group list for the caller, or ``None`` for admins
|
||
(no filter — see all items regardless of audience).
|
||
|
||
Reads from ``user_group_members`` JOIN ``user_groups`` (the v13 model).
|
||
Pre-v13 this read ``users.groups`` JSON; that column was dropped in v13
|
||
and the membership is now materialized in ``user_group_members`` with a
|
||
``source`` discriminator (admin / google_sync / system_seed).
|
||
"""
|
||
if _is_privileged_viewer(user, conn):
|
||
return None
|
||
user_id = user.get("id")
|
||
if not user_id:
|
||
return []
|
||
rows = conn.execute(
|
||
"""SELECT g.name FROM user_group_members m
|
||
JOIN user_groups g ON m.group_id = g.id
|
||
WHERE m.user_id = ?""",
|
||
[user_id],
|
||
).fetchall()
|
||
return [f"group:{r[0]}" for r in rows]
|
||
|
||
|
||
def _caller_granted_memory_domains(
|
||
user: dict,
|
||
conn: duckdb.DuckDBPyConnection,
|
||
) -> Optional[List[str]]:
|
||
"""Domains the caller has been granted access to via resource_grants.
|
||
|
||
The grant model is generic — admins assign ``MEMORY_DOMAIN`` resources
|
||
(e.g. ``finance``) to ``user_groups`` rows via ``/admin/access``. This
|
||
helper resolves the caller's group memberships against
|
||
``resource_grants`` and returns the union of domain strings.
|
||
|
||
Returns ``None`` for privileged viewers (admins see everything regardless
|
||
of grants — same convention as ``_effective_groups``). Returns an
|
||
empty list when the caller has no grants — the SQL filter then treats
|
||
this as a no-op (the ``OR domain IN ()`` clause is skipped).
|
||
"""
|
||
if _is_privileged_viewer(user, conn):
|
||
return None
|
||
user_id = user.get("id")
|
||
if not user_id:
|
||
return []
|
||
rows = conn.execute(
|
||
"""SELECT DISTINCT rg.resource_id
|
||
FROM resource_grants rg
|
||
JOIN user_group_members m ON m.group_id = rg.group_id
|
||
WHERE m.user_id = ?
|
||
AND rg.resource_type = 'memory_domain'""",
|
||
[user_id],
|
||
).fetchall()
|
||
return [r[0] for r in rows]
|
||
|
||
|
||
def _can_view_item(user: dict, item: dict, is_priv: bool) -> bool:
|
||
"""Personal items are visible only to the contributor and privileged
|
||
viewers. Non-personal items are visible to any authenticated user.
|
||
|
||
``is_priv`` is pre-computed by the caller (one DB hit per request) so
|
||
a per-item loop doesn't re-query ``user_group_members`` for every row.
|
||
"""
|
||
if not item.get("is_personal"):
|
||
return True
|
||
if is_priv:
|
||
return True
|
||
return item.get("source_user") == user.get("email")
|
||
|
||
|
||
class CreateKnowledgeRequest(BaseModel):
|
||
title: str
|
||
content: str
|
||
category: str
|
||
tags: Optional[List[str]] = None
|
||
domain: Optional[str] = None
|
||
entities: Optional[List[str]] = None
|
||
source_type: Optional[str] = None
|
||
|
||
|
||
class VoteRequest(BaseModel):
|
||
vote: int
|
||
|
||
|
||
class PersonalFlagRequest(BaseModel):
|
||
is_personal: bool
|
||
|
||
|
||
class AdminActionRequest(BaseModel):
|
||
reason: Optional[str] = None
|
||
audience: Optional[str] = None
|
||
|
||
|
||
class EditRequest(BaseModel):
|
||
title: Optional[str] = None
|
||
content: Optional[str] = None
|
||
|
||
|
||
class BatchActionRequest(BaseModel):
|
||
item_ids: List[str]
|
||
action: str # approve, reject, mandate, revoke
|
||
reason: Optional[str] = None
|
||
audience: Optional[str] = None
|
||
|
||
|
||
class ResolveContradictionRequest(BaseModel):
|
||
resolution: str # kept_a, kept_b, merged, both_valid
|
||
|
||
|
||
class CreateContradictionRequest(BaseModel):
|
||
item_a_id: str
|
||
item_b_id: str
|
||
explanation: str
|
||
severity: Optional[str] = None
|
||
suggested_resolution: Optional[str] = None
|
||
|
||
|
||
class PatchItemRequest(BaseModel):
|
||
"""Partial update for a knowledge item via PATCH /api/memory/admin/{id}.
|
||
|
||
Replaces the narrow ``EditRequest`` (title + content only). Any field
|
||
left as ``None`` is unchanged. Domain is validated against
|
||
``VALID_DOMAINS`` when supplied.
|
||
"""
|
||
title: Optional[str] = None
|
||
content: Optional[str] = None
|
||
category: Optional[str] = None
|
||
domain: Optional[str] = None
|
||
tags: Optional[List[str]] = None
|
||
audience: Optional[str] = None
|
||
|
||
|
||
class BulkUpdateRequest(BaseModel):
|
||
"""Apply ``updates`` to every id in ``item_ids``. Issue #62."""
|
||
item_ids: List[str]
|
||
updates: dict
|
||
|
||
|
||
class ResolveDuplicateRequest(BaseModel):
|
||
"""Resolve a duplicate-candidate relation row.
|
||
|
||
``resolution`` is one of ``duplicate`` / ``different`` / ``dismissed``
|
||
(decision 2 in issue #62 — no auto-merge action; merging is a separate
|
||
larger feature).
|
||
"""
|
||
resolution: str
|
||
|
||
|
||
# ---- User endpoints ----
|
||
|
||
@router.get("")
|
||
async def list_knowledge(
|
||
status_filter: Optional[str] = None,
|
||
category: Optional[str] = None,
|
||
domain: Optional[str] = None,
|
||
source_type: Optional[str] = None,
|
||
search: Optional[str] = None,
|
||
exclude_personal: bool = True,
|
||
upvoted_by_me: bool = False,
|
||
hide_dismissed: bool = False,
|
||
page: int = 1,
|
||
per_page: int = 50,
|
||
sort: str = "updated_at",
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""List knowledge items with filtering, pagination, search.
|
||
|
||
``upvoted_by_me=true`` narrows to items the caller upvoted (powers the
|
||
"My Upvotes" filter on /corporate-memory — replaces the old dead
|
||
"My Rules" category sentinel).
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
page = max(page, 1)
|
||
offset = (page - 1) * per_page
|
||
# Privacy: non-privileged viewers can never opt out of the personal filter.
|
||
# Their own personal contributions are visible via /my-contributions, not here.
|
||
effective_exclude_personal = True if not _is_privileged_viewer(user, conn) else exclude_personal
|
||
effective_groups = _effective_groups(user, conn)
|
||
granted_domains = _caller_granted_memory_domains(user, conn)
|
||
statuses = [status_filter] if status_filter else None
|
||
upvoted_by_user_id = user["id"] if upvoted_by_me else None
|
||
# v46: caller's id is plumbed to repo filters when hide_dismissed=True so
|
||
# the SQL can NOT-EXISTS-subquery against knowledge_item_user_dismissed.
|
||
# Mandatory items are exempted by the subquery's status guard.
|
||
dismissed_by_user_id = user["id"]
|
||
if search:
|
||
items = repo.search(
|
||
search,
|
||
exclude_personal=effective_exclude_personal,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
statuses=statuses,
|
||
category=category,
|
||
domain=domain,
|
||
source_type=source_type,
|
||
dismissed_by_user=dismissed_by_user_id,
|
||
hide_dismissed=hide_dismissed,
|
||
limit=per_page,
|
||
offset=offset,
|
||
)
|
||
if upvoted_by_user_id:
|
||
# Best-effort post-filter for the search() path (which doesn't
|
||
# plumb the upvote filter into its SQL). Search + "My Upvotes"
|
||
# is rare enough that a post-filter is fine.
|
||
upvoted_ids = {
|
||
r[0] for r in conn.execute(
|
||
"SELECT item_id FROM knowledge_votes WHERE user_id = ? AND vote > 0",
|
||
[upvoted_by_user_id],
|
||
).fetchall()
|
||
}
|
||
items = [it for it in items if it["id"] in upvoted_ids]
|
||
else:
|
||
items = repo.list_items(
|
||
statuses=statuses,
|
||
category=category,
|
||
domain=domain,
|
||
source_type=source_type,
|
||
exclude_personal=effective_exclude_personal,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
upvoted_by_user=upvoted_by_user_id,
|
||
dismissed_by_user=dismissed_by_user_id,
|
||
hide_dismissed=hide_dismissed,
|
||
limit=per_page,
|
||
offset=offset,
|
||
)
|
||
|
||
# Enrich with votes + per-user dismissal flag. The set lookup keeps the
|
||
# per-item annotation O(1); the frontend uses ``dismissed_by_me`` to
|
||
# render the gray-out state without a separate roundtrip.
|
||
dismissed_set = set(repo.list_dismissed_ids(user["id"]))
|
||
for item in items:
|
||
votes = repo.get_votes(item["id"])
|
||
item["upvotes"] = votes["upvotes"]
|
||
item["downvotes"] = votes["downvotes"]
|
||
item["score"] = votes["upvotes"] - votes["downvotes"]
|
||
item["dismissed_by_me"] = item["id"] in dismissed_set
|
||
|
||
import math
|
||
total_count = repo.count_items(
|
||
search=search,
|
||
statuses=statuses,
|
||
category=category,
|
||
domain=domain,
|
||
source_type=source_type,
|
||
exclude_personal=effective_exclude_personal,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
dismissed_by_user=dismissed_by_user_id,
|
||
hide_dismissed=hide_dismissed,
|
||
)
|
||
total_pages = math.ceil(total_count / per_page) if per_page > 0 else 1
|
||
|
||
return {
|
||
"items": items,
|
||
"count": len(items),
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_count": total_count,
|
||
"total_pages": total_pages,
|
||
}
|
||
|
||
|
||
@router.get("/stats")
|
||
async def get_stats(
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get corporate memory statistics.
|
||
|
||
Aggregations exclude personal items for non-privileged callers — otherwise
|
||
`total` and the `by_*` counts would change in observable ways when a
|
||
colleague flags or unflags a personal item, leaking existence info per
|
||
ADR Decision 1.
|
||
|
||
Uses SQL aggregation rather than ``repo.list_items()`` to keep the
|
||
endpoint cheap on large knowledge bases (the loader path materializes
|
||
every row + parses JSON tags/contributors per row, which blocks the
|
||
event loop on N>1k items). Audience filter mirrors what list_items
|
||
applies: ``audience IS NULL OR audience = 'all'`` plus, for non-admins,
|
||
membership in any of the caller's group-prefixed audiences.
|
||
"""
|
||
is_priv = _is_privileged_viewer(user, conn)
|
||
groups = _effective_groups(user, conn)
|
||
granted_domains = _caller_granted_memory_domains(user, conn)
|
||
|
||
where_clauses: List[str] = []
|
||
params: list = []
|
||
if not is_priv:
|
||
# Personal-item privacy: non-privileged callers see no personal items
|
||
# in the aggregate, even their own. /my-contributions is the canonical
|
||
# surface for a user's personal contributions; including them here
|
||
# would make /api/memory/stats.total disagree with the count visible
|
||
# via GET /api/memory (which forces exclude_personal=True for non-
|
||
# admins regardless of source_user).
|
||
where_clauses.append("(is_personal IS NULL OR is_personal = FALSE)")
|
||
|
||
if groups is not None:
|
||
# Mirror the visibility composition KnowledgeRepository.list_items
|
||
# uses: audience match OR MEMORY_DOMAIN grant. Without this the
|
||
# stats `total` diverges from the list endpoint's `total_count` for
|
||
# non-admin users with grants (Devin BUG_0001 on PR #141 5f649a4).
|
||
visibility = ["audience IS NULL", "audience = 'all'"]
|
||
if groups:
|
||
placeholders = ",".join(["?"] * len(groups))
|
||
visibility.append(f"audience IN ({placeholders})")
|
||
params.extend(groups)
|
||
if granted_domains:
|
||
domain_placeholders = ",".join(["?"] * len(granted_domains))
|
||
visibility.append(f"domain IN ({domain_placeholders})")
|
||
params.extend(granted_domains)
|
||
where_clauses.append("(" + " OR ".join(visibility) + ")")
|
||
|
||
where_sql = (" WHERE " + " AND ".join(where_clauses)) if where_clauses else ""
|
||
|
||
total = conn.execute(
|
||
f"SELECT COUNT(*) FROM knowledge_items{where_sql}", params
|
||
).fetchone()[0] or 0
|
||
|
||
by_status_rows = conn.execute(
|
||
f"SELECT COALESCE(status, 'unknown') AS s, COUNT(*) "
|
||
f"FROM knowledge_items{where_sql} GROUP BY s",
|
||
params,
|
||
).fetchall()
|
||
by_status = {r[0]: r[1] for r in by_status_rows}
|
||
|
||
cat_rows = conn.execute(
|
||
f"SELECT DISTINCT category FROM knowledge_items{where_sql} "
|
||
f"{'AND' if where_sql else 'WHERE'} category IS NOT NULL",
|
||
params,
|
||
).fetchall()
|
||
categories = sorted(r[0] for r in cat_rows if r[0])
|
||
|
||
by_domain_rows = conn.execute(
|
||
f"SELECT COALESCE(domain, 'unset') AS d, COUNT(*) "
|
||
f"FROM knowledge_items{where_sql} GROUP BY d",
|
||
params,
|
||
).fetchall()
|
||
by_domain = {r[0]: r[1] for r in by_domain_rows}
|
||
|
||
by_source_rows = conn.execute(
|
||
f"SELECT COALESCE(source_type, 'unknown') AS st, COUNT(*) "
|
||
f"FROM knowledge_items{where_sql} GROUP BY st",
|
||
params,
|
||
).fetchall()
|
||
by_source_type = {r[0]: r[1] for r in by_source_rows}
|
||
|
||
# by_tag + by_audience extend stats for the chip-filter UI (issue #62).
|
||
# The repo helpers honor the same audience + personal-item filters this
|
||
# endpoint applies above.
|
||
repo = KnowledgeRepository(conn)
|
||
exclude_personal_for_caller = not is_priv
|
||
by_tag = repo.count_by_tag(
|
||
exclude_personal=exclude_personal_for_caller,
|
||
user_groups=groups,
|
||
granted_domains=granted_domains,
|
||
)
|
||
by_audience = repo.count_by_audience(
|
||
exclude_personal=exclude_personal_for_caller,
|
||
user_groups=groups,
|
||
granted_domains=granted_domains,
|
||
)
|
||
|
||
return {
|
||
"total": total,
|
||
"by_status": by_status,
|
||
"categories": categories,
|
||
"by_domain": by_domain,
|
||
"by_source_type": by_source_type,
|
||
"by_tag": by_tag,
|
||
"by_audience": by_audience,
|
||
}
|
||
|
||
|
||
@router.post("", status_code=201)
|
||
async def create_knowledge(
|
||
request: CreateKnowledgeRequest,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
# Mirror the validation already enforced by PATCH /admin/{id} and bulk-update
|
||
# so an item can't be created with a domain it can't be patched to. Empty /
|
||
# missing domain is fine — only reject non-empty values outside the allowlist.
|
||
# See PR #126 review.
|
||
if request.domain and request.domain not in VALID_DOMAINS:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"domain must be one of: {VALID_DOMAINS}",
|
||
)
|
||
repo = KnowledgeRepository(conn)
|
||
item_id = str(uuid.uuid4())
|
||
|
||
# Best-effort auto-tagging — runs only when an LLM extractor is configured.
|
||
tags = list(request.tags) if request.tags else []
|
||
try:
|
||
from config.loader import load_instance_config
|
||
from connectors.llm import create_extractor
|
||
from services.corporate_memory.tagger import auto_tag_items
|
||
cfg = load_instance_config()
|
||
ai_cfg = cfg.get("ai")
|
||
if ai_cfg:
|
||
extractor = create_extractor(ai_cfg)
|
||
stub = [{"id": item_id, "title": request.title, "content": request.content}]
|
||
assignments = await asyncio.to_thread(auto_tag_items, stub, extractor)
|
||
topics = assignments.get(item_id, [])
|
||
if topics:
|
||
seen: set[str] = set()
|
||
merged: list[str] = []
|
||
for t in topics + tags:
|
||
if t not in seen:
|
||
seen.add(t)
|
||
merged.append(t)
|
||
tags = merged
|
||
except Exception:
|
||
pass # tagging is non-critical — never block item creation
|
||
|
||
create_kwargs = dict(
|
||
id=item_id,
|
||
title=request.title,
|
||
content=request.content,
|
||
category=request.category,
|
||
source_user=user.get("email"),
|
||
tags=tags or None,
|
||
domain=request.domain,
|
||
entities=request.entities,
|
||
confidence=0.50,
|
||
)
|
||
if request.source_type:
|
||
create_kwargs["source_type"] = request.source_type
|
||
repo.create(**create_kwargs)
|
||
return {"id": item_id, "status": "pending"}
|
||
|
||
|
||
@router.post("/{item_id}/vote")
|
||
async def vote_knowledge(
|
||
item_id: str,
|
||
request: VoteRequest,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
if request.vote not in (1, -1, 0):
|
||
raise HTTPException(status_code=400, detail="Vote must be 1, -1, or 0 (retract)")
|
||
repo = KnowledgeRepository(conn)
|
||
item = repo.get_by_id(item_id)
|
||
if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)):
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
if request.vote == 0:
|
||
repo.unvote(item_id, user["id"])
|
||
else:
|
||
repo.vote(item_id, user["id"], request.vote)
|
||
return repo.get_votes(item_id)
|
||
|
||
|
||
@router.get("/my-votes")
|
||
async def get_my_votes(
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get current user's votes on all items."""
|
||
results = conn.execute(
|
||
"SELECT item_id, vote FROM knowledge_votes WHERE user_id = ?", [user["id"]]
|
||
).fetchall()
|
||
return {row[0]: row[1] for row in results}
|
||
|
||
|
||
@router.get("/my-contributions")
|
||
async def get_my_contributions(
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get knowledge items contributed by the current user."""
|
||
repo = KnowledgeRepository(conn)
|
||
email = user.get("email", "")
|
||
items = repo.get_user_contributions(email)
|
||
for item in items:
|
||
votes = repo.get_votes(item["id"])
|
||
item["upvotes"] = votes["upvotes"]
|
||
item["downvotes"] = votes["downvotes"]
|
||
item["score"] = votes["upvotes"] - votes["downvotes"]
|
||
return {"items": items, "count": len(items)}
|
||
|
||
|
||
@router.post("/{item_id}/personal")
|
||
async def toggle_personal_flag(
|
||
item_id: str,
|
||
request: PersonalFlagRequest,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Toggle personal/excluded flag on a knowledge item (only by the contributor)."""
|
||
repo = KnowledgeRepository(conn)
|
||
item = repo.get_by_id(item_id)
|
||
if not item:
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
if item.get("source_user") != user.get("email"):
|
||
raise HTTPException(status_code=403, detail="Only the contributor can flag personal items")
|
||
repo.set_personal(item_id, request.is_personal)
|
||
return {"id": item_id, "is_personal": request.is_personal}
|
||
|
||
|
||
@router.post("/{item_id}/dismiss")
|
||
async def dismiss_item(
|
||
item_id: str,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Per-user opt-out — remove an item from the caller's AI bundle.
|
||
|
||
Idempotent: re-dismissing an already-dismissed item is a no-op success.
|
||
Mandatory items can never be dismissed — the governance hard rule —
|
||
so a POST against one returns 400 with a clear detail message.
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
item = repo.get_by_id(item_id)
|
||
if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)):
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
if item.get("status") == "mandatory":
|
||
raise HTTPException(status_code=400, detail="Cannot dismiss a mandatory item")
|
||
repo.dismiss(user["id"], item_id)
|
||
return {"id": item_id, "dismissed": True}
|
||
|
||
|
||
@router.delete("/{item_id}/dismiss", status_code=204)
|
||
async def undismiss_item(
|
||
item_id: str,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Idempotent un-dismiss — a second DELETE still returns 204.
|
||
|
||
Returns 404 if the item itself doesn't exist (consistent with the rest
|
||
of the per-item endpoints); the dismissal row's existence is not
|
||
consulted because absence is the success state.
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
item = repo.get_by_id(item_id)
|
||
if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)):
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
repo.undismiss(user["id"], item_id)
|
||
|
||
|
||
@router.get("/{item_id}/provenance")
|
||
async def get_provenance(
|
||
item_id: str,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get source provenance for a knowledge item."""
|
||
repo = KnowledgeRepository(conn)
|
||
item = repo.get_by_id(item_id)
|
||
if not item or not _can_view_item(user, item, _is_privileged_viewer(user, conn)):
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
return {
|
||
"id": item_id,
|
||
"source_type": item.get("source_type"),
|
||
"source_ref": item.get("source_ref"),
|
||
"source_user": item.get("source_user"),
|
||
"confidence": item.get("confidence"),
|
||
"domain": item.get("domain"),
|
||
"entities": item.get("entities"),
|
||
"valid_from": item.get("valid_from"),
|
||
"valid_until": item.get("valid_until"),
|
||
"supersedes": item.get("supersedes"),
|
||
"created_at": item.get("created_at"),
|
||
}
|
||
|
||
|
||
# ---- Admin governance endpoints ----
|
||
|
||
def _get_item_or_404(repo: KnowledgeRepository, item_id: str) -> dict:
|
||
item = repo.get_by_id(item_id)
|
||
if not item:
|
||
raise HTTPException(status_code=404, detail="Knowledge item not found")
|
||
return item
|
||
|
||
|
||
def _audit_action(conn, admin_email: str, action: str, item_id: str, details: dict = None):
|
||
"""Write an admin governance audit row.
|
||
|
||
Action names use the ``corporate_memory.<action>`` namespace as advertised
|
||
in the 0.15.0 CHANGELOG. Pre-#62 the code wrote ``km_<action>`` — the
|
||
audit-tab filter (see ``admin_audit`` below) accepts both prefixes so
|
||
historical rows still surface.
|
||
"""
|
||
audit = AuditRepository(conn)
|
||
audit.log(
|
||
user_id=admin_email,
|
||
action=f"corporate_memory.{action}",
|
||
resource=item_id,
|
||
params=details,
|
||
)
|
||
|
||
|
||
@router.post("/admin/approve")
|
||
async def admin_approve(
|
||
item_id: str,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
repo.update_status(item_id, "approved")
|
||
_audit_action(conn, user["email"], "approve", item_id)
|
||
return {"id": item_id, "status": "approved"}
|
||
|
||
|
||
@router.post("/admin/reject")
|
||
async def admin_reject(
|
||
item_id: str,
|
||
request: AdminActionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
repo.update_status(item_id, "rejected")
|
||
_audit_action(conn, user["email"], "reject", item_id, {"reason": request.reason})
|
||
return {"id": item_id, "status": "rejected"}
|
||
|
||
|
||
@router.post("/admin/mandate")
|
||
async def admin_mandate(
|
||
item_id: str,
|
||
request: AdminActionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
repo.update_status(item_id, "mandatory")
|
||
if request.audience is not None:
|
||
repo.update(item_id, audience=request.audience)
|
||
_audit_action(conn, user["email"], "mandate", item_id, {
|
||
"reason": request.reason, "audience": request.audience,
|
||
})
|
||
return {"id": item_id, "status": "mandatory"}
|
||
|
||
|
||
@router.post("/admin/revoke")
|
||
async def admin_revoke(
|
||
item_id: str,
|
||
request: AdminActionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
repo.update_status(item_id, "revoked")
|
||
_audit_action(conn, user["email"], "revoke", item_id, {"reason": request.reason})
|
||
return {"id": item_id, "status": "revoked"}
|
||
|
||
|
||
@router.post("/admin/edit")
|
||
async def admin_edit(
|
||
item_id: str,
|
||
request: EditRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
updates = {}
|
||
if request.title is not None:
|
||
updates["title"] = request.title
|
||
if request.content is not None:
|
||
updates["content"] = request.content
|
||
if updates:
|
||
repo.update(item_id, **updates)
|
||
_audit_action(conn, user["email"], "edit", item_id, updates)
|
||
return {"id": item_id, "updated": list(updates.keys())}
|
||
|
||
|
||
@router.post("/admin/batch")
|
||
async def admin_batch(
|
||
request: BatchActionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Batch governance action on multiple items."""
|
||
repo = KnowledgeRepository(conn)
|
||
action_map = {
|
||
"approve": "approved",
|
||
"reject": "rejected",
|
||
"mandate": "mandatory",
|
||
"revoke": "revoked",
|
||
}
|
||
if request.action not in action_map:
|
||
raise HTTPException(status_code=400, detail=f"Invalid action: {request.action}")
|
||
|
||
new_status = action_map[request.action]
|
||
results = {"success": [], "not_found": []}
|
||
for item_id in request.item_ids:
|
||
item = repo.get_by_id(item_id)
|
||
if not item:
|
||
results["not_found"].append(item_id)
|
||
continue
|
||
repo.update_status(item_id, new_status)
|
||
if request.action == "mandate" and request.audience is not None:
|
||
repo.update(item_id, audience=request.audience)
|
||
_audit_action(conn, user["email"], request.action, item_id, {
|
||
"reason": request.reason, "audience": request.audience, "batch": True,
|
||
})
|
||
results["success"].append(item_id)
|
||
|
||
return results
|
||
|
||
|
||
@router.get("/admin/pending")
|
||
async def admin_pending(
|
||
category: Optional[str] = None,
|
||
page: int = 1,
|
||
per_page: int = 50,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get pending items queue for admin review."""
|
||
repo = KnowledgeRepository(conn)
|
||
page = max(page, 1)
|
||
offset = (page - 1) * per_page
|
||
items = repo.list_items(statuses=["pending"], category=category, limit=per_page, offset=offset)
|
||
return {"items": items, "count": len(items)}
|
||
|
||
|
||
@router.get("/admin/audit")
|
||
async def admin_audit(
|
||
page: int = 1,
|
||
per_page: int = 50,
|
||
action: Optional[str] = None,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Get governance audit log.
|
||
|
||
Filters ``corporate_memory.<action>`` rows AND legacy ``km_<action>``
|
||
rows. The dual prefix is here because rows already in the audit log keep
|
||
the legacy ``km_*`` action name (no migration of historical audit rows —
|
||
they are write-once); new rows use the ``corporate_memory.*`` namespace.
|
||
See issue #62 decision E.
|
||
"""
|
||
# Pagination: page is 1-indexed; offset must apply to BOTH branches so the
|
||
# UI's per-page navigation actually returns subsequent rows. Pre-fix, both
|
||
# SQL paths had LIMIT only and silently returned page 1 for every page.
|
||
offset = (max(page, 1) - 1) * per_page
|
||
if action:
|
||
# Match the action across both prefixes so the per-action filter still
|
||
# surfaces historical rows.
|
||
rows = conn.execute(
|
||
"""SELECT * FROM audit_log
|
||
WHERE action IN (?, ?)
|
||
ORDER BY timestamp DESC LIMIT ? OFFSET ?""",
|
||
[f"corporate_memory.{action}", f"km_{action}", per_page, offset],
|
||
).fetchall()
|
||
else:
|
||
rows = conn.execute(
|
||
"""SELECT * FROM audit_log
|
||
WHERE action LIKE 'corporate_memory.%' OR action LIKE 'km_%'
|
||
ORDER BY timestamp DESC LIMIT ? OFFSET ?""",
|
||
[per_page, offset],
|
||
).fetchall()
|
||
if rows:
|
||
columns = [desc[0] for desc in conn.description]
|
||
entries = [dict(zip(columns, row)) for row in rows]
|
||
else:
|
||
entries = []
|
||
return {"entries": entries, "count": len(entries)}
|
||
|
||
|
||
# ---- Admin contradiction endpoints ----
|
||
|
||
@router.get("/admin/contradictions")
|
||
async def admin_contradictions(
|
||
resolved: Optional[bool] = None,
|
||
exclude_personal: bool = True,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""List knowledge contradictions for admin review.
|
||
|
||
By default (`exclude_personal=True`), personal items are replaced with
|
||
{id, hidden: true} so the contradiction record is still visible for
|
||
governance but personal content is not exposed. Pass exclude_personal=false
|
||
to opt in to full content (KM_ADMIN only — see ADR Decision 1).
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
contradictions = repo.list_contradictions(resolved=resolved)
|
||
# Collect all distinct item IDs and fetch in one query (M5 batch optimisation).
|
||
all_item_ids = list({
|
||
id_
|
||
for c in contradictions
|
||
for id_ in (c["item_a_id"], c["item_b_id"])
|
||
})
|
||
items_by_id = repo.get_by_ids(all_item_ids)
|
||
for c in contradictions:
|
||
item_a = items_by_id.get(c["item_a_id"])
|
||
item_b = items_by_id.get(c["item_b_id"])
|
||
if exclude_personal:
|
||
c["item_a"] = {"id": c["item_a_id"], "hidden": True} if item_a and item_a.get("is_personal") else item_a
|
||
c["item_b"] = {"id": c["item_b_id"], "hidden": True} if item_b and item_b.get("is_personal") else item_b
|
||
else:
|
||
c["item_a"] = item_a
|
||
c["item_b"] = item_b
|
||
return {"contradictions": contradictions, "count": len(contradictions)}
|
||
|
||
|
||
@router.post("/admin/contradictions", status_code=201)
|
||
async def admin_create_contradiction(
|
||
request: CreateContradictionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Admin endpoint for manually recording a contradiction between two knowledge items."""
|
||
repo = KnowledgeRepository(conn)
|
||
if not repo.get_by_id(request.item_a_id):
|
||
raise HTTPException(status_code=404, detail=f"Item A not found: {request.item_a_id}")
|
||
if not repo.get_by_id(request.item_b_id):
|
||
raise HTTPException(status_code=404, detail=f"Item B not found: {request.item_b_id}")
|
||
|
||
cid = repo.create_contradiction(
|
||
item_a_id=request.item_a_id,
|
||
item_b_id=request.item_b_id,
|
||
explanation=request.explanation,
|
||
severity=request.severity,
|
||
suggested_resolution=request.suggested_resolution,
|
||
)
|
||
return {"id": cid}
|
||
|
||
|
||
@router.post("/admin/contradictions/{contradiction_id}/resolve")
|
||
async def admin_resolve_contradiction(
|
||
contradiction_id: str,
|
||
request: ResolveContradictionRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Resolve a knowledge contradiction."""
|
||
repo = KnowledgeRepository(conn)
|
||
contradiction = repo.get_contradiction(contradiction_id)
|
||
if not contradiction:
|
||
raise HTTPException(status_code=404, detail="Contradiction not found")
|
||
if contradiction.get("resolved"):
|
||
raise HTTPException(status_code=400, detail="Contradiction already resolved")
|
||
|
||
valid_resolutions = ["kept_a", "kept_b", "merged", "both_valid"]
|
||
if request.resolution not in valid_resolutions:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"Resolution must be one of: {valid_resolutions}",
|
||
)
|
||
|
||
repo.resolve_contradiction(contradiction_id, user["email"], request.resolution)
|
||
_audit_action(conn, user["email"], "resolve_contradiction", contradiction_id, {
|
||
"resolution": request.resolution,
|
||
"item_a_id": contradiction["item_a_id"],
|
||
"item_b_id": contradiction["item_b_id"],
|
||
})
|
||
return {"id": contradiction_id, "resolved": True, "resolution": request.resolution}
|
||
|
||
|
||
# ---- Admin duplicate-candidate endpoints (issue #62) ----
|
||
|
||
VALID_DUPLICATE_RESOLUTIONS = ["duplicate", "different", "dismissed"]
|
||
DUPLICATE_RELATION_TYPE = "likely_duplicate"
|
||
|
||
|
||
def _strip_personal(item: Optional[dict], hide: bool) -> Optional[dict]:
|
||
"""Return a placeholder dict when ``item`` is personal and ``hide`` is set."""
|
||
if item is None:
|
||
return None
|
||
if hide and item.get("is_personal"):
|
||
return {"id": item.get("id"), "hidden": True}
|
||
return item
|
||
|
||
|
||
@router.get("/admin/duplicate-candidates")
|
||
async def admin_duplicate_candidates(
|
||
resolved: Optional[bool] = None,
|
||
exclude_personal: bool = True,
|
||
limit: int = 100,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""List duplicate-candidate relations for admin review.
|
||
|
||
Pass ``resolved=true`` or ``resolved=false`` to filter; omit both to fetch
|
||
every state (the original UI default). The web UI keeps surfacing the
|
||
actionable backlog by passing ``resolved=false`` explicitly.
|
||
|
||
With ``exclude_personal=true`` (default) personal items in the pair are
|
||
replaced with ``{id, hidden: true}`` — the relation row is still visible
|
||
so admins can resolve it, but content stays inside the personal-item
|
||
privacy boundary (ADR Decision 1 precedent).
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
relations = repo.list_relations(
|
||
relation_type=DUPLICATE_RELATION_TYPE,
|
||
resolved=resolved,
|
||
limit=limit,
|
||
)
|
||
item_ids = list({
|
||
id_
|
||
for r in relations
|
||
for id_ in (r["item_a_id"], r["item_b_id"])
|
||
})
|
||
items_by_id = repo.get_by_ids(item_ids) if item_ids else {}
|
||
for r in relations:
|
||
r["item_a"] = _strip_personal(items_by_id.get(r["item_a_id"]), exclude_personal)
|
||
r["item_b"] = _strip_personal(items_by_id.get(r["item_b_id"]), exclude_personal)
|
||
return {"relations": relations, "count": len(relations)}
|
||
|
||
|
||
@router.post("/admin/duplicate-candidates/resolve")
|
||
async def admin_resolve_duplicate_candidate(
|
||
item_a_id: str,
|
||
item_b_id: str,
|
||
request: ResolveDuplicateRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Resolve a duplicate-candidate relation.
|
||
|
||
Admin chooses: ``duplicate`` (acknowledge), ``different`` (false
|
||
positive), or ``dismissed`` (don't surface again, but no judgment).
|
||
Idempotent re-resolve is rejected with 400 — the audit trail wants one
|
||
decision per pair.
|
||
"""
|
||
if request.resolution not in VALID_DUPLICATE_RESOLUTIONS:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"resolution must be one of: {VALID_DUPLICATE_RESOLUTIONS}",
|
||
)
|
||
repo = KnowledgeRepository(conn)
|
||
existing = repo.get_relation(item_a_id, item_b_id, DUPLICATE_RELATION_TYPE)
|
||
if not existing:
|
||
raise HTTPException(status_code=404, detail="Duplicate-candidate relation not found")
|
||
if existing.get("resolved"):
|
||
raise HTTPException(status_code=400, detail="Relation already resolved")
|
||
|
||
repo.resolve_relation(
|
||
item_a_id=item_a_id,
|
||
item_b_id=item_b_id,
|
||
relation_type=DUPLICATE_RELATION_TYPE,
|
||
resolved_by=user["email"],
|
||
resolution=request.resolution,
|
||
)
|
||
# Resource-id of audit row is the canonical (a,b) pair for grep-ability.
|
||
a, b = sorted([item_a_id, item_b_id])
|
||
_audit_action(
|
||
conn,
|
||
user["email"],
|
||
"resolve_duplicate",
|
||
f"{a}::{b}",
|
||
{"resolution": request.resolution, "item_a_id": a, "item_b_id": b},
|
||
)
|
||
return {
|
||
"item_a_id": a,
|
||
"item_b_id": b,
|
||
"resolved": True,
|
||
"resolution": request.resolution,
|
||
}
|
||
|
||
|
||
# ---- Admin PATCH + bulk-update + tree endpoints (issue #62) ----
|
||
|
||
|
||
@router.patch("/admin/{item_id}")
|
||
async def admin_patch_item(
|
||
item_id: str,
|
||
request: PatchItemRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Partial update — accepts category/domain/tags/audience/title/content.
|
||
|
||
Replaces the narrow ``POST /api/memory/admin/edit`` (kept one release as
|
||
a thin alias). Audit row tagged ``corporate_memory.update_item`` records
|
||
which fields changed (not the full diff — keep audit rows compact).
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
_get_item_or_404(repo, item_id)
|
||
|
||
# ``exclude_unset=True`` preserves explicit ``null`` values from the request
|
||
# body so callers can clear previously-set Optional fields (e.g. PATCH
|
||
# ``{"audience": null}`` resets audience to NULL). With ``exclude_none=True``
|
||
# those nulls were silently dropped — the only path to clear was the
|
||
# empty-string short-circuit on ``domain``, and ``audience`` had no clearing
|
||
# path at all. See PR #126 round-4 review.
|
||
updates = request.model_dump(exclude_unset=True)
|
||
if "domain" in updates and updates["domain"] and updates["domain"] not in VALID_DOMAINS:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"domain must be one of: {VALID_DOMAINS}",
|
||
)
|
||
# ``title`` is NOT NULL in the schema. ``exclude_unset=True`` lets explicit
|
||
# ``null`` through, which would 500 on a DuckDB constraint violation. Reject
|
||
# at the boundary so the caller gets a 400 with a clear message instead.
|
||
if "title" in updates and updates["title"] is None:
|
||
raise HTTPException(status_code=400, detail="title cannot be null")
|
||
if not updates:
|
||
return {"id": item_id, "updated": []}
|
||
|
||
# tags is a list — JSON-encode to match the column type, mirroring create().
|
||
repo_kwargs = dict(updates)
|
||
if "tags" in repo_kwargs:
|
||
repo_kwargs["tags"] = (
|
||
json.dumps(repo_kwargs["tags"]) if repo_kwargs["tags"] else None
|
||
)
|
||
repo.update(item_id, **repo_kwargs)
|
||
_audit_action(
|
||
conn,
|
||
user["email"],
|
||
"update_item",
|
||
item_id,
|
||
{"updated_fields": sorted(updates.keys())},
|
||
)
|
||
return {"id": item_id, "updated": sorted(updates.keys())}
|
||
|
||
|
||
@router.post("/admin/bulk-update")
|
||
async def admin_bulk_update(
|
||
request: BulkUpdateRequest,
|
||
user: dict = Depends(require_admin),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Apply ``updates`` to every id in ``item_ids``. Per-id audit rows.
|
||
|
||
Returns a per-id status map plus rolled-up convenience lists (200 even on
|
||
partial failure — the body distinguishes successes from misses).
|
||
"""
|
||
repo = KnowledgeRepository(conn)
|
||
updates = dict(request.updates or {})
|
||
# Reject governance-sensitive fields BEFORE hitting the repo. _UPDATABLE_FIELDS
|
||
# in the repo is broad on purpose; this endpoint is the narrow path. Callers
|
||
# that need to flip status/sensitivity/is_personal must use the dedicated
|
||
# governance endpoints so the right audit row is written. See PR #126 review.
|
||
disallowed = sorted(k for k in updates.keys() if k not in _BULK_UPDATE_ALLOWED)
|
||
if disallowed:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=(
|
||
f"updates contains disallowed field(s): {disallowed}. "
|
||
f"Allowed: {sorted(_BULK_UPDATE_ALLOWED)}"
|
||
),
|
||
)
|
||
if "domain" in updates and updates["domain"] and updates["domain"] not in VALID_DOMAINS:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"domain must be one of: {VALID_DOMAINS}",
|
||
)
|
||
# Mirror the PATCH boundary check — title is NOT NULL in the schema, so
|
||
# an explicit null here would fall through to a per-item Constraint Error
|
||
# in repo.bulk_update() instead of a clean 400 to the caller.
|
||
if "title" in updates and updates["title"] is None:
|
||
raise HTTPException(status_code=400, detail="title cannot be null")
|
||
if not request.item_ids:
|
||
return {"updated": [], "not_found": [], "errors": {}}
|
||
|
||
statuses = repo.bulk_update(request.item_ids, updates)
|
||
|
||
# Allowlist already enforced above, so every key in updates is auditable.
|
||
audited_fields = sorted(updates.keys())
|
||
updated: List[str] = []
|
||
not_found: List[str] = []
|
||
errors: dict = {}
|
||
for item_id, status in statuses.items():
|
||
if status == "updated":
|
||
updated.append(item_id)
|
||
_audit_action(
|
||
conn,
|
||
user["email"],
|
||
"bulk_update",
|
||
item_id,
|
||
{"updated_fields": audited_fields, "batch": True},
|
||
)
|
||
elif status == "not_found":
|
||
not_found.append(item_id)
|
||
else:
|
||
errors[item_id] = status
|
||
return {"updated": updated, "not_found": not_found, "errors": errors}
|
||
|
||
|
||
# Axes the tree endpoint groups by. Anything else → 400. Order matters for
|
||
# the default chip rendering in the UI.
|
||
_TREE_AXES = ("domain", "category", "tag", "audience")
|
||
|
||
|
||
def _label_for_axis(axis: str, key: Optional[str]) -> str:
|
||
"""Pretty bucket label for the tree UI. Falls back to the raw key."""
|
||
if key is None or key == "":
|
||
return {
|
||
"domain": "(no domain)",
|
||
"category": "(no category)",
|
||
"tag": "(no tag)",
|
||
"audience": "All users",
|
||
}.get(axis, "(unset)")
|
||
if axis == "audience" and key == "all":
|
||
return "All users"
|
||
if axis == "audience" and key.startswith("group:"):
|
||
return f"Group: {key[len('group:'):]}"
|
||
return key
|
||
|
||
|
||
def _matches_chip_filters(
|
||
item: dict,
|
||
*,
|
||
status_filter: Optional[str],
|
||
source_type: Optional[str],
|
||
audience: Optional[str],
|
||
has_duplicate_ids: Optional[set],
|
||
q: Optional[str],
|
||
) -> bool:
|
||
"""Apply chip filters to an already-RBAC-filtered item."""
|
||
if status_filter and item.get("status") != status_filter:
|
||
return False
|
||
if source_type and item.get("source_type") != source_type:
|
||
return False
|
||
if audience:
|
||
# Treat NULL audience as 'all' so chip-filter behavior matches the
|
||
# SQL audience filter, ``count_by_audience`` (COALESCE→'all'), and the
|
||
# tree's ``_bucket_key`` (NULL → 'all'). Without this coalesce,
|
||
# NULL-audience items disappear from the ``audience=all`` chip even
|
||
# though the rest of the system treats them as visible-to-everyone.
|
||
item_audience = item.get("audience") or "all"
|
||
if item_audience != audience:
|
||
return False
|
||
if has_duplicate_ids is not None and item.get("id") not in has_duplicate_ids:
|
||
return False
|
||
if q:
|
||
needle = q.lower()
|
||
title = (item.get("title") or "").lower()
|
||
content = (item.get("content") or "").lower()
|
||
if needle not in title and needle not in content:
|
||
return False
|
||
return True
|
||
|
||
|
||
@router.get("/tree")
|
||
async def get_tree(
|
||
axis: str = "domain",
|
||
status_filter: Optional[str] = None,
|
||
source_type: Optional[str] = None,
|
||
audience: Optional[str] = None,
|
||
q: Optional[str] = None,
|
||
has_duplicate: bool = False,
|
||
exclude_personal: bool = True,
|
||
page: int = 1,
|
||
per_page: int = 50,
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Server-side grouping for the Browse / Group-by tree (issue #62).
|
||
|
||
Returns ``{groups: [{key, label, count, items: [...]}]}`` already
|
||
RBAC-filtered + chip-filtered. The same ``_effective_groups`` and
|
||
``_can_view_item`` helpers used by ``GET /api/memory`` apply, so a
|
||
non-admin caller never sees personal items belonging to others, and
|
||
audience-restricted items only surface for members of the audience
|
||
group.
|
||
|
||
On the ``tag`` axis a single item appears once per tag it holds — that
|
||
is the intended "overlapping bucket" affordance. Every other axis puts
|
||
each item in its single canonical bucket.
|
||
"""
|
||
if axis not in _TREE_AXES:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"axis must be one of: {list(_TREE_AXES)}",
|
||
)
|
||
repo = KnowledgeRepository(conn)
|
||
is_priv = _is_privileged_viewer(user, conn)
|
||
effective_groups = _effective_groups(user, conn)
|
||
# Privacy parity with ``GET /api/memory``: non-admin can never opt out.
|
||
effective_exclude_personal = True if not is_priv else exclude_personal
|
||
|
||
page = max(page, 1)
|
||
per_page = max(min(per_page, 500), 1)
|
||
|
||
# has_duplicate=true narrows the candidate set to items present in any
|
||
# unresolved likely_duplicate relation. Computed once; intersected per
|
||
# item below.
|
||
has_duplicate_ids: Optional[set] = None
|
||
if has_duplicate:
|
||
rels = repo.list_relations(
|
||
relation_type=DUPLICATE_RELATION_TYPE, resolved=False, limit=10000,
|
||
)
|
||
has_duplicate_ids = {
|
||
id_ for r in rels for id_ in (r["item_a_id"], r["item_b_id"])
|
||
}
|
||
|
||
# Audience-axis privacy (decision 13): non-admins only see their own
|
||
# group buckets + null/all. Use the audience pre-filter on the SQL side
|
||
# so non-admins never accidentally see another group's bucket count.
|
||
granted_domains = _caller_granted_memory_domains(user, conn)
|
||
statuses = [status_filter] if status_filter else None
|
||
items = repo.list_items(
|
||
statuses=statuses,
|
||
source_type=source_type,
|
||
exclude_personal=effective_exclude_personal,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
limit=10000,
|
||
offset=0,
|
||
)
|
||
|
||
# Apply remaining chip filters that don't have a SQL layer yet.
|
||
visible: List[dict] = []
|
||
for item in items:
|
||
if not _can_view_item(user, item, is_priv):
|
||
continue
|
||
if not _matches_chip_filters(
|
||
item,
|
||
status_filter=None, # already in SQL
|
||
source_type=None, # already in SQL
|
||
audience=audience,
|
||
has_duplicate_ids=has_duplicate_ids,
|
||
q=q,
|
||
):
|
||
continue
|
||
visible.append(item)
|
||
|
||
# Group items by axis. tag is multi-bucket; everything else is single.
|
||
groups: dict = {}
|
||
|
||
def _bucket_key(item: dict, axis: str) -> List[str]:
|
||
if axis == "tag":
|
||
tags = item.get("tags")
|
||
if isinstance(tags, str):
|
||
try:
|
||
tags = json.loads(tags)
|
||
except json.JSONDecodeError:
|
||
tags = []
|
||
if not tags:
|
||
return [""]
|
||
return [str(t) for t in tags]
|
||
if axis == "audience":
|
||
aud = item.get("audience")
|
||
return [aud if aud else "all"]
|
||
if axis == "category":
|
||
return [item.get("category") or ""]
|
||
# default: domain
|
||
return [item.get("domain") or ""]
|
||
|
||
for item in visible:
|
||
for key in _bucket_key(item, axis):
|
||
bucket = groups.setdefault(key, {
|
||
"key": key,
|
||
"label": _label_for_axis(axis, key),
|
||
"items": [],
|
||
})
|
||
bucket["items"].append(item)
|
||
|
||
# Stable ordering: alphabetic on key with the empty bucket sinking to
|
||
# the bottom — the UI usually wants the "real" buckets up top.
|
||
ordered = sorted(
|
||
groups.values(),
|
||
key=lambda g: (g["key"] == "", g["key"].lower() if isinstance(g["key"], str) else ""),
|
||
)
|
||
for g in ordered:
|
||
g["count"] = len(g["items"])
|
||
|
||
# Page over groups (not items): operators paging through hundreds of
|
||
# tags want bucket-level pagination. The UI expands a bucket to see all
|
||
# its items.
|
||
start = (page - 1) * per_page
|
||
paged = ordered[start:start + per_page]
|
||
return {
|
||
"axis": axis,
|
||
"groups": paged,
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_groups": len(ordered),
|
||
"total_items": sum(g["count"] for g in ordered),
|
||
}
|
||
|
||
|
||
# ---- Bundle endpoint ----
|
||
|
||
@router.get("/bundle")
|
||
async def get_bundle(
|
||
user: dict = Depends(get_current_user),
|
||
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
|
||
):
|
||
"""Token-budgeted bundle of knowledge items for AI agent injection.
|
||
|
||
Mandatory items are always included regardless of the token budget.
|
||
Approved items are confidence×recency-ranked and included until the budget
|
||
is exhausted. Audience-filtered by the caller's group memberships (admins
|
||
see everything).
|
||
"""
|
||
from datetime import datetime, timezone
|
||
|
||
repo = KnowledgeRepository(conn)
|
||
effective_groups = _effective_groups(user, conn)
|
||
granted_domains = _caller_granted_memory_domains(user, conn)
|
||
|
||
# v46: the bundle is what AI agents inject as context, so the opt-out
|
||
# has real effect here — it's always-on for the calling user. Mandatory
|
||
# items are exempted by the EXISTS subquery's status guard inside
|
||
# ``list_items``; the user's dismissal row for a then-approved item is
|
||
# silently ignored if/when the item is later mandated.
|
||
dismissed_by_user_id = user["id"]
|
||
mandatory = repo.list_items(
|
||
statuses=["mandatory"],
|
||
exclude_personal=True,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
dismissed_by_user=dismissed_by_user_id,
|
||
hide_dismissed=True,
|
||
limit=1000,
|
||
offset=0,
|
||
)
|
||
|
||
approved = repo.list_items(
|
||
statuses=["approved"],
|
||
exclude_personal=True,
|
||
user_groups=effective_groups,
|
||
granted_domains=granted_domains,
|
||
dismissed_by_user=dismissed_by_user_id,
|
||
hide_dismissed=True,
|
||
limit=1000,
|
||
offset=0,
|
||
)
|
||
|
||
# Rank approved by confidence × recency (days since updated_at, max 365).
|
||
# updated_at is intentional: a recently admin-edited item reflects a human
|
||
# who just reviewed and corrected it, making it more trustworthy than an
|
||
# older untouched item. This differs from confidence.py which decays from
|
||
# created_at — the two scores serve different purposes (credibility vs freshness).
|
||
now = datetime.now(timezone.utc)
|
||
|
||
def _rank(item: dict) -> float:
|
||
confidence = float(item["confidence"]) if item.get("confidence") is not None else 0.5
|
||
updated_raw = item.get("updated_at")
|
||
if updated_raw:
|
||
try:
|
||
if isinstance(updated_raw, str):
|
||
from datetime import datetime as dt
|
||
updated = dt.fromisoformat(updated_raw.replace("Z", "+00:00"))
|
||
else:
|
||
updated = updated_raw
|
||
if updated.tzinfo is None:
|
||
from datetime import timezone as tz
|
||
updated = updated.replace(tzinfo=tz.utc)
|
||
age_days = max((now - updated).days, 0)
|
||
except Exception:
|
||
age_days = 365
|
||
else:
|
||
age_days = 365
|
||
recency = max(0.0, 1.0 - age_days / 365.0)
|
||
return confidence * recency
|
||
|
||
approved_ranked = sorted(approved, key=_rank, reverse=True)
|
||
|
||
def _token_est(item: dict) -> int:
|
||
return len((item.get("title", "") + " " + item.get("content", ""))) // _CHARS_PER_TOKEN
|
||
|
||
budget_remaining = BUNDLE_TOKEN_BUDGET - sum(_token_est(i) for i in mandatory)
|
||
approved_included = []
|
||
for item in approved_ranked:
|
||
cost = _token_est(item)
|
||
if budget_remaining - cost < 0:
|
||
break
|
||
approved_included.append(item)
|
||
budget_remaining -= cost
|
||
|
||
return {
|
||
"mandatory": mandatory,
|
||
"approved": approved_included,
|
||
"token_estimate": BUNDLE_TOKEN_BUDGET - budget_remaining,
|
||
"token_budget": BUNDLE_TOKEN_BUDGET,
|
||
}
|