Adds corporate memory v1 (verification flywheel + contradiction detection + confidence scoring) and v1.5 (audience-based distribution + per-item privacy + admin curation). Server: GET /api/memory/bundle returns mandatory + ranked-approved items within a token budget; POST /api/memory/admin/mandate accepts an audience field gated against user_group_members; /api/memory/stats uses SQL aggregation. CLI: da sync writes received items to .claude/rules/km_*.md. Verification detector extracts knowledge candidates from session JSONL files. Auto-tagging via Haiku when ai: is configured. Adapted from the v9-era branch onto v13/v14 RBAC: _is_privileged_viewer + _effective_groups now query user_group_members JOIN user_groups; require_role(Role.KM_ADMIN) replaced with require_admin (km_admin collapsed into admin). Schema v15: knowledge_items context-engineering columns + knowledge_contradictions + session_extraction_state. Schema v16: verification_evidence. Cuts release v0.15.0 (also bundles #116 /me/debug page).
85 lines
2.3 KiB
Python
85 lines
2.3 KiB
Python
"""Entity resolution v1 for corporate memory.

Simple case-insensitive string matching against a static entity registry.
Runs as post-processing on new knowledge items to tag them with recognized entities.
"""
|
|
|
|
import json
|
|
import logging
|
|
from typing import Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def build_entity_registry(
|
|
groups: dict[str, Any] | None = None,
|
|
domain_owners: dict[str, list[str]] | None = None,
|
|
entity_config: dict[str, list[str]] | None = None,
|
|
metric_names: list[str] | None = None,
|
|
) -> dict[str, list[str]]:
|
|
"""Build a flat entity registry from various config sources.
|
|
|
|
Returns dict mapping category -> list of entity names.
|
|
"""
|
|
registry: dict[str, list[str]] = {}
|
|
|
|
if groups:
|
|
registry["teams"] = list(groups.keys())
|
|
|
|
if domain_owners:
|
|
registry["domains"] = list(domain_owners.keys())
|
|
|
|
if entity_config:
|
|
for category, entities in entity_config.items():
|
|
registry[category] = entities
|
|
|
|
if metric_names:
|
|
existing_metrics = registry.get("metrics", [])
|
|
registry["metrics"] = list(set(existing_metrics + metric_names))
|
|
|
|
return registry
|
|
|
|
|
|
def resolve_entities(
    content: str,
    title: str,
    entity_registry: dict[str, list[str]],
) -> list[str]:
    """Find entity matches in title and content using case-insensitive substring matching.

    Args:
        content: Body text of the knowledge item.
        title: Title of the knowledge item.
        entity_registry: Mapping of category -> entity names to look for.
            Categories are only used for grouping; the result is flat.

    Returns:
        Sorted, deduplicated list of matched entity names, spelled as they
        appear in the registry. Blank or non-string registry entries are
        ignored.
    """
    haystack = f"{title} {content}".lower()
    matched: set[str] = set()

    for entities in entity_registry.values():
        for entity in entities:
            # An empty or whitespace-only name is a substring of every text
            # (the title/content join always contains a space), so it would
            # tag every item; skip it along with non-string entries.
            if not isinstance(entity, str) or not entity.strip():
                continue
            if entity.lower() in haystack:
                matched.add(entity)

    return sorted(matched)
|
|
|
|
|
|
def resolve_and_merge(
    item: dict,
    entity_registry: dict[str, list[str]],
) -> list[str]:
    """Resolve entities for an item and merge with any existing entity tags.

    Args:
        item: Knowledge item. Reads the ``"entities"`` key (a list of tags or
            a JSON-encoded list), plus ``"title"`` and ``"content"``.
        entity_registry: Mapping of category -> entity names, passed through
            to ``resolve_entities``.

    Returns:
        Combined, deduplicated, sorted entity list. Existing tags that are not
        strings, or an ``"entities"`` value that is not a list, are ignored.
    """
    existing = item.get("entities") or []
    if isinstance(existing, str):
        try:
            existing = json.loads(existing)
        except (json.JSONDecodeError, TypeError):
            existing = []

    # Valid JSON can still decode to a scalar, dict or bare string; feeding
    # those to set() would raise or split a string into characters.
    if not isinstance(existing, (list, tuple, set, frozenset)):
        existing = []
    # Keep only string tags so the final sort cannot hit unhashable or
    # mixed-type elements.
    existing_tags = {tag for tag in existing if isinstance(tag, str)}

    resolved = resolve_entities(
        # `or ""` also covers keys that are present but None; otherwise the
        # literal text "None" would be searched for entity names.
        content=item.get("content") or "",
        title=item.get("title") or "",
        entity_registry=entity_registry,
    )

    return sorted(existing_tags | set(resolved))
|