"""Repository for corporate memory knowledge items, votes, and contradictions.""" import json import uuid from datetime import datetime, timezone from typing import Any, Optional, List, Dict import duckdb class KnowledgeRepository: def __init__(self, conn: duckdb.DuckDBPyConnection): self.conn = conn def _row_to_dict(self, row) -> Optional[Dict[str, Any]]: if not row: return None columns = [desc[0] for desc in self.conn.description] return dict(zip(columns, row)) def _rows_to_dicts(self, rows) -> List[Dict[str, Any]]: if not rows: return [] columns = [desc[0] for desc in self.conn.description] return [dict(zip(columns, row)) for row in rows] def get_by_id(self, item_id: str) -> Optional[Dict[str, Any]]: result = self.conn.execute("SELECT * FROM knowledge_items WHERE id = ?", [item_id]).fetchone() return self._row_to_dict(result) def get_by_ids(self, item_ids: List[str]) -> Dict[str, Any]: """Fetch multiple items by ID in one query. Returns dict keyed by id.""" if not item_ids: return {} placeholders = ", ".join("?" for _ in item_ids) rows = self.conn.execute( f"SELECT * FROM knowledge_items WHERE id IN ({placeholders})", item_ids, ).fetchall() items = self._rows_to_dicts(rows) return {item["id"]: item for item in items} def create( self, id: str, title: str, content: str, category: str, source_user: Optional[str] = None, tags: Optional[List[str]] = None, status: str = "pending", confidence: Optional[float] = None, domain: Optional[str] = None, entities: Optional[List[str]] = None, source_type: str = "claude_local_md", source_ref: Optional[str] = None, valid_from: Optional[datetime] = None, valid_until: Optional[datetime] = None, supersedes: Optional[str] = None, sensitivity: str = "internal", is_personal: bool = False, ) -> None: now = datetime.now(timezone.utc) self.conn.execute( """INSERT INTO knowledge_items ( id, title, content, category, source_user, tags, status, confidence, domain, entities, source_type, source_ref, valid_from, valid_until, supersedes, sensitivity, is_personal, created_at, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", [ id, title, content, category, source_user, json.dumps(tags) if tags else None, status, confidence, domain, json.dumps(entities) if entities else None, source_type, source_ref, valid_from, valid_until, supersedes, sensitivity, is_personal, now, now, ], ) _UPDATABLE_FIELDS = { "title", "content", "category", "tags", "domain", "entities", "source_type", "source_ref", "source_user", "audience", "confidence", "status", "sensitivity", "is_personal", "valid_from", "valid_until", "supersedes", } def update(self, item_id: str, **fields) -> None: safe = {k: v for k, v in fields.items() if k in self._UPDATABLE_FIELDS} if not safe: return now = datetime.now(timezone.utc) set_clause = ", ".join(f"{k} = ?" for k in safe) values = list(safe.values()) + [now, item_id] self.conn.execute( f"UPDATE knowledge_items SET {set_clause}, updated_at = ? WHERE id = ?", values, ) def update_status(self, item_id: str, status: str) -> None: now = datetime.now(timezone.utc) self.conn.execute( "UPDATE knowledge_items SET status = ?, updated_at = ? WHERE id = ?", [status, now, item_id], ) def list_items( self, statuses: Optional[List[str]] = None, category: Optional[str] = None, domain: Optional[str] = None, source_type: Optional[str] = None, exclude_personal: bool = False, user_groups: Optional[List[str]] = None, granted_domains: Optional[List[str]] = None, limit: int = 100, offset: int = 0, ) -> List[Dict[str, Any]]: query = "SELECT * FROM knowledge_items WHERE 1=1" params: List[Any] = [] if statuses: placeholders = ", ".join("?" for _ in statuses) query += f" AND status IN ({placeholders})" params.extend(statuses) if category: query += " AND category = ?" params.append(category) if domain: query += " AND domain = ?" params.append(domain) if source_type: query += " AND source_type = ?" params.append(source_type) if exclude_personal: query += " AND (is_personal = FALSE OR is_personal IS NULL)" if user_groups is not None: # Visibility: audience-string match (null/all/group:X) OR # caller has been granted access to the item's domain via # resource_grants (MEMORY_DOMAIN). When ``granted_domains`` is # falsy the OR clause collapses, preserving pre-RBAC behaviour. visibility_clauses = ["audience IS NULL", "audience = 'all'"] if user_groups: audience_placeholders = ", ".join("?" for _ in user_groups) visibility_clauses.append(f"audience IN ({audience_placeholders})") params.extend(user_groups) if granted_domains: domain_placeholders = ", ".join("?" for _ in granted_domains) visibility_clauses.append(f"domain IN ({domain_placeholders})") params.extend(granted_domains) query += " AND (" + " OR ".join(visibility_clauses) + ")" query += " ORDER BY updated_at DESC LIMIT ? OFFSET ?" params.extend([limit, offset]) return self._rows_to_dicts(self.conn.execute(query, params).fetchall()) def search( self, query: str, exclude_personal: bool = False, user_groups: Optional[List[str]] = None, granted_domains: Optional[List[str]] = None, statuses: Optional[List[str]] = None, category: Optional[str] = None, domain: Optional[str] = None, source_type: Optional[str] = None, limit: int = 100, offset: int = 0, ) -> List[Dict[str, Any]]: pattern = f"%{query}%" sql = """SELECT * FROM knowledge_items WHERE (title ILIKE ? OR content ILIKE ?)""" params: List[Any] = [pattern, pattern] if statuses: placeholders = ", ".join("?" for _ in statuses) sql += f" AND status IN ({placeholders})" params.extend(statuses) if category: sql += " AND category = ?" params.append(category) if domain: sql += " AND domain = ?" params.append(domain) if source_type: sql += " AND source_type = ?" params.append(source_type) if exclude_personal: sql += " AND (is_personal = FALSE OR is_personal IS NULL)" if user_groups is not None: visibility_clauses = ["audience IS NULL", "audience = 'all'"] if user_groups: audience_placeholders = ", ".join("?" for _ in user_groups) visibility_clauses.append(f"audience IN ({audience_placeholders})") params.extend(user_groups) if granted_domains: domain_placeholders = ", ".join("?" for _ in granted_domains) visibility_clauses.append(f"domain IN ({domain_placeholders})") params.extend(granted_domains) sql += " AND (" + " OR ".join(visibility_clauses) + ")" sql += " ORDER BY updated_at DESC LIMIT ? OFFSET ?" params.extend([limit, offset]) results = self.conn.execute(sql, params).fetchall() return self._rows_to_dicts(results) def count_items( self, search: Optional[str] = None, statuses: Optional[List[str]] = None, category: Optional[str] = None, domain: Optional[str] = None, source_type: Optional[str] = None, exclude_personal: bool = False, user_groups: Optional[List[str]] = None, granted_domains: Optional[List[str]] = None, ) -> int: if search: pattern = f"%{search}%" sql = "SELECT COUNT(*) FROM knowledge_items WHERE (title ILIKE ? OR content ILIKE ?)" params: List[Any] = [pattern, pattern] else: sql = "SELECT COUNT(*) FROM knowledge_items WHERE 1=1" params = [] if statuses: placeholders = ", ".join("?" for _ in statuses) sql += f" AND status IN ({placeholders})" params.extend(statuses) if category: sql += " AND category = ?" params.append(category) if domain: sql += " AND domain = ?" params.append(domain) if source_type: sql += " AND source_type = ?" params.append(source_type) if exclude_personal: sql += " AND (is_personal = FALSE OR is_personal IS NULL)" if user_groups is not None: visibility_clauses = ["audience IS NULL", "audience = 'all'"] if user_groups: audience_placeholders = ", ".join("?" for _ in user_groups) visibility_clauses.append(f"audience IN ({audience_placeholders})") params.extend(user_groups) if granted_domains: domain_placeholders = ", ".join("?" for _ in granted_domains) visibility_clauses.append(f"domain IN ({domain_placeholders})") params.extend(granted_domains) sql += " AND (" + " OR ".join(visibility_clauses) + ")" return self.conn.execute(sql, params).fetchone()[0] def list_by_domain( self, domain: str, statuses: Optional[List[str]] = None, limit: int = 100, ) -> List[Dict[str, Any]]: query = "SELECT * FROM knowledge_items WHERE domain = ?" params: List[Any] = [domain] if statuses: placeholders = ", ".join("?" for _ in statuses) query += f" AND status IN ({placeholders})" params.extend(statuses) query += " ORDER BY updated_at DESC LIMIT ?" params.append(limit) return self._rows_to_dicts(self.conn.execute(query, params).fetchall()) def get_user_contributions(self, source_user: str) -> List[Dict[str, Any]]: results = self.conn.execute( "SELECT * FROM knowledge_items WHERE source_user = ? ORDER BY updated_at DESC", [source_user], ).fetchall() return self._rows_to_dicts(results) def set_personal(self, item_id: str, is_personal: bool) -> None: now = datetime.now(timezone.utc) self.conn.execute( "UPDATE knowledge_items SET is_personal = ?, updated_at = ? WHERE id = ?", [is_personal, now, item_id], ) # --- Votes --- def vote(self, item_id: str, user_id: str, vote: int) -> None: now = datetime.now(timezone.utc) self.conn.execute( """INSERT INTO knowledge_votes (item_id, user_id, vote, voted_at) VALUES (?, ?, ?, ?) ON CONFLICT (item_id, user_id) DO UPDATE SET vote = excluded.vote, voted_at = excluded.voted_at""", [item_id, user_id, vote, now], ) def unvote(self, item_id: str, user_id: str) -> None: self.conn.execute( "DELETE FROM knowledge_votes WHERE item_id = ? AND user_id = ?", [item_id, user_id], ) def get_votes(self, item_id: str) -> Dict[str, int]: result = self.conn.execute( """SELECT COALESCE(SUM(CASE WHEN vote > 0 THEN 1 ELSE 0 END), 0) as upvotes, COALESCE(SUM(CASE WHEN vote < 0 THEN 1 ELSE 0 END), 0) as downvotes FROM knowledge_votes WHERE item_id = ?""", [item_id], ).fetchone() return {"upvotes": result[0], "downvotes": result[1]} # --- Contradictions --- def create_contradiction( self, item_a_id: str, item_b_id: str, explanation: str, severity: Optional[str] = None, suggested_resolution: Optional[Any] = None, ) -> str: """Persist a contradiction. ``suggested_resolution`` may be either a free-form string (legacy callers) or a dict (the structured shape produced by Haiku — see ADR Decision 4). Dicts are JSON-encoded into the existing TEXT column so no schema migration is needed; the read side decodes back to dict. """ if isinstance(suggested_resolution, dict): suggested_resolution_db: Optional[str] = json.dumps(suggested_resolution) else: suggested_resolution_db = suggested_resolution contradiction_id = f"kc_{uuid.uuid4().hex[:12]}" self.conn.execute( """INSERT INTO knowledge_contradictions ( id, item_a_id, item_b_id, explanation, severity, suggested_resolution ) VALUES (?, ?, ?, ?, ?, ?)""", [contradiction_id, item_a_id, item_b_id, explanation, severity, suggested_resolution_db], ) return contradiction_id @staticmethod def _decode_suggested_resolution(row: Dict[str, Any]) -> Dict[str, Any]: """If the stored suggested_resolution is JSON, decode it to a dict. Plain strings (legacy rows) are returned unchanged. """ raw = row.get("suggested_resolution") if isinstance(raw, str) and raw and raw.lstrip().startswith("{"): try: row["suggested_resolution"] = json.loads(raw) except json.JSONDecodeError: pass return row def list_contradictions( self, resolved: Optional[bool] = None, limit: int = 100, ) -> List[Dict[str, Any]]: query = "SELECT * FROM knowledge_contradictions WHERE 1=1" params: List[Any] = [] if resolved is not None: query += " AND resolved = ?" params.append(resolved) query += " ORDER BY detected_at DESC LIMIT ?" params.append(limit) rows = self._rows_to_dicts(self.conn.execute(query, params).fetchall()) return [self._decode_suggested_resolution(r) for r in rows] def resolve_contradiction( self, contradiction_id: str, resolved_by: str, resolution: str, ) -> None: now = datetime.now(timezone.utc) self.conn.execute( """UPDATE knowledge_contradictions SET resolved = TRUE, resolved_by = ?, resolved_at = ?, resolution = ? WHERE id = ?""", [resolved_by, now, resolution, contradiction_id], ) def get_contradiction(self, contradiction_id: str) -> Optional[Dict[str, Any]]: result = self.conn.execute( "SELECT * FROM knowledge_contradictions WHERE id = ?", [contradiction_id], ).fetchone() row = self._row_to_dict(result) if row is not None: self._decode_suggested_resolution(row) return row # --- Verification Evidence --- def create_evidence( self, item_id: str, source_user: Optional[str] = None, source_ref: Optional[str] = None, detection_type: Optional[str] = None, user_quote: Optional[str] = None, ) -> str: """Persist one verification evidence row for a knowledge item. Multiple evidence rows per item are expected — each new analyst confirmation/correction adds one. user_quote and detection_type are the raw signal future Bayesian re-calibration consumes. """ evidence_id = f"ev_{uuid.uuid4().hex[:12]}" self.conn.execute( """INSERT INTO verification_evidence ( id, item_id, source_user, source_ref, detection_type, user_quote ) VALUES (?, ?, ?, ?, ?, ?)""", [evidence_id, item_id, source_user, source_ref, detection_type, user_quote], ) return evidence_id def list_evidence(self, item_id: str) -> List[Dict[str, Any]]: results = self.conn.execute( """SELECT * FROM verification_evidence WHERE item_id = ? ORDER BY created_at ASC""", [item_id], ).fetchall() return self._rows_to_dicts(results) # --- Session Extraction State --- def mark_session_processed( self, session_file: str, username: str, items_extracted: int = 0, file_hash: Optional[str] = None, ) -> None: now = datetime.now(timezone.utc) self.conn.execute( """INSERT INTO session_extraction_state (session_file, username, processed_at, items_extracted, file_hash) VALUES (?, ?, ?, ?, ?) ON CONFLICT (session_file) DO UPDATE SET processed_at = excluded.processed_at, items_extracted = excluded.items_extracted, file_hash = excluded.file_hash""", [session_file, username, now, items_extracted, file_hash], ) def is_session_processed(self, session_file: str) -> bool: result = self.conn.execute( "SELECT 1 FROM session_extraction_state WHERE session_file = ?", [session_file], ).fetchone() return result is not None # --- Item relations (duplicate-candidate hints, etc.) --- @staticmethod def _canonical_pair(a: str, b: str) -> tuple[str, str]: """Return (min(a,b), max(a,b)) — every unordered pair maps to one row.""" return (a, b) if a <= b else (b, a) def create_relation( self, item_a_id: str, item_b_id: str, relation_type: str, score: Optional[float] = None, ) -> None: """Persist a relation row. Idempotent on (item_a_id, item_b_id, relation_type). The PK is canonicalized to (min, max) so duplicate calls with reversed arguments don't create a second row. Self-relations (a == b) are rejected — a pair must reference two distinct items. """ if item_a_id == item_b_id: raise ValueError("Cannot create relation between an item and itself") a, b = self._canonical_pair(item_a_id, item_b_id) self.conn.execute( """INSERT INTO knowledge_item_relations (item_a_id, item_b_id, relation_type, score) VALUES (?, ?, ?, ?) ON CONFLICT (item_a_id, item_b_id, relation_type) DO NOTHING""", [a, b, relation_type, score], ) def list_relations( self, relation_type: Optional[str] = None, resolved: Optional[bool] = None, limit: int = 100, ) -> List[Dict[str, Any]]: sql = "SELECT * FROM knowledge_item_relations WHERE 1=1" params: List[Any] = [] if relation_type is not None: sql += " AND relation_type = ?" params.append(relation_type) if resolved is not None: sql += " AND resolved = ?" params.append(resolved) sql += " ORDER BY created_at DESC LIMIT ?" params.append(limit) return self._rows_to_dicts(self.conn.execute(sql, params).fetchall()) def resolve_relation( self, item_a_id: str, item_b_id: str, relation_type: str, resolved_by: str, resolution: str, ) -> int: """Mark a relation row resolved. Returns rowcount (0 if not found).""" a, b = self._canonical_pair(item_a_id, item_b_id) now = datetime.now(timezone.utc) # DuckDB doesn't expose UPDATE rowcount via the cursor API uniformly; # do an existence check first so callers can report 404 vs success. existing = self.conn.execute( """SELECT 1 FROM knowledge_item_relations WHERE item_a_id = ? AND item_b_id = ? AND relation_type = ?""", [a, b, relation_type], ).fetchone() if not existing: return 0 self.conn.execute( """UPDATE knowledge_item_relations SET resolved = TRUE, resolved_by = ?, resolved_at = ?, resolution = ? WHERE item_a_id = ? AND item_b_id = ? AND relation_type = ?""", [resolved_by, now, resolution, a, b, relation_type], ) return 1 def get_relation( self, item_a_id: str, item_b_id: str, relation_type: str, ) -> Optional[Dict[str, Any]]: a, b = self._canonical_pair(item_a_id, item_b_id) result = self.conn.execute( """SELECT * FROM knowledge_item_relations WHERE item_a_id = ? AND item_b_id = ? AND relation_type = ?""", [a, b, relation_type], ).fetchone() return self._row_to_dict(result) def find_duplicate_candidates_by_entities( self, new_item_id: str, entities: Optional[List[str]], domain: Optional[str], min_overlap: int, limit: int = 100, ) -> List[Dict[str, Any]]: """Same-domain candidates whose ``entities`` set shares >= ``min_overlap`` members with ``entities``. Personal items are excluded (privacy boundary — see ADR Decision 1 precedent in ``find_contradiction_candidates``). Self-id is excluded. Domain is a hard SQL conjunct: a NULL-domain item produces no candidates (matches the verification-detector skip-empty contract). Jaccard is computed in Python because DuckDB lacks a portable JSON intersection helper; the SQL layer trims the candidate set to the same domain so the Python loop scales linearly with that. """ if not entities or not domain: return [] new_set = set(entities) sql = """ SELECT * FROM knowledge_items WHERE status IN ('approved', 'mandatory', 'pending') AND (is_personal = FALSE OR is_personal IS NULL) AND domain = ? AND id != ? AND entities IS NOT NULL ORDER BY updated_at DESC LIMIT ? """ rows = self._rows_to_dicts( self.conn.execute(sql, [domain, new_item_id, limit]).fetchall() ) out: List[Dict[str, Any]] = [] for row in rows: cand_entities = row.get("entities") if isinstance(cand_entities, str): try: cand_entities = json.loads(cand_entities) except json.JSONDecodeError: continue if not isinstance(cand_entities, list) or not cand_entities: continue cand_set = set(cand_entities) overlap = new_set & cand_set if len(overlap) < min_overlap: continue union = new_set | cand_set jaccard = len(overlap) / len(union) if union else 0.0 row["overlap_count"] = len(overlap) row["jaccard"] = jaccard out.append(row) return out # --- Bulk update + tag/audience aggregations (issue #62) --- def bulk_update( self, item_ids: List[str], updates: Dict[str, Any], ) -> Dict[str, str]: """Apply ``updates`` to each item id; partial-failure tolerant. ``updates`` may include the standard ``_UPDATABLE_FIELDS`` keys plus the bulk-only ``tags_add`` / ``tags_remove`` lists. Tag mutations are merged with the item's existing tags so callers don't have to fetch first. Returns a per-id status map: ``"updated"`` / ``"not_found"`` / an error message. """ results: Dict[str, str] = {} if not item_ids: return results plain_fields = { k: v for k, v in updates.items() if k in self._UPDATABLE_FIELDS and k != "tags" } # If the caller passed an explicit ``tags`` list, treat it as a hard # set (same semantics as repo.update). Add/remove are applied per item. explicit_tags = updates.get("tags") if "tags" in updates else None tags_add = updates.get("tags_add") or [] tags_remove = updates.get("tags_remove") or [] for item_id in item_ids: try: item = self.get_by_id(item_id) if not item: results[item_id] = "not_found" continue per_item: Dict[str, Any] = dict(plain_fields) if explicit_tags is not None: per_item["tags"] = explicit_tags elif tags_add or tags_remove: existing = item.get("tags") or [] if isinstance(existing, str): try: existing = json.loads(existing) except json.JSONDecodeError: existing = [] if not isinstance(existing, list): existing = [] new_tags = list(existing) for t in tags_add: if t not in new_tags: new_tags.append(t) if tags_remove: rm = set(tags_remove) new_tags = [t for t in new_tags if t not in rm] per_item["tags"] = new_tags if not per_item: results[item_id] = "updated" # nothing to do, treat as success continue # JSON-encode tags before passing to .update (mirrors create()). if "tags" in per_item: per_item["tags"] = ( json.dumps(per_item["tags"]) if per_item["tags"] else None ) if "entities" in per_item and isinstance(per_item["entities"], list): per_item["entities"] = json.dumps(per_item["entities"]) if per_item["entities"] else None self.update(item_id, **per_item) results[item_id] = "updated" except Exception as e: # pragma: no cover - defensive results[item_id] = f"error: {e}" return results def count_by_tag( self, exclude_personal: bool = False, user_groups: Optional[List[str]] = None, granted_domains: Optional[List[str]] = None, ) -> Dict[str, int]: """Aggregate item counts per tag (one tag may belong to many items). Uses DuckDB ``json_each`` to unnest the JSON tag list. Items with no tags don't contribute. Visibility filter mirrors ``count_items`` (audience OR MEMORY_DOMAIN grant). """ where = ["tags IS NOT NULL"] params: List[Any] = [] if exclude_personal: where.append("(is_personal = FALSE OR is_personal IS NULL)") if user_groups is not None: visibility = ["audience IS NULL", "audience = 'all'"] if user_groups: ph = ",".join(["?"] * len(user_groups)) visibility.append(f"audience IN ({ph})") params.extend(user_groups) if granted_domains: dph = ",".join(["?"] * len(granted_domains)) visibility.append(f"domain IN ({dph})") params.extend(granted_domains) where.append("(" + " OR ".join(visibility) + ")") where_sql = " WHERE " + " AND ".join(where) sql = ( "SELECT t.value AS tag, COUNT(*) AS cnt " "FROM knowledge_items, json_each(knowledge_items.tags) AS t " f"{where_sql} " "GROUP BY t.value ORDER BY cnt DESC" ) rows = self.conn.execute(sql, params).fetchall() out: Dict[str, int] = {} for tag, cnt in rows: # json_each returns the raw scalar; strip wrapping quotes if needed. key = tag if isinstance(tag, str) else str(tag) if key.startswith('"') and key.endswith('"'): key = key[1:-1] out[key] = cnt return out def count_by_audience( self, exclude_personal: bool = False, user_groups: Optional[List[str]] = None, granted_domains: Optional[List[str]] = None, ) -> Dict[str, int]: """Aggregate item counts per audience bucket. ``audience`` is a free-form column whose canonical values are ``NULL`` / ``'all'`` / ``'group:'``. NULL is bucketed as ``'all'`` so the chip-filter UI doesn't need a separate "no audience" affordance. Visibility filter mirrors ``count_items`` (audience OR MEMORY_DOMAIN grant). """ where: List[str] = [] params: List[Any] = [] if exclude_personal: where.append("(is_personal = FALSE OR is_personal IS NULL)") if user_groups is not None: visibility = ["audience IS NULL", "audience = 'all'"] if user_groups: ph = ",".join(["?"] * len(user_groups)) visibility.append(f"audience IN ({ph})") params.extend(user_groups) if granted_domains: dph = ",".join(["?"] * len(granted_domains)) visibility.append(f"domain IN ({dph})") params.extend(granted_domains) where.append("(" + " OR ".join(visibility) + ")") where_sql = (" WHERE " + " AND ".join(where)) if where else "" sql = ( "SELECT COALESCE(audience, 'all') AS aud, COUNT(*) AS cnt " f"FROM knowledge_items{where_sql} " "GROUP BY aud ORDER BY cnt DESC" ) rows = self.conn.execute(sql, params).fetchall() return {r[0]: r[1] for r in rows} def find_contradiction_candidates( self, new_item_id: str, domain: Optional[str] = None, limit: int = 100, ) -> List[Dict[str, Any]]: """Same-domain candidates for LLM-based contradiction judgment. Domain is the only narrowing applied at the SQL layer. Topic / content matching is delegated to the LLM judge in services.corporate_memory.contradiction.find_and_judge() — see ADR Decision 4. The brittle keyword-substring layer that used to live here was removed; it had recall holes (synonyms, paraphrases) and the domain conjunct alone is enough as a hard ACL. Personal items (`is_personal = TRUE`) are excluded unconditionally — the LLM call is a read site (and exfiltrates content to the external API), so ADR Decision 1 ("hard privacy boundary, not a UI hint") applies. Without this filter, personal item content would be serialized into every contradiction prompt and could be paraphrased into `knowledge_contradictions.suggested_resolution.merged_content` — bypassing the contributor-only visibility rule. """ sql = """ SELECT * FROM knowledge_items WHERE status IN ('approved', 'mandatory', 'pending') AND (is_personal = FALSE OR is_personal IS NULL) AND id != ? """ params: List[Any] = [new_item_id] if domain: sql += " AND domain = ?" params.append(domain) sql += " ORDER BY updated_at DESC LIMIT ?" params.append(limit) return self._rows_to_dicts(self.conn.execute(sql, params).fetchall())