agnes-the-ai-analyst/app/api/memory.py
PavelDo e1108b6112
feat(memory): corporate memory v1+v1.5 + 0.15.0 (#72)
Adds corporate memory v1 (verification flywheel + contradiction detection + confidence scoring) and v1.5 (audience-based distribution + per-item privacy + admin curation). Server: GET /api/memory/bundle returns mandatory + ranked-approved items within a token budget; POST /api/memory/admin/mandate accepts an audience field gated against user_group_members; /api/memory/stats uses SQL aggregation. CLI: da sync writes received items to .claude/rules/km_*.md. Verification detector extracts knowledge candidates from session JSONL files. Auto-tagging via Haiku when ai: is configured. Adapted from the v9-era branch onto v13/v14 RBAC: _is_privileged_viewer + _effective_groups now query user_group_members JOIN user_groups; require_role(Role.KM_ADMIN) replaced with require_admin (km_admin collapsed into admin). Schema v15: knowledge_items context-engineering columns + knowledge_contradictions + session_extraction_state. Schema v16: verification_evidence. Cuts release v0.15.0 (also bundles #116 /me/debug page).
2026-04-29 07:16:22 +02:00

783 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Corporate memory endpoints — knowledge items, voting, governance admin, contradictions."""
import asyncio
import json
import logging
import uuid
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import duckdb
from app.auth.dependencies import get_current_user, _get_db
from app.auth.access import require_admin, is_user_admin
from src.repositories.knowledge import KnowledgeRepository
from src.repositories.audit import AuditRepository
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route registered on this router shares the /api/memory prefix.
router = APIRouter(prefix="/api/memory", tags=["memory"])
# Knowledge-item lifecycle states.
# NOTE(review): not referenced by the code visible in this module — presumably
# consumed elsewhere; confirm before removing.
VALID_STATUSES = ["pending", "approved", "mandatory", "rejected", "revoked", "expired"]
# Token ceiling used by GET /bundle: mandatory items are charged against it
# first, approved items fill whatever remains.
BUNDLE_TOKEN_BUDGET = 6000
# Rough chars-per-token estimate (conservative).
_CHARS_PER_TOKEN = 4
# Known knowledge domains.
# NOTE(review): not referenced by the code visible in this module — request
# models accept any domain string; confirm whether validation is intended.
VALID_DOMAINS = ["finance", "engineering", "product", "data", "operations", "infrastructure"]
def _is_privileged_viewer(user: dict, conn: duckdb.DuckDBPyConnection) -> bool:
    """Tell whether ``user`` belongs to the privileged (admin) viewer tier.

    Since RBAC v13 the former ``km_admin`` role no longer exists: the role
    hierarchy was collapsed into groups, so corporate-memory admin capability
    is simply admin membership, as decided by ``is_user_admin``.

    A user dict without a truthy ``id`` is never privileged and triggers no
    database lookup.

    If a finer-grained gate is ever needed (curator-only, etc.), add a
    ``ResourceType.CORPORATE_MEMORY_ADMIN`` resource type and gate with
    ``require_resource_access`` rather than extending this helper.
    """
    uid = user.get("id")
    return is_user_admin(uid, conn) if uid else False
def _effective_groups(
    user: dict, conn: duckdb.DuckDBPyConnection
) -> Optional[List[str]]:
    """Resolve the audience filter that applies to the caller.

    Returns:
        ``None`` for privileged viewers (meaning "no audience filter"),
        an empty list when the user dict has no usable ``id``, and otherwise
        one ``"group:<name>"`` entry per row found in ``user_group_members``
        joined to ``user_groups`` for that user.

    Group membership lives in ``user_group_members`` (the v13 model); the
    pre-v13 ``users.groups`` JSON column was dropped, and membership rows now
    carry a ``source`` discriminator (admin / google_sync / system_seed).
    """
    if _is_privileged_viewer(user, conn):
        return None

    uid = user.get("id")
    if not uid:
        return []

    membership_sql = (
        "SELECT g.name FROM user_group_members m\n"
        "JOIN user_groups g ON m.group_id = g.id\n"
        "WHERE m.user_id = ?"
    )
    memberships = conn.execute(membership_sql, [uid]).fetchall()

    audiences = []
    for row in memberships:
        audiences.append(f"group:{row[0]}")
    return audiences
def _can_view_item(user: dict, item: dict, is_priv: bool) -> bool:
"""Personal items are visible only to the contributor and privileged
viewers. Non-personal items are visible to any authenticated user.
``is_priv`` is pre-computed by the caller (one DB hit per request) so
a per-item loop doesn't re-query ``user_group_members`` for every row.
"""
if not item.get("is_personal"):
return True
if is_priv:
return True
return item.get("source_user") == user.get("email")
class CreateKnowledgeRequest(BaseModel):
    """Request body for creating a knowledge item (``POST /api/memory``)."""

    # Required text fields passed straight through to the repository.
    title: str
    content: str
    category: str
    # Caller-supplied tags; create_knowledge appends them after any
    # auto-generated topics, dropping duplicates.
    tags: Optional[List[str]] = None
    # NOTE(review): accepted as free text — not checked against VALID_DOMAINS
    # in this module; confirm whether that is intended.
    domain: Optional[str] = None
    entities: Optional[List[str]] = None
    # Forwarded to the repository only when truthy; otherwise omitted from
    # the create call.
    source_type: Optional[str] = None
class VoteRequest(BaseModel):
    """Request body for ``POST /api/memory/{item_id}/vote``."""

    # Must be 1, -1, or 0 (retract); vote_knowledge rejects anything else
    # with HTTP 400.
    vote: int
class PersonalFlagRequest(BaseModel):
    """Request body for ``POST /api/memory/{item_id}/personal``."""

    # Desired value of the item's personal flag (set, not toggled).
    is_personal: bool
class AdminActionRequest(BaseModel):
    """Request body shared by the admin reject / mandate / revoke endpoints."""

    # Free-text justification; recorded in the audit entry's details.
    reason: Optional[str] = None
    # Only acted on by admin_mandate, which writes it to the item when it is
    # not None. Readers compare audiences against 'all' and "group:<name>"
    # values. NOTE(review): the value is not validated here — confirm.
    audience: Optional[str] = None
class EditRequest(BaseModel):
    """Request body for ``POST /api/memory/admin/edit``.

    Fields left as ``None`` are not touched by admin_edit.
    """

    title: Optional[str] = None
    content: Optional[str] = None
class BatchActionRequest(BaseModel):
    """Request body for ``POST /api/memory/admin/batch``."""

    # Items the action is applied to; ids that do not resolve are reported
    # back under "not_found" rather than failing the whole batch.
    item_ids: List[str]
    action: str # approve, reject, mandate, revoke
    # Recorded in each item's audit entry.
    reason: Optional[str] = None
    # Written to each item only when action is "mandate" and this is not None.
    audience: Optional[str] = None
class ResolveContradictionRequest(BaseModel):
    """Request body for resolving a contradiction.

    admin_resolve_contradiction rejects any other value with HTTP 400.
    """

    resolution: str # kept_a, kept_b, merged, both_valid
class CreateContradictionRequest(BaseModel):
    """Request body for manually recording a contradiction between two items."""

    # Both ids must resolve to existing knowledge items (404 otherwise).
    item_a_id: str
    item_b_id: str
    # Human-readable description of how the two items conflict.
    explanation: str
    # Passed through to the repository unchanged.
    # NOTE(review): allowed values are not visible in this module — confirm.
    severity: Optional[str] = None
    suggested_resolution: Optional[str] = None
# ---- User endpoints ----
@router.get("")
async def list_knowledge(
    status_filter: Optional[str] = None,
    category: Optional[str] = None,
    domain: Optional[str] = None,
    source_type: Optional[str] = None,
    search: Optional[str] = None,
    exclude_personal: bool = True,
    page: int = 1,
    per_page: int = 50,
    sort: str = "updated_at",
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """List knowledge items with filtering, pagination and optional search.

    Args:
        status_filter: Restrict to a single status when given.
        category, domain, source_type: Optional equality filters forwarded to
            the repository.
        search: When truthy, ``repo.search`` is used instead of
            ``repo.list_items``.
        exclude_personal: Honoured only for privileged viewers; everyone else
            always has personal items excluded.
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Page size; values below 1 are treated as 1.
        sort: Accepted for API compatibility. NOTE(review): currently not
            forwarded to the repository — ordering is whatever the
            repository applies.

    Returns:
        Dict with ``items`` (each enriched with ``upvotes``, ``downvotes`` and
        ``score``), ``count``, ``page``, ``per_page``, ``total_count`` and
        ``total_pages``.
    """
    repo = KnowledgeRepository(conn)
    page = max(page, 1)
    # Clamp like ``page``: a zero or negative page size would otherwise reach
    # the repository as the LIMIT and produce a meaningless offset.
    per_page = max(per_page, 1)
    offset = (page - 1) * per_page

    # Privacy: non-privileged viewers can never opt out of the personal filter.
    # Their own personal contributions are visible via /my-contributions, not here.
    effective_exclude_personal = (
        exclude_personal if _is_privileged_viewer(user, conn) else True
    )
    effective_groups = _effective_groups(user, conn)
    statuses = [status_filter] if status_filter else None

    # Shared by the page query and the count query so both always agree on
    # which rows are in scope.
    filters = dict(
        statuses=statuses,
        category=category,
        domain=domain,
        source_type=source_type,
        exclude_personal=effective_exclude_personal,
        user_groups=effective_groups,
    )

    if search:
        items = repo.search(search, limit=per_page, offset=offset, **filters)
    else:
        items = repo.list_items(limit=per_page, offset=offset, **filters)

    # Enrich with votes (one repository call per item on the page).
    for item in items:
        votes = repo.get_votes(item["id"])
        item["upvotes"] = votes["upvotes"]
        item["downvotes"] = votes["downvotes"]
        item["score"] = votes["upvotes"] - votes["downvotes"]

    total_count = repo.count_items(search=search, **filters)
    # Integer ceiling division; per_page is guaranteed >= 1 here.
    total_pages = -(-total_count // per_page)

    return {
        "items": items,
        "count": len(items),
        "page": page,
        "per_page": per_page,
        "total_count": total_count,
        "total_pages": total_pages,
    }
@router.get("/stats")
async def get_stats(
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return aggregate corporate-memory statistics for the caller.

    Non-privileged callers never have personal items counted — not even their
    own. Otherwise ``total`` and the ``by_*`` buckets would shift observably
    whenever a colleague flags or unflags an item, leaking existence
    information (ADR Decision 1), and ``total`` would disagree with what
    ``GET /api/memory`` shows, since that endpoint forces
    ``exclude_personal=True`` for non-admins. ``/my-contributions`` remains
    the surface for a user's own personal items.

    Everything is computed with SQL aggregation instead of
    ``repo.list_items()`` so the endpoint stays cheap on large knowledge
    bases. The audience filter mirrors list_items: ``audience IS NULL OR
    audience = 'all'`` plus, for non-admins, any of the caller's
    group-prefixed audiences.

    Returns:
        Dict with ``total``, ``by_status``, ``categories`` (sorted distinct
        non-empty values), ``by_domain`` and ``by_source_type``.
    """
    privileged = _is_privileged_viewer(user, conn)
    audience_groups = _effective_groups(user, conn)

    conditions: List[str] = []
    bind_values: list = []

    if not privileged:
        conditions.append("(is_personal IS NULL OR is_personal = FALSE)")

    # ``None`` means admin → no audience restriction at all.
    if audience_groups is not None:
        if not audience_groups:
            conditions.append("(audience IS NULL OR audience = 'all')")
        else:
            marks = ",".join("?" for _ in audience_groups)
            conditions.append(
                f"(audience IS NULL OR audience = 'all' OR audience IN ({marks}))"
            )
            bind_values.extend(audience_groups)

    if conditions:
        where_sql = " WHERE " + " AND ".join(conditions)
    else:
        where_sql = ""

    def _counts_by(column: str, fallback: str, alias: str) -> dict:
        # One GROUP BY query per breakdown; NULLs are bucketed under ``fallback``.
        grouped = conn.execute(
            f"SELECT COALESCE({column}, '{fallback}') AS {alias}, COUNT(*) "
            f"FROM knowledge_items{where_sql} GROUP BY {alias}",
            bind_values,
        ).fetchall()
        return {row[0]: row[1] for row in grouped}

    total_row = conn.execute(
        f"SELECT COUNT(*) FROM knowledge_items{where_sql}", bind_values
    ).fetchone()
    total = total_row[0] or 0

    by_status = _counts_by("status", "unknown", "s")

    # The category predicate is appended to the existing WHERE when there is one.
    joiner = "AND" if where_sql else "WHERE"
    category_rows = conn.execute(
        f"SELECT DISTINCT category FROM knowledge_items{where_sql} "
        f"{joiner} category IS NOT NULL",
        bind_values,
    ).fetchall()
    categories = sorted(row[0] for row in category_rows if row[0])

    by_domain = _counts_by("domain", "unset", "d")
    by_source_type = _counts_by("source_type", "unknown", "st")

    return {
        "total": total,
        "by_status": by_status,
        "categories": categories,
        "by_domain": by_domain,
        "by_source_type": by_source_type,
    }
@router.post("", status_code=201)
async def create_knowledge(
    request: CreateKnowledgeRequest,
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Create a knowledge item attributed to the caller.

    The item gets a fresh UUID, the caller's email as ``source_user`` and an
    initial confidence of 0.50. When the instance config has an ``ai``
    section, an LLM extractor proposes topic tags; these are placed before
    the caller-supplied tags, with duplicates removed.

    Auto-tagging is best-effort: any failure is logged and the item is
    created with the caller's tags only.

    Returns:
        ``{"id": <new id>, "status": "pending"}``.
    """
    repo = KnowledgeRepository(conn)
    item_id = str(uuid.uuid4())

    # Best-effort auto-tagging — runs only when an LLM extractor is configured.
    tags = list(request.tags) if request.tags else []
    try:
        from config.loader import load_instance_config
        from connectors.llm import create_extractor
        from services.corporate_memory.tagger import auto_tag_items

        cfg = load_instance_config()
        ai_cfg = cfg.get("ai")
        if ai_cfg:
            extractor = create_extractor(ai_cfg)
            stub = [{"id": item_id, "title": request.title, "content": request.content}]
            # The tagger is synchronous; run it off the event loop.
            assignments = await asyncio.to_thread(auto_tag_items, stub, extractor)
            topics = assignments.get(item_id, [])
            if topics:
                # Order-preserving de-duplication: generated topics first,
                # then caller tags.
                tags = list(dict.fromkeys(topics + tags))
    except Exception:
        # Tagging is non-critical — never block item creation — but leave a
        # trace so a broken extractor configuration is diagnosable.
        logger.warning(
            "Auto-tagging failed for knowledge item %s; continuing without generated tags",
            item_id,
            exc_info=True,
        )

    create_kwargs = dict(
        id=item_id,
        title=request.title,
        content=request.content,
        category=request.category,
        source_user=user.get("email"),
        tags=tags or None,
        domain=request.domain,
        entities=request.entities,
        confidence=0.50,
    )
    # Only override the repository's own source_type handling when the caller
    # actually supplied one.
    if request.source_type:
        create_kwargs["source_type"] = request.source_type
    repo.create(**create_kwargs)
    return {"id": item_id, "status": "pending"}
@router.post("/{item_id}/vote")
async def vote_knowledge(
    item_id: str,
    request: VoteRequest,
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Cast, change or retract the caller's vote on an item.

    Responds 400 for a vote other than 1, -1 or 0, and 404 when the item is
    missing or not visible to the caller. Returns the item's vote tally after
    the change.
    """
    ballot = request.vote
    if ballot not in (1, -1, 0):
        raise HTTPException(status_code=400, detail="Vote must be 1, -1, or 0 (retract)")

    repo = KnowledgeRepository(conn)
    target = repo.get_by_id(item_id)
    # Invisible items answer exactly like missing ones.
    viewable = bool(target) and _can_view_item(
        user, target, _is_privileged_viewer(user, conn)
    )
    if not viewable:
        raise HTTPException(status_code=404, detail="Knowledge item not found")

    if ballot != 0:
        repo.vote(item_id, user["id"], ballot)
    else:
        repo.unvote(item_id, user["id"])
    return repo.get_votes(item_id)
@router.get("/my-votes")
async def get_my_votes(
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return every vote the caller has cast, as an ``item_id -> vote`` mapping."""
    my_rows = conn.execute(
        "SELECT item_id, vote FROM knowledge_votes WHERE user_id = ?", [user["id"]]
    ).fetchall()
    votes_by_item = {}
    for voted_item, value in ((r[0], r[1]) for r in my_rows):
        votes_by_item[voted_item] = value
    return votes_by_item
@router.get("/my-contributions")
async def get_my_contributions(
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return the items contributed by the caller, each with its vote tally.

    Contributions are looked up by the caller's email (empty string when the
    user dict has none). Every item gains ``upvotes``, ``downvotes`` and
    ``score``.
    """
    repo = KnowledgeRepository(conn)
    contributions = repo.get_user_contributions(user.get("email", ""))
    for entry in contributions:
        tally = repo.get_votes(entry["id"])
        ups, downs = tally["upvotes"], tally["downvotes"]
        entry["upvotes"] = ups
        entry["downvotes"] = downs
        entry["score"] = ups - downs
    return {"items": contributions, "count": len(contributions)}
@router.post("/{item_id}/personal")
async def toggle_personal_flag(
    item_id: str,
    request: PersonalFlagRequest,
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Set the personal/excluded flag on a knowledge item (contributor only).

    Raises:
        HTTPException 404: the item does not exist.
        HTTPException 403: the caller is not the item's contributor.

    Returns:
        ``{"id": item_id, "is_personal": <new value>}``.
    """
    repo = KnowledgeRepository(conn)
    item = repo.get_by_id(item_id)
    if not item:
        raise HTTPException(status_code=404, detail="Knowledge item not found")

    caller_email = user.get("email")
    # Fail closed: a caller with no email must not pass the ownership check
    # against an item that has no source_user (None == None).
    if not caller_email or item.get("source_user") != caller_email:
        raise HTTPException(status_code=403, detail="Only the contributor can flag personal items")

    repo.set_personal(item_id, request.is_personal)
    return {"id": item_id, "is_personal": request.is_personal}
@router.get("/{item_id}/provenance")
async def get_provenance(
    item_id: str,
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return the provenance fields of one knowledge item.

    Responds 404 when the item is missing or not visible to the caller.
    Fields absent from the stored item come back as ``None``.
    """
    repo = KnowledgeRepository(conn)
    record = repo.get_by_id(item_id)
    if not record or not _can_view_item(user, record, _is_privileged_viewer(user, conn)):
        raise HTTPException(status_code=404, detail="Knowledge item not found")

    provenance_fields = (
        "source_type",
        "source_ref",
        "source_user",
        "confidence",
        "domain",
        "entities",
        "valid_from",
        "valid_until",
        "supersedes",
        "created_at",
    )
    payload = {"id": item_id}
    for field_name in provenance_fields:
        payload[field_name] = record.get(field_name)
    return payload
# ---- Admin governance endpoints ----
def _get_item_or_404(repo: KnowledgeRepository, item_id: str) -> dict:
    """Fetch ``item_id`` through ``repo`` or abort the request with HTTP 404."""
    found = repo.get_by_id(item_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Knowledge item not found")
def _audit_action(
    conn,
    admin_email: str,
    action: str,
    item_id: str,
    details: Optional[dict] = None,
):
    """Write one governance entry to the audit log.

    Args:
        conn: Database connection handed to ``AuditRepository``.
        admin_email: Email of the acting admin. It is stored in the audit
            entry's ``user_id`` field — NOTE(review): email rather than the
            numeric/opaque user id; confirm this is the intended key.
        action: Short action name; persisted with a ``km_`` prefix so
            corporate-memory entries can be told apart from other audit rows.
        item_id: Identifier of the affected resource.
        details: Optional extra context stored as the entry's ``params``.
    """
    audit = AuditRepository(conn)
    audit.log(user_id=admin_email, action=f"km_{action}", resource=item_id, params=details)
@router.post("/admin/approve")
async def admin_approve(
    item_id: str,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Move an existing knowledge item to ``approved`` and audit the action.

    Admin only. Responds 404 when ``item_id`` does not resolve to an item.
    """
    knowledge = KnowledgeRepository(conn)
    _get_item_or_404(knowledge, item_id)  # existence check; the item itself is not needed
    outcome = "approved"
    knowledge.update_status(item_id, outcome)
    _audit_action(conn, user["email"], "approve", item_id)
    return {"id": item_id, "status": outcome}
@router.post("/admin/reject")
async def admin_reject(
    item_id: str,
    request: AdminActionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Move an existing knowledge item to ``rejected`` and audit the reason.

    Admin only. Responds 404 when ``item_id`` does not resolve to an item.
    """
    knowledge = KnowledgeRepository(conn)
    _get_item_or_404(knowledge, item_id)  # existence check; the item itself is not needed
    outcome = "rejected"
    knowledge.update_status(item_id, outcome)
    audit_details = {"reason": request.reason}
    _audit_action(conn, user["email"], "reject", item_id, audit_details)
    return {"id": item_id, "status": outcome}
@router.post("/admin/mandate")
async def admin_mandate(
    item_id: str,
    request: AdminActionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Move an existing knowledge item to ``mandatory``, optionally scoping it.

    Admin only. When the request carries an ``audience`` (anything other than
    ``None``), it is written to the item after the status change. Reason and
    audience are both recorded in the audit entry. Responds 404 when
    ``item_id`` does not resolve to an item.
    """
    knowledge = KnowledgeRepository(conn)
    _get_item_or_404(knowledge, item_id)  # existence check; the item itself is not needed
    outcome = "mandatory"
    knowledge.update_status(item_id, outcome)

    target_audience = request.audience
    if target_audience is not None:
        knowledge.update(item_id, audience=target_audience)

    audit_details = {"reason": request.reason, "audience": target_audience}
    _audit_action(conn, user["email"], "mandate", item_id, audit_details)
    return {"id": item_id, "status": outcome}
@router.post("/admin/revoke")
async def admin_revoke(
    item_id: str,
    request: AdminActionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Move an existing knowledge item to ``revoked`` and audit the reason.

    Admin only. Responds 404 when ``item_id`` does not resolve to an item.
    """
    knowledge = KnowledgeRepository(conn)
    _get_item_or_404(knowledge, item_id)  # existence check; the item itself is not needed
    outcome = "revoked"
    knowledge.update_status(item_id, outcome)
    audit_details = {"reason": request.reason}
    _audit_action(conn, user["email"], "revoke", item_id, audit_details)
    return {"id": item_id, "status": outcome}
@router.post("/admin/edit")
async def admin_edit(
    item_id: str,
    request: EditRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Overwrite the title and/or content of an existing knowledge item.

    Admin only. Only fields supplied as non-``None`` are changed. When nothing
    is supplied, neither the item nor the audit log is touched. Responds 404
    when ``item_id`` does not resolve to an item.

    Returns:
        ``{"id": item_id, "updated": [<names of changed fields>]}``.
    """
    knowledge = KnowledgeRepository(conn)
    _get_item_or_404(knowledge, item_id)

    candidates = (("title", request.title), ("content", request.content))
    changes = {field: value for field, value in candidates if value is not None}

    if changes:
        knowledge.update(item_id, **changes)
        _audit_action(conn, user["email"], "edit", item_id, changes)
    return {"id": item_id, "updated": list(changes.keys())}
@router.post("/admin/batch")
async def admin_batch(
    request: BatchActionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Apply one governance action to several items in a single request.

    Admin only. ``action`` must be one of approve / reject / mandate / revoke
    (400 otherwise). Ids that do not resolve are collected rather than
    aborting the batch. Each processed item gets its own audit entry flagged
    ``batch: True``.

    Returns:
        ``{"success": [...], "not_found": [...]}`` in request order.
    """
    repo = KnowledgeRepository(conn)
    status_for_action = {
        "approve": "approved",
        "reject": "rejected",
        "mandate": "mandatory",
        "revoke": "revoked",
    }
    target_status = status_for_action.get(request.action)
    if target_status is None:
        raise HTTPException(status_code=400, detail=f"Invalid action: {request.action}")

    # The audience is only ever written as part of a mandate.
    apply_audience = request.action == "mandate" and request.audience is not None

    succeeded = []
    missing = []
    for current_id in request.item_ids:
        if not repo.get_by_id(current_id):
            missing.append(current_id)
            continue
        repo.update_status(current_id, target_status)
        if apply_audience:
            repo.update(current_id, audience=request.audience)
        _audit_action(conn, user["email"], request.action, current_id, {
            "reason": request.reason, "audience": request.audience, "batch": True,
        })
        succeeded.append(current_id)

    return {"success": succeeded, "not_found": missing}
@router.get("/admin/pending")
async def admin_pending(
    category: Optional[str] = None,
    page: int = 1,
    per_page: int = 50,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return one page of the pending-review queue.

    Admin only.

    Args:
        category: Optional category filter.
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Page size; values below 1 are treated as 1.

    Returns:
        ``{"items": [...], "count": <items on this page>}``.
    """
    repo = KnowledgeRepository(conn)
    page = max(page, 1)
    # Clamp like ``page`` so a zero/negative size never reaches the
    # repository as the LIMIT or skews the offset.
    per_page = max(per_page, 1)
    offset = (page - 1) * per_page
    items = repo.list_items(statuses=["pending"], category=category, limit=per_page, offset=offset)
    return {"items": items, "count": len(items)}
@router.get("/admin/audit")
async def admin_audit(
    page: int = 1,
    per_page: int = 50,
    action: Optional[str] = None,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Return corporate-memory governance audit entries, newest first.

    Admin only.

    Args:
        page: 1-based page number (values below 1 are treated as 1). Applied
            to the unfiltered listing only — see the note in the body.
        per_page: Maximum number of entries; values below 1 are treated as 1.
        action: Optional action name *without* the ``km_`` prefix (e.g.
            ``approve``). When omitted, every ``km_``-prefixed action is
            returned.

    Returns:
        ``{"entries": [...], "count": <number of entries returned>}``.
    """
    page = max(page, 1)
    per_page = max(per_page, 1)

    if action:
        # Single-action filter goes through the repository.
        # NOTE(review): AuditRepository.query is only seen here with
        # ``action`` and ``limit``; it is not known to accept an offset, so
        # ``page`` is not applied on this path — confirm and extend.
        audit = AuditRepository(conn)
        entries = audit.query(action=f"km_{action}", limit=per_page)
        return {"entries": entries, "count": len(entries)}

    # No filter: list every governance action. ``_`` is a single-character
    # wildcard in LIKE, so it is escaped to match the literal ``km_`` prefix.
    offset = (page - 1) * per_page
    rows = conn.execute(
        "SELECT * FROM audit_log WHERE action LIKE 'km$_%' ESCAPE '$' "
        "ORDER BY timestamp DESC LIMIT ? OFFSET ?",
        [per_page, offset],
    ).fetchall()
    if rows:
        columns = [desc[0] for desc in conn.description]
        entries = [dict(zip(columns, row)) for row in rows]
    else:
        entries = []
    return {"entries": entries, "count": len(entries)}
# ---- Admin contradiction endpoints ----
@router.get("/admin/contradictions")
async def admin_contradictions(
    resolved: Optional[bool] = None,
    exclude_personal: bool = True,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """List recorded contradictions together with the two items involved.

    Admin only. With ``exclude_personal=True`` (the default) a personal item
    is replaced by the placeholder ``{"id": ..., "hidden": True}``: the
    contradiction stays visible for governance but the personal content is
    not exposed. Pass ``exclude_personal=false`` to receive full content
    (see ADR Decision 1).

    Args:
        resolved: Forwarded to the repository to filter by resolution state;
            ``None`` applies no filter.
        exclude_personal: Whether to mask personal items as described above.

    Returns:
        ``{"contradictions": [...], "count": n}`` where each contradiction
        carries ``item_a`` and ``item_b`` (``None`` when an id no longer
        resolves).
    """
    repo = KnowledgeRepository(conn)
    contradictions = repo.list_contradictions(resolved=resolved)

    # Gather the distinct item ids first so they are fetched in one batch.
    referenced_ids = set()
    for record in contradictions:
        referenced_ids.add(record["item_a_id"])
        referenced_ids.add(record["item_b_id"])
    items_by_id = repo.get_by_ids(list(referenced_ids))

    def _presentable(ref_id, loaded):
        # Mask personal content unless the admin explicitly opted in.
        if exclude_personal and loaded and loaded.get("is_personal"):
            return {"id": ref_id, "hidden": True}
        return loaded

    for record in contradictions:
        a_id = record["item_a_id"]
        b_id = record["item_b_id"]
        record["item_a"] = _presentable(a_id, items_by_id.get(a_id))
        record["item_b"] = _presentable(b_id, items_by_id.get(b_id))

    return {"contradictions": contradictions, "count": len(contradictions)}
@router.post("/admin/contradictions")
async def admin_create_contradiction(
    request: CreateContradictionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Manually record a contradiction between two knowledge items.

    Admin only.

    Raises:
        HTTPException 400: both ids refer to the same item.
        HTTPException 404: either item does not exist.

    Returns:
        ``{"id": <contradiction id>}``.
    """
    # An item cannot contradict itself; such a record could never be resolved
    # meaningfully ("kept_a" vs "kept_b" would be the same choice).
    if request.item_a_id == request.item_b_id:
        raise HTTPException(
            status_code=400,
            detail="A contradiction requires two different items",
        )

    repo = KnowledgeRepository(conn)
    if not repo.get_by_id(request.item_a_id):
        raise HTTPException(status_code=404, detail=f"Item A not found: {request.item_a_id}")
    if not repo.get_by_id(request.item_b_id):
        raise HTTPException(status_code=404, detail=f"Item B not found: {request.item_b_id}")

    cid = repo.create_contradiction(
        item_a_id=request.item_a_id,
        item_b_id=request.item_b_id,
        explanation=request.explanation,
        severity=request.severity,
        suggested_resolution=request.suggested_resolution,
    )
    return {"id": cid}
@router.post("/admin/contradictions/{contradiction_id}/resolve")
async def admin_resolve_contradiction(
    contradiction_id: str,
    request: ResolveContradictionRequest,
    user: dict = Depends(require_admin),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Record how a contradiction was settled and audit the decision.

    Admin only. Checks run in this order: unknown contradiction (404),
    already resolved (400), unsupported resolution value (400).

    Returns:
        ``{"id": contradiction_id, "resolved": True, "resolution": <value>}``.
    """
    repo = KnowledgeRepository(conn)
    record = repo.get_contradiction(contradiction_id)
    if not record:
        raise HTTPException(status_code=404, detail="Contradiction not found")
    if record.get("resolved"):
        raise HTTPException(status_code=400, detail="Contradiction already resolved")

    chosen = request.resolution
    accepted = ["kept_a", "kept_b", "merged", "both_valid"]
    if chosen not in accepted:
        raise HTTPException(
            status_code=400,
            detail=f"Resolution must be one of: {accepted}",
        )

    admin_email = user["email"]
    repo.resolve_contradiction(contradiction_id, admin_email, chosen)
    _audit_action(conn, admin_email, "resolve_contradiction", contradiction_id, {
        "resolution": chosen,
        "item_a_id": record["item_a_id"],
        "item_b_id": record["item_b_id"],
    })
    return {"id": contradiction_id, "resolved": True, "resolution": chosen}
# ---- Bundle endpoint ----
@router.get("/bundle")
async def get_bundle(
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Token-budgeted bundle of knowledge items for AI agent injection.

    Mandatory items are always included regardless of the token budget.
    Approved items are ranked by confidence × recency and added in rank order
    until the first one that no longer fits the remaining budget. Personal
    items are always excluded, and both lists are audience-filtered by the
    caller's group memberships (admins see everything). At most 1000 items
    per status are considered.

    Returns:
        Dict with ``mandatory``, ``approved`` (the included subset),
        ``token_estimate`` (estimated tokens of everything included) and
        ``token_budget``.
    """
    from datetime import datetime, timezone

    repo = KnowledgeRepository(conn)
    effective_groups = _effective_groups(user, conn)

    def _fetch(status: str) -> list:
        # Same visibility rules for both tiers; only the status differs.
        return repo.list_items(
            statuses=[status],
            exclude_personal=True,
            user_groups=effective_groups,
            limit=1000,
            offset=0,
        )

    mandatory = _fetch("mandatory")
    approved = _fetch("approved")

    # Rank approved by confidence × recency (days since updated_at, max 365).
    # updated_at is intentional: a recently admin-edited item reflects a human
    # who just reviewed and corrected it, making it more trustworthy than an
    # older untouched item. This differs from confidence.py which decays from
    # created_at — the two scores serve different purposes (credibility vs freshness).
    now = datetime.now(timezone.utc)

    def _rank(item: dict) -> float:
        confidence = float(item["confidence"]) if item.get("confidence") is not None else 0.5
        updated_raw = item.get("updated_at")
        # Missing or unparseable timestamps are treated as a year old, which
        # drives the recency factor (and so the rank) to zero.
        age_days = 365
        if updated_raw:
            try:
                if isinstance(updated_raw, str):
                    updated = datetime.fromisoformat(updated_raw.replace("Z", "+00:00"))
                else:
                    updated = updated_raw
                if updated.tzinfo is None:
                    # Naive timestamps are assumed to be UTC.
                    updated = updated.replace(tzinfo=timezone.utc)
                age_days = max((now - updated).days, 0)
            except (ValueError, TypeError, AttributeError, OverflowError):
                age_days = 365
        recency = max(0.0, 1.0 - age_days / 365.0)
        return confidence * recency

    approved_ranked = sorted(approved, key=_rank, reverse=True)

    def _token_est(item: dict) -> int:
        # ``or ""`` covers keys that are present but None, which would
        # otherwise make the concatenation raise TypeError.
        title = item.get("title") or ""
        content = item.get("content") or ""
        return len(title + " " + content) // _CHARS_PER_TOKEN

    # May already be negative when mandatory items alone exceed the budget;
    # in that case no approved item is added.
    budget_remaining = BUNDLE_TOKEN_BUDGET - sum(_token_est(i) for i in mandatory)
    approved_included = []
    for item in approved_ranked:
        cost = _token_est(item)
        if budget_remaining - cost < 0:
            # Stop at the first item that does not fit: lower-ranked items
            # are not allowed to jump ahead of a higher-ranked one.
            break
        approved_included.append(item)
        budget_remaining -= cost

    return {
        "mandatory": mandatory,
        "approved": approved_included,
        "token_estimate": BUNDLE_TOKEN_BUDGET - budget_remaining,
        "token_budget": BUNDLE_TOKEN_BUDGET,
    }