"""Contract for session-pipeline processors. A processor is anything that, given a parsed Claude Code session jsonl file, emits some side effect — knowledge extraction, usage events, error metrics, security findings, etc. The runner (`services/session_pipeline/runner.py`) calls process_session() once per unprocessed file and persists state on success. """ from __future__ import annotations from dataclasses import dataclass from pathlib import Path from typing import Protocol, runtime_checkable import duckdb @dataclass(frozen=True) class ProcessorResult: """Per-session outcome surfaced to the runner. items_count is the number of records the processor produced (knowledge items, events, etc.) and is stored in session_processor_state.items_extracted for observability — not load-bearing for the framework's correctness.""" items_count: int = 0 @runtime_checkable class SessionProcessor(Protocol): """Implementations live in services/session_processors/.py and are listed in services/session_processors/__init__.py:PROCESSORS.""" name: str """Unique processor key. Used in session_processor_state.processor_name and as the URL query param for /api/admin/run-session-processor.""" cadence_minutes: int """How often the scheduler should invoke this processor. The actual schedule entry is built in services/scheduler/__main__.py from this value (env-overridable per processor).""" def process_session( self, session_path: Path, username: str, session_key: str, conn: duckdb.DuckDBPyConnection, ) -> ProcessorResult: """Process exactly one session jsonl. Idempotent per (name, session_key, file_hash). Raise = the runner will NOT mark this session as processed for this processor → it will be retried on the next scheduler tick. Return = the runner marks it processed and skips it next time (until its file_hash changes).""" ...