# agnes-the-ai-analyst/services/verification_detector/__main__.py

"""CLI entry point for the verification detector service.

Usage:
    python -m services.verification_detector [--dry-run] [--verbose] [--reset]

TODO(scheduler-v2): Trigger is manual-only today (CLI) but detect_and_record is
also called inline per new knowledge item submission. Wire into
services/scheduler/__main__.py JOBS list (e.g. hourly) and expose an admin
endpoint /api/admin/run-verification that calls detector.run() so the
scheduler stays the single source of truth for cadence.

TODO(notifications): When new pending items land in knowledge_items via
detector.run(), there is no admin notification. Hook into services/telegram_bot
or email so km_admins are pinged with a digest of pending items to triage.
"""

import argparse
import logging
import sys

from app.logging_config import setup_logging
from src.db import get_system_db

from . import detector

logger = logging.getLogger(__name__)


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the verification detector CLI."""
    parser = argparse.ArgumentParser(
        description="Extract verified organizational knowledge from analyst session transcripts."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Analyze sessions but do not write results to the database.",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug-level logging.",
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Reset session processing state before running.",
    )
    return parser


def _print_summary(stats: dict, *, dry_run: bool, reset: bool) -> None:
    """Print the human-readable run summary to stdout.

    Args:
        stats: Mapping returned by ``detector.run``; the keys read here are
            ``sessions_scanned``, ``sessions_processed``, ``sessions_skipped``,
            ``verifications_extracted``, ``items_created`` and ``errors``.
        dry_run: Whether the run was started with ``--dry-run``.
        reset: Whether ``--reset`` cleared the extraction state before the run.
    """
    print("\n--- Verification Detector Summary ---")
    print(f"Sessions scanned:        {stats['sessions_scanned']}")
    print(f"Sessions processed:      {stats['sessions_processed']}")
    print(f"Sessions skipped:        {stats['sessions_skipped']}")
    print(f"Verifications extracted:  {stats['verifications_extracted']}")
    print(f"Items created:           {stats['items_created']}")
    if stats["errors"]:
        print(f"Errors:                  {len(stats['errors'])}")
        for err in stats["errors"]:
            print(f"  - {err}")
    if dry_run:
        if reset:
            # The reset DELETE runs regardless of --dry-run, so claiming that
            # nothing was written would be misleading.
            print(
                "\n(dry-run mode -- no results were written, "
                "but --reset cleared session extraction state)"
            )
        else:
            print("\n(dry-run mode -- no changes were written)")


def main() -> None:
    """Run the verification detector from the command line.

    Parses the CLI flags, configures logging, builds the LLM extractor,
    optionally clears ``session_extraction_state``, runs ``detector.run`` and
    prints a summary of the returned stats.

    Exits with status 1 when the extractor cannot be initialized or when the
    run reports any errors.
    """
    args = _build_parser().parse_args()

    setup_logging(__name__, level="DEBUG" if args.verbose else "INFO")

    # Load AI config; fail fast on missing config + env (#176).
    # Use the overlay-aware loader (#179 review fix) so an ai: block written
    # by /api/admin/configure to DATA_DIR/state/instance.yaml actually flows
    # through to the factory.
    from connectors.llm import create_extractor_from_env_or_config
    try:
        from app.instance_config import load_instance_config

        # Loading the instance config is best-effort: on failure the factory
        # is still called with no ai: block. The failure is logged rather than
        # dropped silently so a broken instance.yaml is visible to operators.
        try:
            config = load_instance_config()
        except FileNotFoundError as e:
            logger.debug(
                "Instance config not found (%s); continuing without an ai: block.", e
            )
            config = {}
        except ValueError as e:
            logger.warning(
                "Instance config could not be loaded (%s); continuing without an ai: block.",
                e,
            )
            config = {}
        ai_config = config.get("ai") if config else None
        extractor = create_extractor_from_env_or_config(ai_config)
    except (ValueError, FileNotFoundError) as e:
        logger.error(
            "Failed to initialize verification detector: %s. "
            "Configure ai: in instance.yaml or set ANTHROPIC_API_KEY / LLM_API_KEY.",
            e,
        )
        sys.exit(1)

    conn = get_system_db()

    if args.reset:
        if args.dry_run:
            # --dry-run is only forwarded to detector.run(); the reset below
            # is still executed, so make that explicit before it happens.
            logger.warning(
                "--reset is applied even with --dry-run: "
                "session_extraction_state will be cleared."
            )
        logger.info("Resetting session extraction state...")
        conn.execute("DELETE FROM session_extraction_state")
        logger.info("Session extraction state cleared.")

    stats = detector.run(conn, extractor, dry_run=args.dry_run)

    _print_summary(stats, dry_run=args.dry_run, reset=args.reset)

    # Non-zero exit status lets callers detect a run that reported errors.
    if stats["errors"]:
        sys.exit(1)


# Run the CLI only when this module is executed directly (e.g. via
# ``python -m services.verification_detector``), not when it is imported.
if __name__ == "__main__":
    main()