From 9f9aabd72bd223d35c8c04c7cec1219ad1edc5e7 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Tue, 5 May 2026 06:45:10 +0200
Subject: [PATCH] fix(corporate-memory): CLI catches fail-fast ValueError,
 exits 1 with clean message (Devin Review on #179)

The PR's #176 fail-fast change made collect_all() raise ValueError when
neither an ai: block nor ANTHROPIC_API_KEY/LLM_API_KEY was available.
verification_detector's CLI was updated to handle it; corporate_memory's
CLI was missed and crashed with an unhandled traceback.

services/corporate_memory/collector.py:main() now wraps the collect_all
call in try/except ValueError, prints a one-line actionable message
to stderr, and returns rc=1.

Regression test:
test_llm_connector.py::TestCorporateMemoryCollector::test_main_returns_1_on_no_ai_config_instead_of_traceback.
---
 CHANGELOG.md                           |  1 +
 services/corporate_memory/collector.py |  9 ++++++++-
 tests/test_llm_connector.py            | 21 +++++++++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d46ceb..8c6a1a0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -43,6 +43,7 @@ Five-defect fix for the silently-broken session pipeline on default Compose depl
 - **Defect 2 — side-car services gated by `profiles: [full]` were silently skipped on default deploys.** Both stanzas dropped (Changed above); the scheduler-v2 cron is the sole driver.
 - **Defect 1 — `/corporate-memory` filtered `status IN ('approved','mandatory')` with no hint that pending items existed.** Admin banner added (Added above).
 - **#179 review — `/api/admin/run-session-collector` would SystemExit the worker.** The endpoint called `collector.main()`, whose `argparse.parse_args()` parsed uvicorn's `sys.argv` (`['app.main:app', '--host', …]`) and called `sys.exit(2)` on the unrecognised flags. `SystemExit` inherits from `BaseException`, escapes FastAPI's exception machinery, and propagates through the thread pool — every scheduler tick that fired the endpoint either 500-ed or risked killing the uvicorn worker. Fix: `services/session_collector/collector.py` now exposes an argv-free `run(dry_run, verbose) -> (rc, stats)` helper; `main()` is a thin CLI shim around it and the admin endpoint calls `run()` directly. Audit log now carries the per-run stats (`users_processed`, `files_copied`, `files_skipped`) instead of just the rc. Regression tests in `tests/test_session_collector.py::TestRunHelper`.
+- **#179 review — `python -m services.corporate_memory` crashed on missing LLM config instead of exiting cleanly.** The PR's fail-fast change made `collect_all()` raise `ValueError` when neither an `ai:` block nor `ANTHROPIC_API_KEY`/`LLM_API_KEY` was available. The `verification_detector` CLI was updated to catch it; the corporate-memory CLI was missed. Now also wrapped — operators get a one-line `Corporate Memory cannot run: <factory message>` on stderr and rc=1 instead of a raw traceback. Regression test in `tests/test_llm_connector.py::TestCorporateMemoryCollector::test_main_returns_1_on_no_ai_config_instead_of_traceback`.
 
 ### Internal
 
diff --git a/services/corporate_memory/collector.py b/services/corporate_memory/collector.py
index 4484dca..b26c976 100644
--- a/services/corporate_memory/collector.py
+++ b/services/corporate_memory/collector.py
@@ -24,6 +24,7 @@ import hashlib
 import json
 import logging
 import os
+import sys
 import tempfile
 from datetime import datetime, timezone
 from pathlib import Path
@@ -624,7 +625,13 @@ def main() -> int:
         print("Data cleared. Running fresh collection...\n")
 
     logger.info("Starting knowledge collection...")
-    stats = collect_all(dry_run=args.dry_run)
+    try:
+        stats = collect_all(dry_run=args.dry_run)
+    except ValueError as e:
+        # collect_all() now fail-fasts on missing ai: config + env (#176).
+        # Print the actionable message instead of a raw traceback.
+        print(f"\nCorporate Memory cannot run: {e}", file=sys.stderr)
+        return 1
 
     print("\nCollection complete:")
     if stats["skipped"]:
diff --git a/tests/test_llm_connector.py b/tests/test_llm_connector.py
index 311adb4..75352a2 100644
--- a/tests/test_llm_connector.py
+++ b/tests/test_llm_connector.py
@@ -1057,6 +1057,27 @@ class TestCorporateMemoryCollector:
         ):
             collect_all(dry_run=True)
 
+    def test_main_returns_1_on_no_ai_config_instead_of_traceback(self, tmp_path, monkeypatch, capsys):
+        """CLI main() must catch the new fail-fast ValueError from collect_all() and exit cleanly.
+
+        Regression for Devin Review on #179: previously the CLI crashed with an
+        unhandled traceback when env + ai: config were both missing.
+        """
+        from services.corporate_memory import collector as cm
+
+        monkeypatch.setattr("sys.argv", ["corporate_memory"])
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.delenv("LLM_API_KEY", raising=False)
+        monkeypatch.setattr(cm, "CORPORATE_MEMORY_DIR", tmp_path / "cm")
+        monkeypatch.setattr(cm, "COLLECTION_LOG", tmp_path / "cm" / "log")
+        monkeypatch.setattr(cm, "collect_all", lambda dry_run=False: (_ for _ in ()).throw(ValueError("LLM not configured")))
+
+        rc = cm.main()
+        assert rc == 1
+        err = capsys.readouterr().err
+        assert "Corporate Memory cannot run" in err
+        assert "LLM not configured" in err
+
 
 # ===================================================================
 # Corporate Memory collector - helper function tests