From 98a8aba3bebce65f31d7df7fe45623b669be5f95 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Tue, 5 May 2026 05:55:01 +0200
Subject: [PATCH] fix(tests): align test_llm_connector with new factory +
 fail-fast (#179 review)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PR rewrote collect_all() to call the new
create_extractor_from_env_or_config() helper, but the existing tests
still mocked the old direct create_extractor() symbol and the old
silent-skip-on-missing-config behavior. Five tests in
TestCorporateMemoryCollector and one in TestCollectorExtractorIntegration
were red on the PR branch.

Changes:
- Tests now mock connectors.llm.create_extractor_from_env_or_config
  (the symbol the collector imports lazily).
- Renamed test_collect_all_no_ai_config_skips ->
  test_collect_all_no_ai_config_or_env_raises and
  test_collector_handles_invalid_config -> test_collector_raises_on_invalid_config.
  Both assert pytest.raises(ValueError) — the explicit fail-fast
  semantics defect 5 of #176 was supposed to enforce.
- collect_all() no longer swallows the factory's ValueError into
  stats["errors"]; it propagates so the scheduler / admin endpoint
  surface the actionable misconfiguration message instead of
  pretending the run was a no-op.
- /api/admin/run-corporate-memory translates the propagated ValueError
  into a 500 with the factory's message, matching
  /api/admin/run-verification-detector.
---
 app/api/admin.py                       |  8 ++++++-
 services/corporate_memory/collector.py | 27 +++++++++++-----------
 tests/test_llm_connector.py            | 31 +++++++++++++-------------
 3 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/app/api/admin.py b/app/api/admin.py
index efaacff..c6a51a2 100644
--- a/app/api/admin.py
+++ b/app/api/admin.py
@@ -2887,7 +2887,13 @@ def run_corporate_memory(
     """
     from services.corporate_memory.collector import collect_all
 
-    stats = collect_all(dry_run=False)
+    # Fail-fast (#176): collect_all raises ValueError when no ai: block AND
+    # no env keys are present. Surface the actionable factory message in a
+    # 500 instead of letting it crash the request anonymously.
+    try:
+        stats = collect_all(dry_run=False)
+    except ValueError as e:
+        raise HTTPException(status_code=500, detail=str(e))
 
     AuditRepository(conn).log(
         user_id=user.get("id"),
diff --git a/services/corporate_memory/collector.py b/services/corporate_memory/collector.py
index 80a8449..10fd341 100644
--- a/services/corporate_memory/collector.py
+++ b/services/corporate_memory/collector.py
@@ -419,21 +419,20 @@ def collect_all(dry_run: bool = False) -> dict:
     # Step 2: Initialize AI extractor.
     # Fail-fast (#176): no silent skip on missing ai: block. The factory
     # falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a
-    # clear ValueError if neither config nor env is available.
-    try:
-        from config.loader import load_instance_config
-        from connectors.llm import create_extractor_from_env_or_config
+    # clear ValueError if neither config nor env is available — propagate
+    # the ValueError so the scheduler / admin endpoint surface the
+    # actionable misconfiguration message instead of swallowing it into
+    # stats["errors"]. FileNotFoundError on the static config path is fine
+    # to swallow because the factory's env fallback can still satisfy.
+    from config.loader import load_instance_config
+    from connectors.llm import create_extractor_from_env_or_config
 
-        try:
-            instance_config = load_instance_config()
-        except (ValueError, FileNotFoundError):
-            instance_config = {}
-        ai_config = instance_config.get("ai") if instance_config else None
-        extractor = create_extractor_from_env_or_config(ai_config)
-    except (ValueError, FileNotFoundError) as e:
-        stats["errors"].append(str(e))
-        logger.error("Failed to initialize AI extractor: %s", e)
-        return stats
+    try:
+        instance_config = load_instance_config()
+    except (ValueError, FileNotFoundError):
+        instance_config = {}
+    ai_config = instance_config.get("ai") if instance_config else None
+    extractor = create_extractor_from_env_or_config(ai_config)
 
     # Determine initial status for new items based on approval mode
     governance_config = instance_config.get("corporate_memory", {})
diff --git a/tests/test_llm_connector.py b/tests/test_llm_connector.py
index 29297d6..311adb4 100644
--- a/tests/test_llm_connector.py
+++ b/tests/test_llm_connector.py
@@ -871,7 +871,7 @@ class TestCorporateMemoryCollector:
                 return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
             ),
             patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                 return_value=mock_extractor,
             ),
         ):
@@ -921,7 +921,7 @@ class TestCorporateMemoryCollector:
                 return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
             ),
             patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                 return_value=mock_extractor,
             ),
         ):
@@ -991,7 +991,7 @@ class TestCorporateMemoryCollector:
                 return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
             ),
             patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                 return_value=mock_extractor,
             ),
         ):
@@ -1023,7 +1023,7 @@ class TestCorporateMemoryCollector:
                 return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
             ),
             patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                 return_value=mock_extractor,
             ),
         ):
@@ -1032,8 +1032,8 @@ class TestCorporateMemoryCollector:
         assert len(stats["errors"]) == 1
         assert "LLM error" in stats["errors"][0]
 
-    def test_collect_all_no_ai_config_skips(self, tmp_path):
-        """collect_all skips when instance.yaml has no ai: section."""
+    def test_collect_all_no_ai_config_or_env_raises(self, tmp_path, monkeypatch):
+        """collect_all fails fast when neither ai: config nor LLM env keys exist (#176)."""
         from services.corporate_memory.collector import collect_all
 
         home = tmp_path / "home"
@@ -1042,6 +1042,10 @@ class TestCorporateMemoryCollector:
         user_dir.mkdir()
         (user_dir / "CLAUDE.local.md").write_text("Some content")
 
+        # Make sure no env-var fallback is available so the factory raises.
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.delenv("LLM_API_KEY", raising=False)
+
         with (
             patch("services.corporate_memory.collector.HOME_BASE", home),
             patch("services.corporate_memory.collector._read_json", return_value={}),
@@ -1049,10 +1053,9 @@ class TestCorporateMemoryCollector:
                 "config.loader.load_instance_config",
                 return_value={"server": {"host": "example.com"}},
             ),
+            pytest.raises(ValueError, match="LLM not configured"),
         ):
-            stats = collect_all(dry_run=True)
-
-        assert stats["skipped"] is True
+            collect_all(dry_run=True)
 
 
 # ===================================================================
@@ -1323,8 +1326,8 @@ class TestCollectorExtractorIntegration:
         assert stats["items_extracted"] == 0
         assert stats["errors"] == []
 
-    def test_collector_handles_invalid_config(self, tmp_path):
-        """Collector returns error when config is invalid."""
+    def test_collector_raises_on_invalid_config(self, tmp_path):
+        """Collector fail-fasts (raises ValueError) when ai: config is invalid (#176)."""
         from services.corporate_memory.collector import collect_all
 
         home = tmp_path / "home"
@@ -1340,8 +1343,6 @@ class TestCollectorExtractorIntegration:
                 "config.loader.load_instance_config",
                 return_value={"ai": {"provider": "anthropic", "api_key": ""}},
             ),
+            pytest.raises(ValueError, match="must not be empty"),
         ):
-            stats = collect_all(dry_run=True)
-
-        assert len(stats["errors"]) == 1
-        assert "must not be empty" in stats["errors"][0]
+            collect_all(dry_run=True)