fix(tests): align test_llm_connector with new factory + fail-fast (#179 review)

The PR rewrote collect_all() to call the new create_extractor_from_env_or_config() helper, but the existing tests still mocked the old direct create_extractor() symbol and the old silent-skip-on-missing-config behavior. Five tests in TestCorporateMemoryCollector and one in TestCollectorExtractorIntegration were red on the PR branch. Changes: - Tests now mock connectors.llm.create_extractor_from_env_or_config (the symbol the collector imports lazily). - Renamed test_collect_all_no_ai_config_skips -> test_collect_all_no_ai_config_or_env_raises and test_collector_handles_invalid_config -> test_collector_raises_on_invalid_config. Both assert pytest.raises(ValueError) — the explicit fail-fast semantics defect 5 of #176 was supposed to enforce. - collect_all() no longer swallows the factory's ValueError into stats["errors"]; it propagates so the scheduler / admin endpoint surface the actionable misconfiguration message instead of pretending the run was a no-op. - /api/admin/run-corporate-memory translates the propagated ValueError into a 500 with the factory's message, matching /api/admin/run-verification-detector.
2026-05-05 05:55:01 +02:00 · 2026-05-05 05:55:01 +02:00 · 98a8aba3be
commit 98a8aba3be
parent 567385d046
3 changed files with 36 additions and 30 deletions
--- a/app/api/admin.py
+++ b/app/api/admin.py
@ -2887,7 +2887,13 @@ def run_corporate_memory(
    """
    from services.corporate_memory.collector import collect_all

+    # Fail-fast (#176): collect_all raises ValueError when no ai: block AND
+    # no env keys are present. Surface the actionable factory message in a
+    # 500 instead of letting it crash the request anonymously.
+    try:
        stats = collect_all(dry_run=False)
+    except ValueError as e:
+        raise HTTPException(status_code=500, detail=str(e))

    AuditRepository(conn).log(
        user_id=user.get("id"),
--- a/services/corporate_memory/collector.py
+++ b/services/corporate_memory/collector.py
@ -419,8 +419,11 @@ def collect_all(dry_run: bool = False) -> dict:
    # Step 2: Initialize AI extractor.
    # Fail-fast (#176): no silent skip on missing ai: block. The factory
    # falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a
-    # clear ValueError if neither config nor env is available.
-    try:
+    # clear ValueError if neither config nor env is available — propagate
+    # the ValueError so the scheduler / admin endpoint surface the
+    # actionable misconfiguration message instead of swallowing it into
+    # stats["errors"]. FileNotFoundError on the static config path is fine
+    # to swallow because the factory's env fallback can still satisfy.
    from config.loader import load_instance_config
    from connectors.llm import create_extractor_from_env_or_config

@ -430,10 +433,6 @@ def collect_all(dry_run: bool = False) -> dict:
        instance_config = {}
    ai_config = instance_config.get("ai") if instance_config else None
    extractor = create_extractor_from_env_or_config(ai_config)
-    except (ValueError, FileNotFoundError) as e:
-        stats["errors"].append(str(e))
-        logger.error("Failed to initialize AI extractor: %s", e)
-        return stats

    # Determine initial status for new items based on approval mode
    governance_config = instance_config.get("corporate_memory", {})
--- a/tests/test_llm_connector.py
+++ b/tests/test_llm_connector.py
@ -871,7 +871,7 @@ class TestCorporateMemoryCollector:
                return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
            ),
            patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                return_value=mock_extractor,
            ),
        ):
@ -921,7 +921,7 @@ class TestCorporateMemoryCollector:
                return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
            ),
            patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                return_value=mock_extractor,
            ),
        ):
@ -991,7 +991,7 @@ class TestCorporateMemoryCollector:
                return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
            ),
            patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                return_value=mock_extractor,
            ),
        ):
@ -1023,7 +1023,7 @@ class TestCorporateMemoryCollector:
                return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
            ),
            patch(
-                "services.corporate_memory.collector.create_extractor",
+                "connectors.llm.create_extractor_from_env_or_config",
                return_value=mock_extractor,
            ),
        ):
@ -1032,8 +1032,8 @@ class TestCorporateMemoryCollector:
        assert len(stats["errors"]) == 1
        assert "LLM error" in stats["errors"][0]

-    def test_collect_all_no_ai_config_skips(self, tmp_path):
-        """collect_all skips when instance.yaml has no ai: section."""
+    def test_collect_all_no_ai_config_or_env_raises(self, tmp_path, monkeypatch):
+        """collect_all fails fast when neither ai: config nor LLM env keys exist (#176)."""
        from services.corporate_memory.collector import collect_all

        home = tmp_path / "home"
@ -1042,6 +1042,10 @@ class TestCorporateMemoryCollector:
        user_dir.mkdir()
        (user_dir / "CLAUDE.local.md").write_text("Some content")

+        # Make sure no env-var fallback is available so the factory raises.
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.delenv("LLM_API_KEY", raising=False)
+
        with (
            patch("services.corporate_memory.collector.HOME_BASE", home),
            patch("services.corporate_memory.collector._read_json", return_value={}),
@ -1049,10 +1053,9 @@ class TestCorporateMemoryCollector:
                "config.loader.load_instance_config",
                return_value={"server": {"host": "example.com"}},
            ),
+            pytest.raises(ValueError, match="LLM not configured"),
        ):
-            stats = collect_all(dry_run=True)
-
-        assert stats["skipped"] is True
+            collect_all(dry_run=True)


 # ===================================================================
@ -1323,8 +1326,8 @@ class TestCollectorExtractorIntegration:
        assert stats["items_extracted"] == 0
        assert stats["errors"] == []

-    def test_collector_handles_invalid_config(self, tmp_path):
-        """Collector returns error when config is invalid."""
+    def test_collector_raises_on_invalid_config(self, tmp_path):
+        """Collector fail-fasts (raises ValueError) when ai: config is invalid (#176)."""
        from services.corporate_memory.collector import collect_all

        home = tmp_path / "home"
@ -1340,8 +1343,6 @@ class TestCollectorExtractorIntegration:
                "config.loader.load_instance_config",
                return_value={"ai": {"provider": "anthropic", "api_key": ""}},
            ),
+            pytest.raises(ValueError, match="must not be empty"),
        ):
-            stats = collect_all(dry_run=True)
-
-        assert len(stats["errors"]) == 1
-        assert "must not be empty" in stats["errors"][0]
+            collect_all(dry_run=True)