From 98a8aba3bebce65f31d7df7fe45623b669be5f95 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Tue, 5 May 2026 05:55:01 +0200 Subject: [PATCH] fix(tests): align test_llm_connector with new factory + fail-fast (#179 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR rewrote collect_all() to call the new create_extractor_from_env_or_config() helper, but the existing tests still mocked the old direct create_extractor() symbol and the old silent-skip-on-missing-config behavior. Five tests in TestCorporateMemoryCollector and one in TestCollectorExtractorIntegration were red on the PR branch. Changes: - Tests now mock connectors.llm.create_extractor_from_env_or_config (the symbol the collector imports lazily). - Renamed test_collect_all_no_ai_config_skips -> test_collect_all_no_ai_config_or_env_raises and test_collector_handles_invalid_config -> test_collector_raises_on_invalid_config. Both assert pytest.raises(ValueError) — the explicit fail-fast semantics defect 5 of #176 was supposed to enforce. - collect_all() no longer swallows the factory's ValueError into stats["errors"]; it propagates so the scheduler / admin endpoint surface the actionable misconfiguration message instead of pretending the run was a no-op. - /api/admin/run-corporate-memory translates the propagated ValueError into a 500 with the factory's message, matching /api/admin/run-verification-detector. --- app/api/admin.py | 8 ++++++- services/corporate_memory/collector.py | 27 +++++++++++----------- tests/test_llm_connector.py | 31 +++++++++++++------------- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/app/api/admin.py b/app/api/admin.py index efaacff..c6a51a2 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2887,7 +2887,13 @@ def run_corporate_memory( """ from services.corporate_memory.collector import collect_all - stats = collect_all(dry_run=False) + # Fail-fast (#176): collect_all raises ValueError when no ai: block AND + # no env keys are present. Surface the actionable factory message in a + # 500 instead of letting it crash the request anonymously. + try: + stats = collect_all(dry_run=False) + except ValueError as e: + raise HTTPException(status_code=500, detail=str(e)) AuditRepository(conn).log( user_id=user.get("id"), diff --git a/services/corporate_memory/collector.py b/services/corporate_memory/collector.py index 80a8449..10fd341 100644 --- a/services/corporate_memory/collector.py +++ b/services/corporate_memory/collector.py @@ -419,21 +419,20 @@ def collect_all(dry_run: bool = False) -> dict: # Step 2: Initialize AI extractor. # Fail-fast (#176): no silent skip on missing ai: block. The factory # falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a - # clear ValueError if neither config nor env is available. - try: - from config.loader import load_instance_config - from connectors.llm import create_extractor_from_env_or_config + # clear ValueError if neither config nor env is available — propagate + # the ValueError so the scheduler / admin endpoint surface the + # actionable misconfiguration message instead of swallowing it into + # stats["errors"]. FileNotFoundError on the static config path is fine + # to swallow because the factory's env fallback can still satisfy. + from config.loader import load_instance_config + from connectors.llm import create_extractor_from_env_or_config - try: - instance_config = load_instance_config() - except (ValueError, FileNotFoundError): - instance_config = {} - ai_config = instance_config.get("ai") if instance_config else None - extractor = create_extractor_from_env_or_config(ai_config) - except (ValueError, FileNotFoundError) as e: - stats["errors"].append(str(e)) - logger.error("Failed to initialize AI extractor: %s", e) - return stats + try: + instance_config = load_instance_config() + except (ValueError, FileNotFoundError): + instance_config = {} + ai_config = instance_config.get("ai") if instance_config else None + extractor = create_extractor_from_env_or_config(ai_config) # Determine initial status for new items based on approval mode governance_config = instance_config.get("corporate_memory", {}) diff --git a/tests/test_llm_connector.py b/tests/test_llm_connector.py index 29297d6..311adb4 100644 --- a/tests/test_llm_connector.py +++ b/tests/test_llm_connector.py @@ -871,7 +871,7 @@ class TestCorporateMemoryCollector: return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}}, ), patch( - "services.corporate_memory.collector.create_extractor", + "connectors.llm.create_extractor_from_env_or_config", return_value=mock_extractor, ), ): @@ -921,7 +921,7 @@ class TestCorporateMemoryCollector: return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}}, ), patch( - "services.corporate_memory.collector.create_extractor", + "connectors.llm.create_extractor_from_env_or_config", return_value=mock_extractor, ), ): @@ -991,7 +991,7 @@ class TestCorporateMemoryCollector: return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}}, ), patch( - "services.corporate_memory.collector.create_extractor", + "connectors.llm.create_extractor_from_env_or_config", return_value=mock_extractor, ), ): @@ -1023,7 +1023,7 @@ class TestCorporateMemoryCollector: return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}}, ), patch( - "services.corporate_memory.collector.create_extractor", + "connectors.llm.create_extractor_from_env_or_config", return_value=mock_extractor, ), ): @@ -1032,8 +1032,8 @@ class TestCorporateMemoryCollector: assert len(stats["errors"]) == 1 assert "LLM error" in stats["errors"][0] - def test_collect_all_no_ai_config_skips(self, tmp_path): - """collect_all skips when instance.yaml has no ai: section.""" + def test_collect_all_no_ai_config_or_env_raises(self, tmp_path, monkeypatch): + """collect_all fails fast when neither ai: config nor LLM env keys exist (#176).""" from services.corporate_memory.collector import collect_all home = tmp_path / "home" @@ -1042,6 +1042,10 @@ class TestCorporateMemoryCollector: user_dir.mkdir() (user_dir / "CLAUDE.local.md").write_text("Some content") + # Make sure no env-var fallback is available so the factory raises. + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("LLM_API_KEY", raising=False) + with ( patch("services.corporate_memory.collector.HOME_BASE", home), patch("services.corporate_memory.collector._read_json", return_value={}), @@ -1049,10 +1053,9 @@ class TestCorporateMemoryCollector: "config.loader.load_instance_config", return_value={"server": {"host": "example.com"}}, ), + pytest.raises(ValueError, match="LLM not configured"), ): - stats = collect_all(dry_run=True) - - assert stats["skipped"] is True + collect_all(dry_run=True) # =================================================================== @@ -1323,8 +1326,8 @@ class TestCollectorExtractorIntegration: assert stats["items_extracted"] == 0 assert stats["errors"] == [] - def test_collector_handles_invalid_config(self, tmp_path): - """Collector returns error when config is invalid.""" + def test_collector_raises_on_invalid_config(self, tmp_path): + """Collector fail-fasts (raises ValueError) when ai: config is invalid (#176).""" from services.corporate_memory.collector import collect_all home = tmp_path / "home" @@ -1340,8 +1343,6 @@ class TestCollectorExtractorIntegration: "config.loader.load_instance_config", return_value={"ai": {"provider": "anthropic", "api_key": ""}}, ), + pytest.raises(ValueError, match="must not be empty"), ): - stats = collect_all(dry_run=True) - - assert len(stats["errors"]) == 1 - assert "must not be empty" in stats["errors"][0] + collect_all(dry_run=True)