fix(tests): align test_llm_connector with new factory + fail-fast (#179 review)

The PR rewrote collect_all() to call the new
create_extractor_from_env_or_config() helper, but the existing tests
still mocked the old direct create_extractor() symbol and the old
silent-skip-on-missing-config behavior. Five tests in
TestCorporateMemoryCollector and one in TestCollectorExtractorIntegration
were red on the PR branch.

Changes:
- Tests now mock connectors.llm.create_extractor_from_env_or_config
  (the symbol the collector imports lazily).
- Renamed test_collect_all_no_ai_config_skips ->
  test_collect_all_no_ai_config_or_env_raises and
  test_collector_handles_invalid_config -> test_collector_raises_on_invalid_config.
  Both assert pytest.raises(ValueError) — the explicit fail-fast
  semantics defect 5 of #176 was supposed to enforce.
- collect_all() no longer swallows the factory's ValueError into
  stats["errors"]; it propagates so the scheduler / admin endpoint
  surface the actionable misconfiguration message instead of
  pretending the run was a no-op.
- /api/admin/run-corporate-memory translates the propagated ValueError
  into a 500 with the factory's message, matching
  /api/admin/run-verification-detector.
This commit is contained in:
ZdenekSrotyr 2026-05-05 05:55:01 +02:00
parent 567385d046
commit 98a8aba3be
3 changed files with 36 additions and 30 deletions

View file

@ -2887,7 +2887,13 @@ def run_corporate_memory(
"""
from services.corporate_memory.collector import collect_all
# Fail-fast (#176): collect_all raises ValueError when no ai: block AND
# no env keys are present. Surface the actionable factory message in a
# 500 instead of letting it crash the request anonymously.
try:
stats = collect_all(dry_run=False)
except ValueError as e:
raise HTTPException(status_code=500, detail=str(e))
AuditRepository(conn).log(
user_id=user.get("id"),

View file

@ -419,8 +419,11 @@ def collect_all(dry_run: bool = False) -> dict:
# Step 2: Initialize AI extractor.
# Fail-fast (#176): no silent skip on missing ai: block. The factory
# falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a
# clear ValueError if neither config nor env is available.
try:
# clear ValueError if neither config nor env is available — propagate
# the ValueError so the scheduler / admin endpoint surface the
# actionable misconfiguration message instead of swallowing it into
# stats["errors"]. FileNotFoundError on the static config path is fine
# to swallow because the factory's env fallback can still satisfy.
from config.loader import load_instance_config
from connectors.llm import create_extractor_from_env_or_config
@ -430,10 +433,6 @@ def collect_all(dry_run: bool = False) -> dict:
instance_config = {}
ai_config = instance_config.get("ai") if instance_config else None
extractor = create_extractor_from_env_or_config(ai_config)
except (ValueError, FileNotFoundError) as e:
stats["errors"].append(str(e))
logger.error("Failed to initialize AI extractor: %s", e)
return stats
# Determine initial status for new items based on approval mode
governance_config = instance_config.get("corporate_memory", {})

View file

@ -871,7 +871,7 @@ class TestCorporateMemoryCollector:
return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
),
patch(
"services.corporate_memory.collector.create_extractor",
"connectors.llm.create_extractor_from_env_or_config",
return_value=mock_extractor,
),
):
@ -921,7 +921,7 @@ class TestCorporateMemoryCollector:
return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
),
patch(
"services.corporate_memory.collector.create_extractor",
"connectors.llm.create_extractor_from_env_or_config",
return_value=mock_extractor,
),
):
@ -991,7 +991,7 @@ class TestCorporateMemoryCollector:
return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
),
patch(
"services.corporate_memory.collector.create_extractor",
"connectors.llm.create_extractor_from_env_or_config",
return_value=mock_extractor,
),
):
@ -1023,7 +1023,7 @@ class TestCorporateMemoryCollector:
return_value={"ai": {"provider": "anthropic", "api_key": "sk-test"}},
),
patch(
"services.corporate_memory.collector.create_extractor",
"connectors.llm.create_extractor_from_env_or_config",
return_value=mock_extractor,
),
):
@ -1032,8 +1032,8 @@ class TestCorporateMemoryCollector:
assert len(stats["errors"]) == 1
assert "LLM error" in stats["errors"][0]
def test_collect_all_no_ai_config_skips(self, tmp_path):
"""collect_all skips when instance.yaml has no ai: section."""
def test_collect_all_no_ai_config_or_env_raises(self, tmp_path, monkeypatch):
"""collect_all fails fast when neither ai: config nor LLM env keys exist (#176)."""
from services.corporate_memory.collector import collect_all
home = tmp_path / "home"
@ -1042,6 +1042,10 @@ class TestCorporateMemoryCollector:
user_dir.mkdir()
(user_dir / "CLAUDE.local.md").write_text("Some content")
# Make sure no env-var fallback is available so the factory raises.
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.delenv("LLM_API_KEY", raising=False)
with (
patch("services.corporate_memory.collector.HOME_BASE", home),
patch("services.corporate_memory.collector._read_json", return_value={}),
@ -1049,10 +1053,9 @@ class TestCorporateMemoryCollector:
"config.loader.load_instance_config",
return_value={"server": {"host": "example.com"}},
),
pytest.raises(ValueError, match="LLM not configured"),
):
stats = collect_all(dry_run=True)
assert stats["skipped"] is True
collect_all(dry_run=True)
# ===================================================================
@ -1323,8 +1326,8 @@ class TestCollectorExtractorIntegration:
assert stats["items_extracted"] == 0
assert stats["errors"] == []
def test_collector_handles_invalid_config(self, tmp_path):
"""Collector returns error when config is invalid."""
def test_collector_raises_on_invalid_config(self, tmp_path):
"""Collector fail-fasts (raises ValueError) when ai: config is invalid (#176)."""
from services.corporate_memory.collector import collect_all
home = tmp_path / "home"
@ -1340,8 +1343,6 @@ class TestCollectorExtractorIntegration:
"config.loader.load_instance_config",
return_value={"ai": {"provider": "anthropic", "api_key": ""}},
),
pytest.raises(ValueError, match="must not be empty"),
):
stats = collect_all(dry_run=True)
assert len(stats["errors"]) == 1
assert "must not be empty" in stats["errors"][0]
collect_all(dry_run=True)