From bbb04ac041420fadb95e6555105b783d45149a72 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 23:55:19 +0200 Subject: [PATCH] fix(setup): seed default ai: block + env-var fallback (#176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POST /api/admin/configure now writes a default ai: block into the instance.yaml overlay when the request leaves it untouched and either ANTHROPIC_API_KEY or LLM_API_KEY is set in the environment. The block references the env var via ${VAR} syntax — secrets never land in YAML. connectors.llm.factory grows create_extractor_from_env_or_config which falls back to ANTHROPIC_API_KEY / LLM_API_KEY when ai_config is empty and raises a clear ValueError when neither is available. Both services/corporate_memory and services/verification_detector switch to the new helper, replacing the old 'silently skip when ai: missing' path that was the silent-failure root cause. Tests: - tests/test_setup_ai_block.py — overlay seeding contract. - tests/test_llm_provider_env_fallback.py — fallback + fail-fast. --- app/api/admin.py | 23 +++++ connectors/llm/__init__.py | 8 +- connectors/llm/factory.py | 43 +++++++++ services/corporate_memory/collector.py | 19 ++-- services/verification_detector/__main__.py | 23 +++-- tests/test_llm_provider_env_fallback.py | 80 +++++++++++++++ tests/test_setup_ai_block.py | 107 +++++++++++++++++++++ 7 files changed, 283 insertions(+), 20 deletions(-) create mode 100644 tests/test_llm_provider_env_fallback.py create mode 100644 tests/test_setup_ai_block.py diff --git a/app/api/admin.py b/app/api/admin.py index 67c75a5..7a2f115 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2626,6 +2626,29 @@ async def configure_instance( "location": request.bigquery_location or "us", } + # Seed an ai: block on first-time setup so LLM-driven services + # (corporate_memory, verification_detector) can boot without manual + # YAML editing. Only inserts when the overlay has no ai: yet AND an + # appropriate env var is present — never overwrites operator config, + # never writes a placeholder block (#176). + if "ai" not in overlay: + anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() + llm_key = os.environ.get("LLM_API_KEY", "").strip() + if anthropic_key: + overlay["ai"] = { + "provider": "anthropic", + "api_key": "${ANTHROPIC_API_KEY}", + "model": "claude-haiku-4-5-20251001", + "structured_output": "auto", + } + elif llm_key: + overlay["ai"] = { + "provider": "anthropic", + "api_key": "${LLM_API_KEY}", + "model": "claude-haiku-4-5-20251001", + "structured_output": "auto", + } + # Atomic write to writable data volume — same tmp + os.replace pattern # as the server-config editor so a concurrent save can't tear the file. config_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/connectors/llm/__init__.py b/connectors/llm/__init__.py index ea01dd9..d754171 100644 --- a/connectors/llm/__init__.py +++ b/connectors/llm/__init__.py @@ -6,6 +6,10 @@ providers with automatic fallback strategies for structured output. """ from .base import StructuredExtractor -from .factory import create_extractor +from .factory import create_extractor, create_extractor_from_env_or_config -__all__ = ["StructuredExtractor", "create_extractor"] +__all__ = [ + "StructuredExtractor", + "create_extractor", + "create_extractor_from_env_or_config", +] diff --git a/connectors/llm/factory.py b/connectors/llm/factory.py index b7896c0..15e9718 100644 --- a/connectors/llm/factory.py +++ b/connectors/llm/factory.py @@ -5,6 +5,7 @@ and creates the appropriate StructuredExtractor implementation. """ import logging +import os from urllib.parse import urlparse from .anthropic_provider import AnthropicExtractor @@ -120,6 +121,48 @@ def create_extractor(ai_config: dict) -> StructuredExtractor: ) +def create_extractor_from_env_or_config( + ai_config: dict | None, +) -> StructuredExtractor: + """Build an extractor from config, falling back to env vars. + + Resolution order (#176): + + 1. ``ai_config`` is a non-empty dict → delegate to :func:`create_extractor`. + 2. ``ANTHROPIC_API_KEY`` set → AnthropicExtractor with the default model. + 3. ``LLM_API_KEY`` set without a base_url → AnthropicExtractor (the proxy + case typically also wires a base_url, in which case the operator should + use the explicit ai: block; this fallback is a best-effort convenience). + 4. Otherwise raise ``ValueError`` with a clear actionable message — never + silently exit, never return ``None``. The previous "skip when ai: is + missing" behavior was the silent-failure root cause in #176. + """ + if ai_config: + return create_extractor(ai_config) + + anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() + llm_key = os.environ.get("LLM_API_KEY", "").strip() + + if anthropic_key: + logger.info( + "No ai: block in instance.yaml; falling back to ANTHROPIC_API_KEY env var" + ) + return AnthropicExtractor(api_key=anthropic_key, model=DEFAULT_MODEL) + + if llm_key: + logger.info( + "No ai: block in instance.yaml; falling back to LLM_API_KEY env var" + ) + return AnthropicExtractor(api_key=llm_key, model=DEFAULT_MODEL) + + raise ValueError( + "LLM not configured. Add an ai: block to instance.yaml (see " + "config/instance.yaml.example) OR set ANTHROPIC_API_KEY / LLM_API_KEY " + "in the environment. The corporate-memory and verification-detector " + "services cannot run without one of these." + ) + + def _validate_api_key(api_key: str) -> None: """Validate that an API key is present and non-empty. diff --git a/services/corporate_memory/collector.py b/services/corporate_memory/collector.py index c1ecd32..80a8449 100644 --- a/services/corporate_memory/collector.py +++ b/services/corporate_memory/collector.py @@ -416,17 +416,20 @@ def collect_all(dry_run: bool = False) -> dict: stats["skipped"] = True return stats - # Step 2: Initialize AI extractor + # Step 2: Initialize AI extractor. + # Fail-fast (#176): no silent skip on missing ai: block. The factory + # falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a + # clear ValueError if neither config nor env is available. try: from config.loader import load_instance_config + from connectors.llm import create_extractor_from_env_or_config - instance_config = load_instance_config() - ai_config = instance_config.get("ai") - if not ai_config: - logger.warning("No ai: section in instance.yaml, skipping catalog refresh") - stats["skipped"] = True - return stats - extractor = create_extractor(ai_config) + try: + instance_config = load_instance_config() + except (ValueError, FileNotFoundError): + instance_config = {} + ai_config = instance_config.get("ai") if instance_config else None + extractor = create_extractor_from_env_or_config(ai_config) except (ValueError, FileNotFoundError) as e: stats["errors"].append(str(e)) logger.error("Failed to initialize AI extractor: %s", e) diff --git a/services/verification_detector/__main__.py b/services/verification_detector/__main__.py index 243fda6..851389d 100644 --- a/services/verification_detector/__main__.py +++ b/services/verification_detector/__main__.py @@ -49,22 +49,25 @@ def main() -> None: setup_logging(__name__, level="DEBUG" if args.verbose else "INFO") - # Load AI config lazily (same pattern as corporate memory collector) + # Load AI config; fail fast on missing config + env (#176). + from connectors.llm import create_extractor_from_env_or_config try: from config.loader import load_instance_config - config = load_instance_config() - ai_config = config.get("ai") - if not ai_config: - logger.error("No ai: section in instance.yaml, cannot run verification detector") - sys.exit(1) + try: + config = load_instance_config() + except (ValueError, FileNotFoundError): + config = {} + ai_config = config.get("ai") if config else None + extractor = create_extractor_from_env_or_config(ai_config) except (ValueError, FileNotFoundError) as e: - logger.error("Failed to load config: %s", e) + logger.error( + "Failed to initialize verification detector: %s. " + "Configure ai: in instance.yaml or set ANTHROPIC_API_KEY / LLM_API_KEY.", + e, + ) sys.exit(1) - from connectors.llm import create_extractor - - extractor = create_extractor(ai_config) conn = get_system_db() if args.reset: diff --git a/tests/test_llm_provider_env_fallback.py b/tests/test_llm_provider_env_fallback.py new file mode 100644 index 0000000..a5af1b2 --- /dev/null +++ b/tests/test_llm_provider_env_fallback.py @@ -0,0 +1,80 @@ +"""LLM provider env-var fallback + fail-fast behavior. + +When no ai: block is configured, `connectors.llm.factory.create_extractor` +must: + +1. Build an extractor from `ANTHROPIC_API_KEY` / `LLM_API_KEY` if either + env var is set (so an operator who only edited .env still gets a + working pipeline). +2. Fail fast with a clear error if neither config nor env is available. + No silent skip. + +Closes one of five defects in #176. +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from connectors.llm.factory import create_extractor, create_extractor_from_env_or_config + + +class TestEnvFallback: + """Mocks the AnthropicExtractor constructor so the tests don't try to + open a live SDK client (which loads system SSL certs at __init__ time + and is unhappy on machines with corporate CA-bundle env vars pointing + at a non-existent file). The test surface is the factory routing logic, + not the SDK wiring — that's covered by tests/test_llm_providers_full.py. + """ + + def test_anthropic_env_fallback_builds_extractor(self, monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env-aaa") + monkeypatch.delenv("LLM_API_KEY", raising=False) + with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls: + ex = create_extractor_from_env_or_config(ai_config=None) + assert ex is mock_cls.return_value + kwargs = mock_cls.call_args.kwargs + assert kwargs["api_key"] == "sk-ant-from-env-aaa" + + def test_llm_api_key_env_fallback_builds_extractor(self, monkeypatch): + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setenv("LLM_API_KEY", "sk-proxy-from-env-bbb") + with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls: + ex = create_extractor_from_env_or_config(ai_config=None) + assert ex is mock_cls.return_value + kwargs = mock_cls.call_args.kwargs + assert kwargs["api_key"] == "sk-proxy-from-env-bbb" + + def test_no_config_no_env_fails_fast(self, monkeypatch): + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("LLM_API_KEY", raising=False) + with pytest.raises(ValueError) as excinfo: + create_extractor_from_env_or_config(ai_config=None) + msg = str(excinfo.value) + # Error must mention BOTH config + env paths so operators know how to fix it. + assert "instance.yaml" in msg or "ai:" in msg + assert "ANTHROPIC_API_KEY" in msg + + def test_explicit_ai_config_wins_over_env(self, monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "ignored-because-config-wins") + ai_cfg = { + "provider": "anthropic", + "api_key": "sk-ant-from-cfg-zzz", + "model": "claude-haiku-4-5-20251001", + } + with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls: + ex = create_extractor_from_env_or_config(ai_config=ai_cfg) + assert ex is mock_cls.return_value + kwargs = mock_cls.call_args.kwargs + assert kwargs["api_key"] == "sk-ant-from-cfg-zzz" + + def test_empty_dict_falls_through_to_env(self, monkeypatch): + """ai: {} is treated the same as no block — fall through to env vars.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env-ccc") + with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls: + ex = create_extractor_from_env_or_config(ai_config={}) + assert ex is mock_cls.return_value + kwargs = mock_cls.call_args.kwargs + assert kwargs["api_key"] == "sk-ant-from-env-ccc" diff --git a/tests/test_setup_ai_block.py b/tests/test_setup_ai_block.py new file mode 100644 index 0000000..cc79603 --- /dev/null +++ b/tests/test_setup_ai_block.py @@ -0,0 +1,107 @@ +"""Tests for /api/admin/configure writing a default ai: block. + +First-time setup must seed an ai: section in the instance.yaml overlay so +LLM-driven services (corporate_memory, verification_detector) can boot +without a manual edit. Closes one of five defects in #176. +""" + +from __future__ import annotations + +import yaml +from unittest.mock import patch + + +def _auth(token: str) -> dict: + return {"Authorization": f"Bearer {token}"} + + +def _read_overlay(env: dict) -> dict: + overlay_path = env["data_dir"] / "state" / "instance.yaml" + if not overlay_path.exists(): + return {} + return yaml.safe_load(overlay_path.read_text()) or {} + + +class TestConfigureSeedsAiBlock: + def test_configure_seeds_ai_block_when_anthropic_api_key_is_set(self, seeded_app, monkeypatch): + """ANTHROPIC_API_KEY in the env → overlay gets an ai: block referencing it.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-keyvalue") + c = seeded_app["client"] + token = seeded_app["admin_token"] + resp = c.post( + "/api/admin/configure", + json={"data_source": "local"}, + headers=_auth(token), + ) + assert resp.status_code == 200 + overlay = _read_overlay(seeded_app["env"]) + assert "ai" in overlay, "configure must seed ai: block when ANTHROPIC_API_KEY is set" + ai = overlay["ai"] + assert ai.get("provider") == "anthropic" + # The overlay stores the env-var reference (${ANTHROPIC_API_KEY}), not + # the raw secret — secrets belong in .env_overlay only. + assert ai.get("api_key") == "${ANTHROPIC_API_KEY}" + assert "model" in ai + + def test_configure_seeds_ai_block_when_llm_api_key_is_set(self, seeded_app, monkeypatch): + """LLM_API_KEY (proxy/openai_compat fallback) is also acceptable.""" + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setenv("LLM_API_KEY", "sk-proxy-keyvalue") + c = seeded_app["client"] + token = seeded_app["admin_token"] + resp = c.post( + "/api/admin/configure", + json={"data_source": "local"}, + headers=_auth(token), + ) + assert resp.status_code == 200 + overlay = _read_overlay(seeded_app["env"]) + assert "ai" in overlay + # The fallback uses ${LLM_API_KEY} — same env-var-reference pattern. + assert overlay["ai"].get("api_key") == "${LLM_API_KEY}" + + def test_configure_does_not_seed_ai_when_no_key_in_env(self, seeded_app, monkeypatch): + """No env keys → no ai block written. Operator must add manually. + + We deliberately do not write a placeholder block: the LLM services + fail-fast on a missing block and the operator gets a clear error. + """ + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("LLM_API_KEY", raising=False) + c = seeded_app["client"] + token = seeded_app["admin_token"] + resp = c.post( + "/api/admin/configure", + json={"data_source": "local"}, + headers=_auth(token), + ) + assert resp.status_code == 200 + overlay = _read_overlay(seeded_app["env"]) + assert "ai" not in overlay + + def test_configure_preserves_existing_ai_block(self, seeded_app, monkeypatch): + """If overlay already has ai: section, configure must not overwrite it.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env") + # Pre-populate the overlay with a custom ai block. + overlay_path = seeded_app["env"]["data_dir"] / "state" / "instance.yaml" + overlay_path.parent.mkdir(parents=True, exist_ok=True) + overlay_path.write_text(yaml.dump({ + "ai": { + "provider": "openai_compat", + "api_key": "${LLM_API_KEY}", + "base_url": "https://litellm.example.com", + "model": "claude-haiku-4-5-20251001", + } + })) + + c = seeded_app["client"] + token = seeded_app["admin_token"] + resp = c.post( + "/api/admin/configure", + json={"data_source": "local"}, + headers=_auth(token), + ) + assert resp.status_code == 200 + overlay = _read_overlay(seeded_app["env"]) + assert overlay["ai"]["provider"] == "openai_compat" + assert overlay["ai"]["base_url"] == "https://litellm.example.com"