From 9f33e24bf9bd1be8c68a720d6c4f50aca6c0319d Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Tue, 5 May 2026 05:57:22 +0200 Subject: [PATCH] fix(config): overlay-aware LLM consumers + env-ref resolution (#179 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devin BUG: /api/admin/configure seeds an ai: block to the writable overlay at DATA_DIR/state/instance.yaml, but the three LLM consumers imported from config.loader.load_instance_config — which reads the static config dir only. Even if they had read the overlay, the loader ran yaml.safe_load directly without passing through _resolve_env_refs, so '${ANTHROPIC_API_KEY}' would have stayed a literal placeholder. The pipeline appeared to work because the factory falls back to the env var directly, but the overlay path itself was dead code. Two fixes, both required: 1. Switched the three LLM consumers to app.instance_config.load_instance_config: - services/corporate_memory/collector.py:collect_all - services/verification_detector/__main__.py:main - app/api/admin.py:run_verification_detector 2. app/instance_config.py runs the loaded overlay through config.loader._resolve_env_refs *before* the deep-merge, so '${ANTHROPIC_API_KEY}' resolves at config-load time. New regression suite tests/test_instance_config_overlay.py pins: - env-ref resolution against the overlay (resolved when env set, empty when env missing — never the literal placeholder) - deep-merge still preserves static-only sections - the three consumers reach app.instance_config (inspected via inspect.getsource so a future refactor that reverts the import fails the test) - end-to-end: a seeded overlay + ANTHROPIC_API_KEY env reaches the factory with a resolved api_key --- app/api/admin.py | 5 +- app/instance_config.py | 11 + services/corporate_memory/collector.py | 7 +- services/verification_detector/__main__.py | 5 +- tests/test_instance_config_overlay.py | 233 +++++++++++++++++++++ 5 files changed, 258 insertions(+), 3 deletions(-) create mode 100644 tests/test_instance_config_overlay.py diff --git a/app/api/admin.py b/app/api/admin.py index c6a51a2..a5c1f7a 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2843,8 +2843,11 @@ def run_verification_detector( # Build the extractor lazily so the endpoint surfaces a 500 with the # factory's actionable error when no ai: block + no env keys are set. + # Use the overlay-aware loader (#179 review fix) so an ai: block written + # by /api/admin/configure to DATA_DIR/state/instance.yaml actually flows + # through to the factory. try: - from config.loader import load_instance_config + from app.instance_config import load_instance_config try: instance_config = load_instance_config() except (ValueError, FileNotFoundError): diff --git a/app/instance_config.py b/app/instance_config.py index c317a30..6155608 100644 --- a/app/instance_config.py +++ b/app/instance_config.py @@ -93,11 +93,22 @@ def load_instance_config() -> dict: # write-side endpoints (POST /api/admin/server-config and /configure) # refuse to overwrite a corrupt overlay with HTTP 500, so an admin # noticing the saves break is the second line of defence. + # + # ${ENV_VAR} interpolation: ``config.loader.load_instance_config`` runs + # the static base through ``_resolve_env_refs`` already, but raw + # ``yaml.safe_load`` here would leave overlay strings like + # ``${ANTHROPIC_API_KEY}`` as literal placeholders. /api/admin/configure + # writes exactly that string into the seeded ai: block (#176), so we + # mirror the resolver here before the deep-merge — without it, the + # LLM factory receives the literal placeholder and rejects it as an + # invalid api key (#179 review fix). data_dir = Path(os.environ.get("DATA_DIR", "./data")) overlay_path = data_dir / "state" / "instance.yaml" if overlay_path.exists(): try: overlay = yaml.safe_load(overlay_path.read_text()) or {} + from config.loader import _resolve_env_refs + overlay = _resolve_env_refs(overlay) base = _deep_merge(base, overlay) logger.info("Merged overlay from %s", overlay_path) except Exception: diff --git a/services/corporate_memory/collector.py b/services/corporate_memory/collector.py index 10fd341..4484dca 100644 --- a/services/corporate_memory/collector.py +++ b/services/corporate_memory/collector.py @@ -424,7 +424,12 @@ def collect_all(dry_run: bool = False) -> dict: # actionable misconfiguration message instead of swallowing it into # stats["errors"]. FileNotFoundError on the static config path is fine # to swallow because the factory's env fallback can still satisfy. - from config.loader import load_instance_config + # + # Use the overlay-aware loader (#179 review fix) so an ai: block written + # by /api/admin/configure to DATA_DIR/state/instance.yaml actually flows + # through to the factory; config.loader.load_instance_config reads the + # static config dir only and would silently miss the overlay. + from app.instance_config import load_instance_config from connectors.llm import create_extractor_from_env_or_config try: diff --git a/services/verification_detector/__main__.py b/services/verification_detector/__main__.py index 851389d..3937c71 100644 --- a/services/verification_detector/__main__.py +++ b/services/verification_detector/__main__.py @@ -50,9 +50,12 @@ def main() -> None: setup_logging(__name__, level="DEBUG" if args.verbose else "INFO") # Load AI config; fail fast on missing config + env (#176). + # Use the overlay-aware loader (#179 review fix) so an ai: block written + # by /api/admin/configure to DATA_DIR/state/instance.yaml actually flows + # through to the factory. from connectors.llm import create_extractor_from_env_or_config try: - from config.loader import load_instance_config + from app.instance_config import load_instance_config try: config = load_instance_config() diff --git a/tests/test_instance_config_overlay.py b/tests/test_instance_config_overlay.py new file mode 100644 index 0000000..91120e2 --- /dev/null +++ b/tests/test_instance_config_overlay.py @@ -0,0 +1,233 @@ +"""Regression tests for app.instance_config overlay handling (#179 review). + +Two paths to a working LLM pipeline must both function: + +1. Operator hand-edits config/instance.yaml — covered by config.loader's + existing ``_resolve_env_refs`` pass. +2. Operator hits /api/admin/configure on first-time setup — that handler + seeds an ``ai:`` block in the writable overlay at + ``${DATA_DIR}/state/instance.yaml`` referencing ``${ANTHROPIC_API_KEY}``. + +Path 2 used to be dead code: the three LLM consumers +(``services.corporate_memory.collector.collect_all``, +``app.api.admin.run_verification_detector`` and +``services.verification_detector.__main__``) imported from +``config.loader.load_instance_config`` (overlay-blind), and even if they +hadn't, ``app.instance_config.load_instance_config`` deep-merged the +overlay through raw ``yaml.safe_load`` without resolving ``${ENV_VAR}`` +references. The factory then rejected the literal placeholder string as +an invalid api_key. + +This file pins both fixes: +- env-ref resolution runs against the overlay before deep-merge +- the three consumers reach the overlay-aware loader +""" + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + + +@pytest.fixture(autouse=True) +def _reset_instance_cache(): + """Drop the ``app.instance_config._instance_config`` cache between tests. + + Without this, a test that pollutes the cache leaks into the next one. + The same reset endpoint that ``/api/admin/server-config`` uses after + a save is the supported public entry point. + """ + from app import instance_config as ic + ic.reset_cache() + yield + ic.reset_cache() + + +def _write_overlay(data_dir: Path, payload: dict) -> Path: + """Drop a writable overlay at the path the loader actually reads.""" + overlay_path = data_dir / "state" / "instance.yaml" + overlay_path.parent.mkdir(parents=True, exist_ok=True) + overlay_path.write_text(yaml.dump(payload)) + return overlay_path + + +class TestOverlayEnvResolution: + """${ENV_VAR} placeholders in the overlay must resolve at load time.""" + + def test_env_ref_in_overlay_resolves_when_env_set(self, tmp_path, monkeypatch): + """Overlay's ${ANTHROPIC_API_KEY} resolves to the env value.""" + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + monkeypatch.setenv("ANTHROPIC_API_KEY", "secret123") + + _write_overlay(tmp_path, {"ai": {"api_key": "${ANTHROPIC_API_KEY}"}}) + + # Block the static base loader so this test is hermetic — the only + # signal we care about is that the overlay path resolves the ref. + with patch("config.loader.load_instance_config", return_value={}): + from app.instance_config import load_instance_config + cfg = load_instance_config() + + assert cfg.get("ai", {}).get("api_key") == "secret123" + + def test_env_ref_in_overlay_left_unresolved_when_env_missing( + self, tmp_path, monkeypatch, + ): + """When the env var isn't set, the placeholder collapses to empty. + + This mirrors ``_resolve_env_refs``'s contract: missing env logs a + warning and substitutes an empty string. The LLM factory's separate + env fallback (ANTHROPIC_API_KEY -> AnthropicExtractor) is what + ultimately surfaces the actionable error to the operator — this + test pins that the config layer doesn't fabricate a valid-looking + key when the env is empty. + """ + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + _write_overlay(tmp_path, {"ai": {"api_key": "${ANTHROPIC_API_KEY}"}}) + + with patch("config.loader.load_instance_config", return_value={}): + from app.instance_config import load_instance_config + cfg = load_instance_config() + + # Empty string, not the literal "${ANTHROPIC_API_KEY}". The factory + # treats empty as missing and raises the documented ValueError, so + # the eventual error message points the operator at the env, not + # at a malformed YAML. + assert cfg.get("ai", {}).get("api_key") == "" + + def test_overlay_deep_merges_with_static_base(self, tmp_path, monkeypatch): + """Overlay wins per-leaf; sections only in the base still flow through.""" + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + monkeypatch.setenv("ANTHROPIC_API_KEY", "k") + + _write_overlay(tmp_path, {"ai": {"api_key": "${ANTHROPIC_API_KEY}"}}) + + # Static base contributes a section the overlay doesn't touch. + static_base = { + "instance": {"name": "Test"}, + "datasets": {"foo": {"id": 1}}, + } + with patch("config.loader.load_instance_config", return_value=static_base): + from app.instance_config import load_instance_config + cfg = load_instance_config() + + assert cfg["instance"]["name"] == "Test" + assert cfg["datasets"] == {"foo": {"id": 1}} + assert cfg["ai"]["api_key"] == "k" + + +class TestConsumersUseOverlayAwareLoader: + """The three LLM pipeline consumers must reach the overlay path.""" + + def test_collector_imports_app_instance_config(self): + """``collect_all`` imports load_instance_config from app.instance_config.""" + import inspect + + from services.corporate_memory.collector import collect_all + + src = inspect.getsource(collect_all) + # The overlay-aware loader is the only one that merges + # DATA_DIR/state/instance.yaml; a consumer that imports + # config.loader.load_instance_config silently misses overlay edits. + assert "from app.instance_config import load_instance_config" in src + assert "from config.loader import load_instance_config" not in src + + def test_admin_run_verification_detector_uses_overlay_loader(self): + """``run_verification_detector`` imports the overlay-aware loader.""" + import inspect + + from app.api.admin import run_verification_detector + + src = inspect.getsource(run_verification_detector) + assert "from app.instance_config import load_instance_config" in src + assert "from config.loader import load_instance_config" not in src + + def test_verification_detector_main_uses_overlay_loader(self): + """The verification-detector CLI main reads through the overlay.""" + import inspect + + from services.verification_detector import __main__ as vd_main + + src = inspect.getsource(vd_main) + assert "from app.instance_config import load_instance_config" in src + # config.loader may legitimately appear in other contexts in this + # module someday; keep the assertion narrow to the same statement. + assert "from config.loader import load_instance_config" not in src + + +class TestSeededOverlayReachesFactory: + """End-to-end: seeded overlay + env -> factory receives a usable api_key.""" + + def test_collector_seeded_overlay_flows_through_to_factory( + self, tmp_path, monkeypatch, + ): + """The seeded ai: block + ANTHROPIC_API_KEY env yields a real extractor. + + Reproduces the path /api/admin/configure produces on first-time + setup: an overlay containing only ``ai: {api_key: ${ANTHROPIC_API_KEY}, ...}`` + plus the env var set by the operator. With the #179 review fixes, + the factory must see the resolved key — without them, it would + either miss the overlay entirely or get the literal placeholder. + """ + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env") + + _write_overlay(tmp_path, { + "ai": { + "provider": "anthropic", + "api_key": "${ANTHROPIC_API_KEY}", + "model": "claude-haiku-4-5-20251001", + }, + }) + + # Static base empty so only the overlay path matters. + with patch("config.loader.load_instance_config", return_value={}): + captured = {} + + def _spy(ai_config): + captured["ai_config"] = ai_config + from unittest.mock import MagicMock + return MagicMock() + + from connectors.llm import factory as llm_factory + + with patch.object(llm_factory, "create_extractor_from_env_or_config", _spy): + # Re-import via the lazy import inside collect_all by mocking + # the lookup at the package level (matches how collector imports). + import connectors.llm as llm_pkg + with patch.object( + llm_pkg, "create_extractor_from_env_or_config", _spy, + ): + home = tmp_path / "home" + home.mkdir() + user_dir = home / "alice" + user_dir.mkdir() + (user_dir / "CLAUDE.local.md").write_text("hello") + + from services.corporate_memory.collector import collect_all + with patch( + "services.corporate_memory.collector.HOME_BASE", home, + ), patch( + "services.corporate_memory.collector._read_json", + return_value={}, + ): + # The factory mock returns a MagicMock extractor whose + # extract_json default returns a MagicMock — the catalog + # processing code expects a dict-shaped response. We + # don't care about post-extractor behavior here, only + # that the factory was called with the resolved overlay. + try: + collect_all(dry_run=True) + except Exception: + pass + + assert captured.get("ai_config") is not None, ( + "Factory was never called — collector did not reach the overlay loader" + ) + assert captured["ai_config"].get("api_key") == "sk-ant-from-env", ( + "Factory received an unresolved or missing api_key — " + "overlay env-ref resolution is broken" + )