fix(setup): seed default ai: block + env-var fallback (#176)
POST /api/admin/configure now writes a default ai: block into the
instance.yaml overlay when the request leaves it untouched and either
ANTHROPIC_API_KEY or LLM_API_KEY is set in the environment. The block
references the env var via ${VAR} syntax — secrets never land in YAML.
connectors.llm.factory grows create_extractor_from_env_or_config which
falls back to ANTHROPIC_API_KEY / LLM_API_KEY when ai_config is empty
and raises a clear ValueError when neither is available. Both
services/corporate_memory and services/verification_detector switch to
the new helper, replacing the old 'silently skip when ai: missing'
path that was the silent-failure root cause.
Tests:
- tests/test_setup_ai_block.py — overlay seeding contract.
- tests/test_llm_provider_env_fallback.py — fallback + fail-fast.
This commit is contained in:
parent
d2104555c6
commit
bbb04ac041
7 changed files with 283 additions and 20 deletions
|
|
@ -2626,6 +2626,29 @@ async def configure_instance(
|
|||
"location": request.bigquery_location or "us",
|
||||
}
|
||||
|
||||
# Seed an ai: block on first-time setup so LLM-driven services
|
||||
# (corporate_memory, verification_detector) can boot without manual
|
||||
# YAML editing. Only inserts when the overlay has no ai: yet AND an
|
||||
# appropriate env var is present — never overwrites operator config,
|
||||
# never writes a placeholder block (#176).
|
||||
if "ai" not in overlay:
|
||||
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
|
||||
llm_key = os.environ.get("LLM_API_KEY", "").strip()
|
||||
if anthropic_key:
|
||||
overlay["ai"] = {
|
||||
"provider": "anthropic",
|
||||
"api_key": "${ANTHROPIC_API_KEY}",
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
"structured_output": "auto",
|
||||
}
|
||||
elif llm_key:
|
||||
overlay["ai"] = {
|
||||
"provider": "anthropic",
|
||||
"api_key": "${LLM_API_KEY}",
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
"structured_output": "auto",
|
||||
}
|
||||
|
||||
# Atomic write to writable data volume — same tmp + os.replace pattern
|
||||
# as the server-config editor so a concurrent save can't tear the file.
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ providers with automatic fallback strategies for structured output.
|
|||
"""
|
||||
|
||||
from .base import StructuredExtractor
|
||||
from .factory import create_extractor
|
||||
from .factory import create_extractor, create_extractor_from_env_or_config
|
||||
|
||||
__all__ = ["StructuredExtractor", "create_extractor"]
|
||||
__all__ = [
|
||||
"StructuredExtractor",
|
||||
"create_extractor",
|
||||
"create_extractor_from_env_or_config",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ and creates the appropriate StructuredExtractor implementation.
|
|||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .anthropic_provider import AnthropicExtractor
|
||||
|
|
@ -120,6 +121,48 @@ def create_extractor(ai_config: dict) -> StructuredExtractor:
|
|||
)
|
||||
|
||||
|
||||
def create_extractor_from_env_or_config(
|
||||
ai_config: dict | None,
|
||||
) -> StructuredExtractor:
|
||||
"""Build an extractor from config, falling back to env vars.
|
||||
|
||||
Resolution order (#176):
|
||||
|
||||
1. ``ai_config`` is a non-empty dict → delegate to :func:`create_extractor`.
|
||||
2. ``ANTHROPIC_API_KEY`` set → AnthropicExtractor with the default model.
|
||||
3. ``LLM_API_KEY`` set without a base_url → AnthropicExtractor (the proxy
|
||||
case typically also wires a base_url, in which case the operator should
|
||||
use the explicit ai: block; this fallback is a best-effort convenience).
|
||||
4. Otherwise raise ``ValueError`` with a clear actionable message — never
|
||||
silently exit, never return ``None``. The previous "skip when ai: is
|
||||
missing" behavior was the silent-failure root cause in #176.
|
||||
"""
|
||||
if ai_config:
|
||||
return create_extractor(ai_config)
|
||||
|
||||
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
|
||||
llm_key = os.environ.get("LLM_API_KEY", "").strip()
|
||||
|
||||
if anthropic_key:
|
||||
logger.info(
|
||||
"No ai: block in instance.yaml; falling back to ANTHROPIC_API_KEY env var"
|
||||
)
|
||||
return AnthropicExtractor(api_key=anthropic_key, model=DEFAULT_MODEL)
|
||||
|
||||
if llm_key:
|
||||
logger.info(
|
||||
"No ai: block in instance.yaml; falling back to LLM_API_KEY env var"
|
||||
)
|
||||
return AnthropicExtractor(api_key=llm_key, model=DEFAULT_MODEL)
|
||||
|
||||
raise ValueError(
|
||||
"LLM not configured. Add an ai: block to instance.yaml (see "
|
||||
"config/instance.yaml.example) OR set ANTHROPIC_API_KEY / LLM_API_KEY "
|
||||
"in the environment. The corporate-memory and verification-detector "
|
||||
"services cannot run without one of these."
|
||||
)
|
||||
|
||||
|
||||
def _validate_api_key(api_key: str) -> None:
|
||||
"""Validate that an API key is present and non-empty.
|
||||
|
||||
|
|
|
|||
|
|
@ -416,17 +416,20 @@ def collect_all(dry_run: bool = False) -> dict:
|
|||
stats["skipped"] = True
|
||||
return stats
|
||||
|
||||
# Step 2: Initialize AI extractor
|
||||
# Step 2: Initialize AI extractor.
|
||||
# Fail-fast (#176): no silent skip on missing ai: block. The factory
|
||||
# falls back to ANTHROPIC_API_KEY / LLM_API_KEY env vars and raises a
|
||||
# clear ValueError if neither config nor env is available.
|
||||
try:
|
||||
from config.loader import load_instance_config
|
||||
from connectors.llm import create_extractor_from_env_or_config
|
||||
|
||||
try:
|
||||
instance_config = load_instance_config()
|
||||
ai_config = instance_config.get("ai")
|
||||
if not ai_config:
|
||||
logger.warning("No ai: section in instance.yaml, skipping catalog refresh")
|
||||
stats["skipped"] = True
|
||||
return stats
|
||||
extractor = create_extractor(ai_config)
|
||||
except (ValueError, FileNotFoundError):
|
||||
instance_config = {}
|
||||
ai_config = instance_config.get("ai") if instance_config else None
|
||||
extractor = create_extractor_from_env_or_config(ai_config)
|
||||
except (ValueError, FileNotFoundError) as e:
|
||||
stats["errors"].append(str(e))
|
||||
logger.error("Failed to initialize AI extractor: %s", e)
|
||||
|
|
|
|||
|
|
@ -49,22 +49,25 @@ def main() -> None:
|
|||
|
||||
setup_logging(__name__, level="DEBUG" if args.verbose else "INFO")
|
||||
|
||||
# Load AI config lazily (same pattern as corporate memory collector)
|
||||
# Load AI config; fail fast on missing config + env (#176).
|
||||
from connectors.llm import create_extractor_from_env_or_config
|
||||
try:
|
||||
from config.loader import load_instance_config
|
||||
|
||||
try:
|
||||
config = load_instance_config()
|
||||
ai_config = config.get("ai")
|
||||
if not ai_config:
|
||||
logger.error("No ai: section in instance.yaml, cannot run verification detector")
|
||||
sys.exit(1)
|
||||
except (ValueError, FileNotFoundError):
|
||||
config = {}
|
||||
ai_config = config.get("ai") if config else None
|
||||
extractor = create_extractor_from_env_or_config(ai_config)
|
||||
except (ValueError, FileNotFoundError) as e:
|
||||
logger.error("Failed to load config: %s", e)
|
||||
logger.error(
|
||||
"Failed to initialize verification detector: %s. "
|
||||
"Configure ai: in instance.yaml or set ANTHROPIC_API_KEY / LLM_API_KEY.",
|
||||
e,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
from connectors.llm import create_extractor
|
||||
|
||||
extractor = create_extractor(ai_config)
|
||||
conn = get_system_db()
|
||||
|
||||
if args.reset:
|
||||
|
|
|
|||
80
tests/test_llm_provider_env_fallback.py
Normal file
80
tests/test_llm_provider_env_fallback.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""LLM provider env-var fallback + fail-fast behavior.
|
||||
|
||||
When no ai: block is configured, `connectors.llm.factory.create_extractor`
|
||||
must:
|
||||
|
||||
1. Build an extractor from `ANTHROPIC_API_KEY` / `LLM_API_KEY` if either
|
||||
env var is set (so an operator who only edited .env still gets a
|
||||
working pipeline).
|
||||
2. Fail fast with a clear error if neither config nor env is available.
|
||||
No silent skip.
|
||||
|
||||
Closes one of five defects in #176.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from connectors.llm.factory import create_extractor, create_extractor_from_env_or_config
|
||||
|
||||
|
||||
class TestEnvFallback:
|
||||
"""Mocks the AnthropicExtractor constructor so the tests don't try to
|
||||
open a live SDK client (which loads system SSL certs at __init__ time
|
||||
and is unhappy on machines with corporate CA-bundle env vars pointing
|
||||
at a non-existent file). The test surface is the factory routing logic,
|
||||
not the SDK wiring — that's covered by tests/test_llm_providers_full.py.
|
||||
"""
|
||||
|
||||
def test_anthropic_env_fallback_builds_extractor(self, monkeypatch):
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env-aaa")
|
||||
monkeypatch.delenv("LLM_API_KEY", raising=False)
|
||||
with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls:
|
||||
ex = create_extractor_from_env_or_config(ai_config=None)
|
||||
assert ex is mock_cls.return_value
|
||||
kwargs = mock_cls.call_args.kwargs
|
||||
assert kwargs["api_key"] == "sk-ant-from-env-aaa"
|
||||
|
||||
def test_llm_api_key_env_fallback_builds_extractor(self, monkeypatch):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.setenv("LLM_API_KEY", "sk-proxy-from-env-bbb")
|
||||
with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls:
|
||||
ex = create_extractor_from_env_or_config(ai_config=None)
|
||||
assert ex is mock_cls.return_value
|
||||
kwargs = mock_cls.call_args.kwargs
|
||||
assert kwargs["api_key"] == "sk-proxy-from-env-bbb"
|
||||
|
||||
def test_no_config_no_env_fails_fast(self, monkeypatch):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.delenv("LLM_API_KEY", raising=False)
|
||||
with pytest.raises(ValueError) as excinfo:
|
||||
create_extractor_from_env_or_config(ai_config=None)
|
||||
msg = str(excinfo.value)
|
||||
# Error must mention BOTH config + env paths so operators know how to fix it.
|
||||
assert "instance.yaml" in msg or "ai:" in msg
|
||||
assert "ANTHROPIC_API_KEY" in msg
|
||||
|
||||
def test_explicit_ai_config_wins_over_env(self, monkeypatch):
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "ignored-because-config-wins")
|
||||
ai_cfg = {
|
||||
"provider": "anthropic",
|
||||
"api_key": "sk-ant-from-cfg-zzz",
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
}
|
||||
with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls:
|
||||
ex = create_extractor_from_env_or_config(ai_config=ai_cfg)
|
||||
assert ex is mock_cls.return_value
|
||||
kwargs = mock_cls.call_args.kwargs
|
||||
assert kwargs["api_key"] == "sk-ant-from-cfg-zzz"
|
||||
|
||||
def test_empty_dict_falls_through_to_env(self, monkeypatch):
|
||||
"""ai: {} is treated the same as no block — fall through to env vars."""
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env-ccc")
|
||||
with patch("connectors.llm.factory.AnthropicExtractor") as mock_cls:
|
||||
ex = create_extractor_from_env_or_config(ai_config={})
|
||||
assert ex is mock_cls.return_value
|
||||
kwargs = mock_cls.call_args.kwargs
|
||||
assert kwargs["api_key"] == "sk-ant-from-env-ccc"
|
||||
107
tests/test_setup_ai_block.py
Normal file
107
tests/test_setup_ai_block.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"""Tests for /api/admin/configure writing a default ai: block.
|
||||
|
||||
First-time setup must seed an ai: section in the instance.yaml overlay so
|
||||
LLM-driven services (corporate_memory, verification_detector) can boot
|
||||
without a manual edit. Closes one of five defects in #176.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def _auth(token: str) -> dict:
|
||||
return {"Authorization": f"Bearer {token}"}
|
||||
|
||||
|
||||
def _read_overlay(env: dict) -> dict:
|
||||
overlay_path = env["data_dir"] / "state" / "instance.yaml"
|
||||
if not overlay_path.exists():
|
||||
return {}
|
||||
return yaml.safe_load(overlay_path.read_text()) or {}
|
||||
|
||||
|
||||
class TestConfigureSeedsAiBlock:
|
||||
def test_configure_seeds_ai_block_when_anthropic_api_key_is_set(self, seeded_app, monkeypatch):
|
||||
"""ANTHROPIC_API_KEY in the env → overlay gets an ai: block referencing it."""
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-keyvalue")
|
||||
c = seeded_app["client"]
|
||||
token = seeded_app["admin_token"]
|
||||
resp = c.post(
|
||||
"/api/admin/configure",
|
||||
json={"data_source": "local"},
|
||||
headers=_auth(token),
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
overlay = _read_overlay(seeded_app["env"])
|
||||
assert "ai" in overlay, "configure must seed ai: block when ANTHROPIC_API_KEY is set"
|
||||
ai = overlay["ai"]
|
||||
assert ai.get("provider") == "anthropic"
|
||||
# The overlay stores the env-var reference (${ANTHROPIC_API_KEY}), not
|
||||
# the raw secret — secrets belong in .env_overlay only.
|
||||
assert ai.get("api_key") == "${ANTHROPIC_API_KEY}"
|
||||
assert "model" in ai
|
||||
|
||||
def test_configure_seeds_ai_block_when_llm_api_key_is_set(self, seeded_app, monkeypatch):
|
||||
"""LLM_API_KEY (proxy/openai_compat fallback) is also acceptable."""
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.setenv("LLM_API_KEY", "sk-proxy-keyvalue")
|
||||
c = seeded_app["client"]
|
||||
token = seeded_app["admin_token"]
|
||||
resp = c.post(
|
||||
"/api/admin/configure",
|
||||
json={"data_source": "local"},
|
||||
headers=_auth(token),
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
overlay = _read_overlay(seeded_app["env"])
|
||||
assert "ai" in overlay
|
||||
# The fallback uses ${LLM_API_KEY} — same env-var-reference pattern.
|
||||
assert overlay["ai"].get("api_key") == "${LLM_API_KEY}"
|
||||
|
||||
def test_configure_does_not_seed_ai_when_no_key_in_env(self, seeded_app, monkeypatch):
|
||||
"""No env keys → no ai block written. Operator must add manually.
|
||||
|
||||
We deliberately do not write a placeholder block: the LLM services
|
||||
fail-fast on a missing block and the operator gets a clear error.
|
||||
"""
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.delenv("LLM_API_KEY", raising=False)
|
||||
c = seeded_app["client"]
|
||||
token = seeded_app["admin_token"]
|
||||
resp = c.post(
|
||||
"/api/admin/configure",
|
||||
json={"data_source": "local"},
|
||||
headers=_auth(token),
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
overlay = _read_overlay(seeded_app["env"])
|
||||
assert "ai" not in overlay
|
||||
|
||||
def test_configure_preserves_existing_ai_block(self, seeded_app, monkeypatch):
|
||||
"""If overlay already has ai: section, configure must not overwrite it."""
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-from-env")
|
||||
# Pre-populate the overlay with a custom ai block.
|
||||
overlay_path = seeded_app["env"]["data_dir"] / "state" / "instance.yaml"
|
||||
overlay_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
overlay_path.write_text(yaml.dump({
|
||||
"ai": {
|
||||
"provider": "openai_compat",
|
||||
"api_key": "${LLM_API_KEY}",
|
||||
"base_url": "https://litellm.example.com",
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
}
|
||||
}))
|
||||
|
||||
c = seeded_app["client"]
|
||||
token = seeded_app["admin_token"]
|
||||
resp = c.post(
|
||||
"/api/admin/configure",
|
||||
json={"data_source": "local"},
|
||||
headers=_auth(token),
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
overlay = _read_overlay(seeded_app["env"])
|
||||
assert overlay["ai"]["provider"] == "openai_compat"
|
||||
assert overlay["ai"]["base_url"] == "https://litellm.example.com"
|
||||
Loading…
Reference in a new issue