agnes-the-ai-analyst/tests/test_llm_providers_full.py
ZdenekSrotyr 3c653b6dc2 test: add connector test suite (Block D) — 5 files, 58 tests
Tests cover Keboola extractor (extension + legacy fallback, _remote_attach),
BigQuery extractor (remote views, contract validation), Jira service
(webhook processing, HMAC verification, HTTP mocking), Jira incremental
transform (upsert/delete, monthly parquet partitioning), and LLM providers
(factory, AnthropicExtractor retry/auth, OpenAICompatExtractor strategy
cascade, JSON extraction helpers). Also adds tests/helpers/factories.py
with WebhookEventFactory.
2026-04-12 11:12:50 +02:00

273 lines
11 KiB
Python

"""Full tests for LLM provider factory and extractors."""
import json
from unittest.mock import MagicMock, patch
import anthropic
import openai
import pytest
from connectors.llm.anthropic_provider import AnthropicExtractor
from connectors.llm.exceptions import (
LLMAuthError,
LLMFormatError,
LLMRateLimitError,
LLMRefusalError,
LLMTimeoutError,
LLMUnsupportedError,
)
from connectors.llm.factory import DEFAULT_MODEL, create_extractor
from connectors.llm.openai_compat import OpenAICompatExtractor, _extract_json_from_text
# ---------------------------------------------------------------------------
# Mock response helpers
# ---------------------------------------------------------------------------
def _anthropic_response(text: str, stop_reason: str = "end_turn"):
block = MagicMock()
block.text = text
resp = MagicMock()
resp.content = [block]
resp.stop_reason = stop_reason
return resp
def _openai_response(content: str | None, finish_reason: str = "stop"):
message = MagicMock()
message.content = content
choice = MagicMock()
choice.message = message
choice.finish_reason = finish_reason
resp = MagicMock()
resp.choices = [choice]
return resp
_SCHEMA = {"type": "object", "properties": {"value": {"type": "string"}}}
# ---------------------------------------------------------------------------
# Factory tests
# ---------------------------------------------------------------------------
class TestCreateExtractor:
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_anthropic_provider_returns_anthropic_extractor(self, _mock):
config = {"provider": "anthropic", "api_key": "sk-ant-test"}
ext = create_extractor(config)
assert isinstance(ext, AnthropicExtractor)
@patch("connectors.llm.openai_compat.openai.OpenAI")
@patch("connectors.llm.openai_compat.httpx.Client")
def test_openai_compat_provider_returns_openai_extractor(self, _mock_http, _mock_oai):
config = {
"provider": "openai_compat",
"api_key": "sk-test",
"base_url": "https://api.openai.com/v1",
}
ext = create_extractor(config)
assert isinstance(ext, OpenAICompatExtractor)
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_legacy_anthropic_key_format(self, _mock):
"""anthropic_api_key (legacy format) still creates AnthropicExtractor."""
config = {"anthropic_api_key": "sk-ant-legacy"}
ext = create_extractor(config)
assert isinstance(ext, AnthropicExtractor)
def test_missing_provider_raises_value_error(self):
with pytest.raises(ValueError, match="ai.provider is required"):
create_extractor({"api_key": "sk-test"})
def test_empty_config_raises_value_error(self):
with pytest.raises(ValueError):
create_extractor({})
def test_unknown_provider_raises_value_error(self):
with pytest.raises(ValueError, match="Unknown ai.provider"):
create_extractor({"provider": "cohere", "api_key": "sk-test"})
@patch("connectors.llm.openai_compat.openai.OpenAI")
@patch("connectors.llm.openai_compat.httpx.Client")
def test_openai_compat_missing_base_url_raises(self, _mock_http, _mock_oai):
with pytest.raises(ValueError, match="base_url is required"):
create_extractor({"provider": "openai_compat", "api_key": "sk-test"})
def test_empty_api_key_raises_value_error(self):
with pytest.raises(ValueError, match="api_key"):
create_extractor({"provider": "anthropic", "api_key": ""})
# ---------------------------------------------------------------------------
# AnthropicExtractor tests
# ---------------------------------------------------------------------------
class TestAnthropicExtractor:
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_extract_json_success(self, mock_cls):
"""extract_json returns parsed dict on successful API call."""
mock_client = MagicMock()
mock_cls.return_value = mock_client
mock_client.messages.create.return_value = _anthropic_response('{"value": "hello"}')
ext = AnthropicExtractor(api_key="sk-ant-test", model="claude-haiku-4-5-20251001")
result = ext.extract_json("prompt", 1000, _SCHEMA, "test_schema")
assert result == {"value": "hello"}
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_auth_error_raises_immediately(self, mock_cls):
"""AuthenticationError is raised immediately without retry."""
mock_client = MagicMock()
mock_cls.return_value = mock_client
mock_client.messages.create.side_effect = anthropic.AuthenticationError(
message="Invalid key", response=MagicMock(), body={}
)
ext = AnthropicExtractor(api_key="bad-key", model="claude-haiku-4-5-20251001")
with pytest.raises(LLMAuthError):
ext.extract_json("prompt", 1000, _SCHEMA, "test_schema")
# Should only be called once — no retry
assert mock_client.messages.create.call_count == 1
@patch("connectors.llm.anthropic_provider.time.sleep")
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_rate_limit_retries_and_raises(self, mock_cls, mock_sleep):
"""RateLimitError is retried MAX_RETRIES times then raises LLMRateLimitError."""
from connectors.llm.anthropic_provider import MAX_RETRIES
mock_client = MagicMock()
mock_cls.return_value = mock_client
mock_client.messages.create.side_effect = anthropic.RateLimitError(
message="Rate limited", response=MagicMock(), body={}
)
ext = AnthropicExtractor(api_key="sk-ant-test", model="claude-haiku-4-5-20251001")
with pytest.raises(LLMRateLimitError):
ext.extract_json("prompt", 1000, _SCHEMA, "test_schema")
assert mock_client.messages.create.call_count == MAX_RETRIES
@patch("connectors.llm.anthropic_provider.anthropic.Anthropic")
def test_truncated_response_raises_format_error(self, mock_cls):
"""max_tokens stop_reason raises LLMFormatError."""
mock_client = MagicMock()
mock_cls.return_value = mock_client
mock_client.messages.create.return_value = _anthropic_response(
'{"partial":', stop_reason="max_tokens"
)
ext = AnthropicExtractor(api_key="sk-ant-test", model="claude-haiku-4-5-20251001")
with pytest.raises(LLMFormatError, match="truncated"):
ext.extract_json("prompt", 10, _SCHEMA, "test_schema")
# ---------------------------------------------------------------------------
# OpenAICompatExtractor tests
# ---------------------------------------------------------------------------
class TestOpenAICompatExtractor:
def _make_extractor(self, structured_output: str = "auto") -> OpenAICompatExtractor:
with patch("connectors.llm.openai_compat.openai.OpenAI"), \
patch("connectors.llm.openai_compat.httpx.Client"):
return OpenAICompatExtractor(
api_key="sk-test",
base_url="https://api.example.com/v1",
model="gpt-4o-mini",
structured_output=structured_output,
)
def test_extract_json_success_json_schema(self):
"""extract_json succeeds with json_schema strategy."""
ext = self._make_extractor()
ext._client = MagicMock()
ext._client.chat.completions.create.return_value = _openai_response('{"value": "ok"}')
result = ext.extract_json("prompt", 1000, _SCHEMA, "test")
assert result == {"value": "ok"}
def test_strategy_cascade_falls_back_on_bad_request(self):
"""json_schema unsupported -> falls back to json_object strategy (auto mode)."""
ext = self._make_extractor(structured_output="auto")
ext._client = MagicMock()
# First call (json_schema) raises BadRequestError about response_format
bad_request_error = openai.BadRequestError(
message="response_format json_schema not supported",
response=MagicMock(status_code=400),
body={"error": {"message": "response_format json_schema not supported"}},
)
success_response = _openai_response('{"value": "fallback"}')
ext._client.chat.completions.create.side_effect = [bad_request_error, success_response]
result = ext.extract_json("prompt", 1000, _SCHEMA, "test")
assert result == {"value": "fallback"}
def test_auth_error_raises_immediately(self):
"""AuthenticationError is not retried."""
ext = self._make_extractor()
ext._client = MagicMock()
ext._client.chat.completions.create.side_effect = openai.AuthenticationError(
message="Invalid key",
response=MagicMock(status_code=401),
body={},
)
with pytest.raises(LLMAuthError):
ext.extract_json("prompt", 1000, _SCHEMA, "test")
assert ext._client.chat.completions.create.call_count == 1
def test_strict_mode_raises_unsupported(self):
"""strict mode does not fall back; raises LLMUnsupportedError."""
ext = self._make_extractor(structured_output="strict")
ext._client = MagicMock()
ext._client.chat.completions.create.side_effect = openai.BadRequestError(
message="json_schema not supported",
response=MagicMock(status_code=400),
body={"error": {"message": "json_schema not supported"}},
)
with pytest.raises(LLMUnsupportedError):
ext.extract_json("prompt", 1000, _SCHEMA, "test")
def test_refusal_raises_immediately(self):
"""Empty content (refusal) raises LLMRefusalError."""
ext = self._make_extractor()
ext._client = MagicMock()
ext._client.chat.completions.create.return_value = _openai_response(None)
with pytest.raises(LLMRefusalError):
ext.extract_json("prompt", 1000, _SCHEMA, "test")
# ---------------------------------------------------------------------------
# _extract_json_from_text tests
# ---------------------------------------------------------------------------
class TestExtractJsonFromText:
def test_direct_json_parse(self):
assert _extract_json_from_text('{"key": "val"}') == {"key": "val"}
def test_strips_markdown_code_fence(self):
text = '```json\n{"key": "fenced"}\n```'
assert _extract_json_from_text(text) == {"key": "fenced"}
def test_strips_plain_code_fence(self):
text = "```\n{\"key\": \"plain\"}\n```"
assert _extract_json_from_text(text) == {"key": "plain"}
def test_brace_extraction_fallback(self):
text = "Here is the JSON: {\"key\": \"brace\"} and some trailing text."
assert _extract_json_from_text(text) == {"key": "brace"}
def test_raises_format_error_on_invalid(self):
with pytest.raises(LLMFormatError):
_extract_json_from_text("This is definitely not JSON at all.")