agnes-the-ai-analyst/connectors/llm/anthropic_provider.py
ZdenekSrotyr e86dd5edc5 fix(anthropic): strict json_schema (additionalProperties=false) + add /admin/scheduler-runs UI
E2E test on a real BQ deploy showed every verification-extraction call
fails with HTTP 400 invalid_request_error: "output_config.format.schema:
For 'object' type, 'additionalProperties' must be explicitly set to false".
The Anthropic structured-output API now requires the field on every object
node in the json_schema. Fix: connectors/llm/anthropic_provider.py wraps
the caller-supplied schema through a recursive _strict_json_schema()
walker that adds the field where missing (preserving any explicit
override), then passes the strict variant to the API. Six unit tests in
TestStrictJsonSchema pin the recursion across nested objects, array items,
and the no-mutation invariant.

Adds /admin/scheduler-runs — a read-only admin page that surfaces the
last 200 audit-log entries from scheduler-driven actions. New
AuditRepository.query_actions(actions, limit) helper, new admin nav
entry. Failed scheduler ticks (HTTP 401, network errors) don't reach
the audit_log; the page calls that out with a hint to set
SCHEDULER_API_TOKEN if no rows show up.
2026-05-05 08:00:57 +02:00

169 lines
5.7 KiB
Python

"""Anthropic provider for structured JSON extraction.
Uses the Anthropic API with native structured output (json_schema)
for reliable JSON extraction. Includes retry logic for transient errors.
"""
import json
import logging
import time
import anthropic
from .exceptions import (
LLMAuthError,
LLMFormatError,
LLMRateLimitError,
LLMRefusalError,
LLMTimeoutError,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Retry configuration
# Total number of attempts made by AnthropicExtractor.extract_json
# (the first call plus retries), not the number of retries.
MAX_RETRIES = 3
# Delay in seconds before the first retry.
INITIAL_BACKOFF_SECONDS = 2
# Factor applied to the delay after each failed attempt; with the values
# above the waits are 2s and then 4s.
BACKOFF_MULTIPLIER = 2
def _strict_json_schema(schema):
"""Return a copy of the schema with additionalProperties=False on every object type.
The Anthropic structured-output API rejects schemas where a `{"type": "object"}` node
omits `additionalProperties` (HTTP 400 invalid_request_error). We walk the schema
recursively and force the field where missing.
"""
if isinstance(schema, dict):
out = {k: _strict_json_schema(v) for k, v in schema.items()}
if out.get("type") == "object" and "additionalProperties" not in out:
out["additionalProperties"] = False
return out
if isinstance(schema, list):
return [_strict_json_schema(item) for item in schema]
return schema
class AnthropicExtractor:
    """Structured JSON extractor using the Anthropic API.

    Uses output_config with json_schema format for structured output.
    Retries transient errors (rate limit, timeout, connection) with
    exponential backoff. Authentication, refusal and format errors are
    not retried and propagate to the caller immediately.
    """

    def __init__(self, api_key: str, model: str) -> None:
        """Initialize the Anthropic extractor.

        Args:
            api_key: Anthropic API key.
            model: Model identifier (e.g., "claude-haiku-4-5-20251001").
        """
        self._client = anthropic.Anthropic(api_key=api_key)
        self._model = model

    def extract_json(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
    ) -> dict:
        """Extract structured JSON using the Anthropic API.

        Makes up to MAX_RETRIES attempts. Only rate-limit and
        timeout/connection errors trigger another attempt; the wait
        between attempts grows by BACKOFF_MULTIPLIER each time.

        Args:
            prompt: The extraction prompt to send to the model.
            max_tokens: Maximum tokens in the response.
            json_schema: JSON Schema that the response must conform to.
            schema_name: Human-readable name for the schema.

        Returns:
            Parsed JSON dictionary conforming to the provided schema.

        Raises:
            LLMAuthError: Invalid API key.
            LLMRateLimitError: Rate limited after all retries.
            LLMTimeoutError: Timeout/connection error after all retries.
            LLMFormatError: Response is truncated or not valid JSON
                (raised on the first occurrence, never retried).
            LLMRefusalError: Model refused to respond.
        """
        last_exception: Exception | None = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                return self._attempt_extraction(
                    prompt, max_tokens, json_schema, schema_name, attempt,
                )
            except LLMAuthError:
                raise
            except LLMRefusalError:
                raise
            except (LLMRateLimitError, LLMTimeoutError) as e:
                last_exception = e
                if attempt < MAX_RETRIES:
                    delay = INITIAL_BACKOFF_SECONDS * (BACKOFF_MULTIPLIER ** (attempt - 1))
                    # Only the exception class name is logged, not its message.
                    logger.warning(
                        "Transient error on attempt %d/%d for model %s, "
                        "retrying in %ds: %s",
                        attempt, MAX_RETRIES, self._model, delay,
                        type(e).__name__,
                    )
                    time.sleep(delay)
        if last_exception is None:
            # Only reachable when MAX_RETRIES < 1, i.e. no attempt was made.
            raise RuntimeError("MAX_RETRIES must be at least 1")
        raise last_exception

    def _attempt_extraction(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
        attempt: int,
    ) -> dict:
        """Single extraction attempt against the Anthropic API.

        Args:
            prompt: The extraction prompt to send to the model.
            max_tokens: Maximum tokens in the response.
            json_schema: JSON Schema for the response; it is passed
                through _strict_json_schema before being sent.
            schema_name: Human-readable schema name, used in log and
                error messages only.
            attempt: 1-based attempt number, used for logging only.

        Returns:
            The parsed JSON payload of the first content block.

        Raises:
            LLMAuthError: The API rejected the credentials.
            LLMRateLimitError: The API reported rate limiting.
            LLMTimeoutError: The request timed out or could not connect.
            LLMFormatError: The response was truncated or is not valid JSON.
            LLMRefusalError: The model refused to respond.
        """
        logger.info(
            "Anthropic extraction attempt %d/%d, model=%s, schema=%s",
            attempt, MAX_RETRIES, self._model, schema_name,
        )
        try:
            response = self._client.messages.create(
                model=self._model,
                max_tokens=max_tokens,
                messages=[{"role": "user", "content": prompt}],
                output_config={
                    "format": {
                        "type": "json_schema",
                        "schema": _strict_json_schema(json_schema),
                    },
                },
            )
        except anthropic.AuthenticationError as e:
            raise LLMAuthError("Anthropic authentication failed (check API key)") from e
        except anthropic.RateLimitError as e:
            raise LLMRateLimitError("Anthropic rate limited") from e
        except (anthropic.APITimeoutError, anthropic.APIConnectionError) as e:
            raise LLMTimeoutError(
                f"Anthropic connection error ({type(e).__name__})"
            ) from e

        # Check for truncation. This is NOT retried by extract_json: the
        # same max_tokens would most likely truncate again.
        if response.stop_reason == "max_tokens":
            raise LLMFormatError(
                f"Response truncated (max_tokens) for schema {schema_name}"
            )

        # Check for refusal: either the API's dedicated "refusal" stop
        # reason, or a normal end of turn that produced no content at all.
        if response.stop_reason == "refusal" or (
            response.stop_reason == "end_turn" and not response.content
        ):
            raise LLMRefusalError(
                f"Model refused to generate response for schema {schema_name}"
            )

        # Parse JSON from response
        try:
            text = response.content[0].text
            return json.loads(text)
        except (json.JSONDecodeError, IndexError, AttributeError, TypeError) as e:
            # IndexError: no content blocks; AttributeError: first block has
            # no text; TypeError: text is not a str/bytes value.
            raise LLMFormatError(
                f"Failed to parse Anthropic response as JSON for "
                f"schema {schema_name} ({type(e).__name__})"
            ) from e