# agnes-the-ai-analyst/connectors/llm/openai_compat.py

"""OpenAI-compatible provider for structured JSON extraction.

Supports any OpenAI-compatible API endpoint with progressive fallback
for structured output: json_schema -> json_object -> prompt-based JSON.
"""

import json
import logging
import re
import time
from urllib.parse import urlparse

import httpx
import openai

from .exceptions import (
    LLMAuthError,
    LLMFormatError,
    LLMRateLimitError,
    LLMRefusalError,
    LLMTimeoutError,
    LLMUnsupportedError,
)

# Module-level logger; records are emitted under this module's dotted name.
logger = logging.getLogger(__name__)

# Retry configuration for transient (rate-limit / timeout) errors.
# MAX_RETRIES is the total number of attempts made per strategy, not the
# number of *extra* attempts. The wait before retrying after attempt k is
# INITIAL_BACKOFF_SECONDS * BACKOFF_MULTIPLIER ** (k - 1), i.e. 2s then 4s
# with the values below; no wait happens after the final attempt.
MAX_RETRIES = 3
INITIAL_BACKOFF_SECONDS = 2
BACKOFF_MULTIPLIER = 2

# Regex to strip markdown code fences and extract JSON.
# Matches a ``` fence (optionally tagged "json") and lazily captures its body
# up to the next closing ```; re.DOTALL lets the body span multiple lines.
_JSON_FENCE_PATTERN = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)


def _sanitize_url(url: str) -> str:
    """Extract scheme://host from a URL for safe logging.

    Never logs path, query params, or fragments which may contain
    tokens or sensitive information.
    """
    parsed = urlparse(url)
    return f"{parsed.scheme}://{parsed.netloc}"


def _extract_json_from_text(text: str) -> dict:
    """Parse JSON from potentially markdown-wrapped text.

    Tries direct parsing first, then strips markdown code fences,
    then falls back to finding content between first { and last }.

    Raises:
        LLMFormatError: If no valid JSON can be extracted.
    """
    # Try direct parse first
    stripped = text.strip()
    try:
        return json.loads(stripped)
    except json.JSONDecodeError:
        pass

    # Try stripping markdown code fences
    fence_match = _JSON_FENCE_PATTERN.search(stripped)
    if fence_match:
        try:
            return json.loads(fence_match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # Fallback: find JSON between first { and last }
    first_brace = stripped.find("{")
    last_brace = stripped.rfind("}")
    if first_brace != -1 and last_brace > first_brace:
        try:
            return json.loads(stripped[first_brace:last_brace + 1])
        except json.JSONDecodeError:
            pass

    raise LLMFormatError(f"Could not extract valid JSON from model response")


class OpenAICompatExtractor:
    """Structured JSON extractor for OpenAI-compatible APIs.

    Supports progressive fallback for structured output based on the
    configured strategy:
    - "strict": json_schema only, raises LLMUnsupportedError if not supported
    - "json": json_schema -> json_object fallback
    - "auto": json_schema -> json_object -> prompt-based JSON (default)

    Any other configured value is treated the same as "auto".
    """

    def __init__(
        self,
        api_key: str,
        base_url: str,
        model: str,
        structured_output: str = "auto",
        verify_ssl: bool = True,
    ) -> None:
        """Initialize the OpenAI-compatible extractor.

        Args:
            api_key: API key for authentication.
            base_url: Base URL of the OpenAI-compatible API.
            model: Model identifier.
            structured_output: Fallback strategy - "strict", "json", or "auto".
            verify_ssl: Whether to verify SSL certificates. Set to False for
                corporate proxies with self-signed certificates.
        """
        # Custom httpx client for SSL control (corporate proxies often use self-signed certs)
        http_client = httpx.Client(verify=verify_ssl)
        self._client = openai.OpenAI(
            api_key=api_key, base_url=base_url, http_client=http_client,
        )
        self._model = model
        self._structured_output = structured_output
        # Only the sanitized form of the URL is kept for logs and error messages.
        self._safe_url = _sanitize_url(base_url)

        # Suppress OpenAI SDK and HTTP client debug logging which dumps full
        # request bodies including prompt content — this is a security requirement.
        # Note this adjusts process-wide loggers, not just this instance.
        for noisy_logger in ("openai", "httpx", "httpcore"):
            logging.getLogger(noisy_logger).setLevel(logging.WARNING)

    def extract_json(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
    ) -> dict:
        """Extract structured JSON using an OpenAI-compatible API.

        Attempts structured output strategies in order of preference,
        falling back to the next one only when the current strategy raises
        LLMUnsupportedError. Every other error propagates immediately.

        Args:
            prompt: The extraction prompt to send to the model.
            max_tokens: Maximum tokens in the response.
            json_schema: JSON Schema that the response must conform to.
            schema_name: Human-readable name for the schema.

        Returns:
            Parsed JSON dictionary conforming to the provided schema.

        Raises:
            LLMAuthError: Invalid API key.
            LLMRateLimitError: Rate limited after all retries.
            LLMTimeoutError: Timeout/connection error after all retries.
            LLMFormatError: Response is missing, truncated, or not a valid
                JSON object.
            LLMRefusalError: Model refused to respond.
            LLMUnsupportedError: Required feature not supported and no fallback allowed.
        """
        last_unsupported: LLMUnsupportedError | None = None

        for strategy in self._get_strategies():
            logger.info(
                "OpenAI-compat extraction: url=%s, model=%s, strategy=%s, schema=%s",
                self._safe_url, self._model, strategy, schema_name,
            )
            try:
                return self._extract_with_strategy(
                    prompt, max_tokens, json_schema, schema_name, strategy,
                )
            except LLMUnsupportedError as e:
                # Remember the cause so the final error keeps the traceback
                # of the last strategy that was rejected.
                last_unsupported = e
                logger.info(
                    "Strategy %s not supported at %s, trying next fallback",
                    strategy, self._safe_url,
                )

        raise LLMUnsupportedError(
            f"No supported structured output strategy for {self._safe_url} "
            f"with configured mode '{self._structured_output}'"
        ) from last_unsupported

    def _get_strategies(self) -> list[str]:
        """Get ordered list of strategies to try based on configuration.

        Returns:
            A fresh list of strategy names, most preferred first. Unknown
            configuration values fall back to the full "auto" chain.
        """
        if self._structured_output == "strict":
            return ["json_schema"]
        if self._structured_output == "json":
            return ["json_schema", "json_object"]
        # "auto" and anything unrecognised
        return ["json_schema", "json_object", "text"]

    def _extract_with_strategy(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
        strategy: str,
    ) -> dict:
        """Execute extraction with a specific structured output strategy.

        Makes up to MAX_RETRIES attempts. Only rate-limit and timeout errors
        are retried, with exponential backoff between attempts; auth,
        refusal and unsupported-feature errors are re-raised at once, and
        any other exception propagates untouched.

        Raises:
            LLMRateLimitError, LLMTimeoutError: The last transient error,
                once all attempts are exhausted.
        """
        last_exception: Exception | None = None

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                return self._attempt_extraction(
                    prompt, max_tokens, json_schema, schema_name,
                    strategy, attempt,
                )
            except (LLMAuthError, LLMRefusalError, LLMUnsupportedError):
                # Retrying cannot help with any of these.
                raise
            except (LLMRateLimitError, LLMTimeoutError) as e:
                last_exception = e
                if attempt < MAX_RETRIES:
                    delay = INITIAL_BACKOFF_SECONDS * (BACKOFF_MULTIPLIER ** (attempt - 1))
                    logger.warning(
                        "Transient error on attempt %d/%d for %s model %s, "
                        "retrying in %ds: %s",
                        attempt, MAX_RETRIES, self._safe_url,
                        self._model, delay, type(e).__name__,
                    )
                    time.sleep(delay)

        raise last_exception  # type: ignore[misc]

    def _attempt_extraction(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
        strategy: str,
        attempt: int,
    ) -> dict:
        """Single extraction attempt with a specific strategy.

        Sends one chat-completion request, translates SDK exceptions into
        this package's LLM* exceptions, then validates and parses the reply.

        Args:
            prompt: The extraction prompt to send to the model.
            max_tokens: Maximum tokens in the response.
            json_schema: JSON Schema sent with the "json_schema" strategy.
            schema_name: Human-readable name for the schema.
            strategy: One of "json_schema", "json_object" or "text".
            attempt: 1-based attempt number, used for logging only.

        Returns:
            The parsed JSON object from the model's reply.

        Raises:
            LLMAuthError: The endpoint rejected the API key.
            LLMRateLimitError: The endpoint rate limited the request.
            LLMTimeoutError: The request timed out or the connection failed.
            LLMUnsupportedError: A bad-request error whose message mentions
                ``response_format`` or ``json_schema``.
            LLMFormatError: Any other bad-request error, or an unusable reply.
            LLMRefusalError: The reply had no content.
        """
        logger.info(
            "OpenAI-compat attempt %d/%d, url=%s, model=%s, strategy=%s",
            attempt, MAX_RETRIES, self._safe_url, self._model, strategy,
        )

        kwargs = self._build_request_kwargs(
            prompt, max_tokens, json_schema, schema_name, strategy,
        )

        from src.observability import trace_generation

        try:
            with trace_generation(provider="openai_compat", model=self._model) as _trace:
                _trace.set_input(prompt)
                response = self._client.chat.completions.create(**kwargs)
                _trace.set_output_from_openai(response)
        except openai.AuthenticationError as e:
            raise LLMAuthError(
                f"OpenAI-compat authentication failed at {self._safe_url} (check API key)"
            ) from e
        except openai.RateLimitError as e:
            raise LLMRateLimitError(
                f"OpenAI-compat rate limited at {self._safe_url}"
            ) from e
        except (openai.APITimeoutError, openai.APIConnectionError) as e:
            raise LLMTimeoutError(
                f"OpenAI-compat connection error at {self._safe_url} ({type(e).__name__})"
            ) from e
        except openai.BadRequestError as e:
            # A 400 that names the response format is taken to mean the
            # endpoint does not implement this structured-output mode.
            error_msg = str(e).lower()
            if "response_format" in error_msg or "json_schema" in error_msg:
                raise LLMUnsupportedError(
                    f"Structured output strategy '{strategy}' not supported "
                    f"at {self._safe_url}"
                ) from e
            raise LLMFormatError(
                f"Bad request at {self._safe_url} ({type(e).__name__})"
            ) from e

        return self._parse_response(response, schema_name, strategy)

    def _build_request_kwargs(
        self,
        prompt: str,
        max_tokens: int,
        json_schema: dict,
        schema_name: str,
        strategy: str,
    ) -> dict:
        """Build the keyword arguments for one chat-completion request.

        The "json_schema" and "json_object" strategies set ``response_format``;
        the "text" strategy instead appends a JSON-only instruction to the
        prompt and sends no ``response_format``.
        """
        content = prompt
        if strategy == "text":
            # Append JSON instruction to prompt for text-based fallback
            content = prompt + "\n\nIMPORTANT: Respond with valid JSON only, no markdown."

        kwargs: dict = {
            "model": self._model,
            "max_tokens": max_tokens,
            "messages": [{"role": "user", "content": content}],
        }

        if strategy == "json_schema":
            kwargs["response_format"] = {
                "type": "json_schema",
                "json_schema": {
                    "name": schema_name,
                    "strict": True,
                    "schema": json_schema,
                },
            }
        elif strategy == "json_object":
            kwargs["response_format"] = {"type": "json_object"}

        return kwargs

    def _parse_response(self, response, schema_name: str, strategy: str) -> dict:
        """Validate a chat-completion response and parse its JSON payload.

        Args:
            response: Response object returned by the client; only
                ``choices[0].finish_reason`` and
                ``choices[0].message.content`` are read.
            schema_name: Human-readable name for the schema (for messages).
            strategy: Strategy that produced the response; "text" replies go
                through the lenient markdown-aware parser.

        Raises:
            LLMFormatError: No choices, truncated output, invalid JSON, or
                JSON that is not an object.
            LLMRefusalError: The first choice has no content.
        """
        # Some OpenAI-compatible endpoints answer with empty or null
        # ``choices``; report that as a format error instead of letting an
        # IndexError/TypeError escape.
        if not response.choices:
            raise LLMFormatError(
                f"Response contained no choices for schema {schema_name} "
                f"at {self._safe_url}"
            )
        choice = response.choices[0]

        # Check for truncation. LLMFormatError is not among the errors the
        # retry loop in _extract_with_strategy retries, so this propagates
        # to the caller.
        if choice.finish_reason == "length":
            raise LLMFormatError(
                f"Response truncated (max_tokens) for schema {schema_name} "
                f"at {self._safe_url}"
            )

        # Check for refusal: an empty or missing content is treated as one.
        content = choice.message.content
        if not content:
            raise LLMRefusalError(
                f"Model at {self._safe_url} refused to generate response "
                f"for schema {schema_name}"
            )

        # Parse JSON from response
        if strategy == "text":
            return _extract_json_from_text(content)

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            raise LLMFormatError(
                f"Failed to parse response as JSON for schema {schema_name} "
                f"at {self._safe_url} ({type(e).__name__})"
            ) from e

        # Valid JSON that is not an object would break the dict contract.
        if not isinstance(parsed, dict):
            raise LLMFormatError(
                f"Response for schema {schema_name} at {self._safe_url} "
                f"is not a JSON object ({type(parsed).__name__})"
            )
        return parsed