diff --git a/connectors/llm/factory.py b/connectors/llm/factory.py index c160dcc..b7896c0 100644 --- a/connectors/llm/factory.py +++ b/connectors/llm/factory.py @@ -96,17 +96,20 @@ def create_extractor(ai_config: dict) -> StructuredExtractor: f"got '{structured_output}'" ) + verify_ssl = ai_config.get("verify_ssl", True) + safe_url = _sanitize_url(base_url) logger.info( "Creating OpenAICompatExtractor, url=%s, model=%s, " - "structured_output=%s", - safe_url, model, structured_output, + "structured_output=%s, verify_ssl=%s", + safe_url, model, structured_output, verify_ssl, ) return OpenAICompatExtractor( api_key=api_key, base_url=base_url, model=model, structured_output=structured_output, + verify_ssl=verify_ssl, ) else: diff --git a/connectors/llm/openai_compat.py b/connectors/llm/openai_compat.py index d11621b..9e4799f 100644 --- a/connectors/llm/openai_compat.py +++ b/connectors/llm/openai_compat.py @@ -10,6 +10,7 @@ import re import time from urllib.parse import urlparse +import httpx import openai from .exceptions import ( @@ -94,6 +95,7 @@ class OpenAICompatExtractor: base_url: str, model: str, structured_output: str = "auto", + verify_ssl: bool = True, ) -> None: """Initialize the OpenAI-compatible extractor. @@ -102,12 +104,23 @@ class OpenAICompatExtractor: base_url: Base URL of the OpenAI-compatible API. model: Model identifier. structured_output: Fallback strategy - "strict", "json", or "auto". + verify_ssl: Whether to verify SSL certificates. Set to False for + corporate proxies with self-signed certificates. """ - self._client = openai.OpenAI(api_key=api_key, base_url=base_url) + # Custom httpx client for SSL control (corporate proxies often use self-signed certs) + http_client = httpx.Client(verify=verify_ssl) + self._client = openai.OpenAI( + api_key=api_key, base_url=base_url, http_client=http_client, + ) self._model = model self._structured_output = structured_output self._safe_url = _sanitize_url(base_url) + # Suppress OpenAI SDK debug logging which dumps full request bodies + # including prompt content — this is a security requirement + logging.getLogger("openai").setLevel(logging.WARNING) + logging.getLogger("httpx").setLevel(logging.WARNING) + def extract_json( self, prompt: str,