Add configurable SSL verification (verify_ssl) and suppress OpenAI SDK debug logging

- Add verify_ssl config option for corporate proxies with self-signed certs
- Suppress openai/httpx debug loggers that dump full request bodies
  (including prompt content) — security requirement
This commit is contained in:
Petr 2026-03-23 12:56:04 +01:00
parent 95358448e6
commit f619fadc42
2 changed files with 19 additions and 3 deletions

View file

@ -96,17 +96,20 @@ def create_extractor(ai_config: dict) -> StructuredExtractor:
f"got '{structured_output}'" f"got '{structured_output}'"
) )
verify_ssl = ai_config.get("verify_ssl", True)
safe_url = _sanitize_url(base_url) safe_url = _sanitize_url(base_url)
logger.info( logger.info(
"Creating OpenAICompatExtractor, url=%s, model=%s, " "Creating OpenAICompatExtractor, url=%s, model=%s, "
"structured_output=%s", "structured_output=%s, verify_ssl=%s",
safe_url, model, structured_output, safe_url, model, structured_output, verify_ssl,
) )
return OpenAICompatExtractor( return OpenAICompatExtractor(
api_key=api_key, api_key=api_key,
base_url=base_url, base_url=base_url,
model=model, model=model,
structured_output=structured_output, structured_output=structured_output,
verify_ssl=verify_ssl,
) )
else: else:

View file

@ -10,6 +10,7 @@ import re
import time import time
from urllib.parse import urlparse from urllib.parse import urlparse
import httpx
import openai import openai
from .exceptions import ( from .exceptions import (
@ -94,6 +95,7 @@ class OpenAICompatExtractor:
base_url: str, base_url: str,
model: str, model: str,
structured_output: str = "auto", structured_output: str = "auto",
verify_ssl: bool = True,
) -> None: ) -> None:
"""Initialize the OpenAI-compatible extractor. """Initialize the OpenAI-compatible extractor.
@ -102,12 +104,23 @@ class OpenAICompatExtractor:
base_url: Base URL of the OpenAI-compatible API. base_url: Base URL of the OpenAI-compatible API.
model: Model identifier. model: Model identifier.
structured_output: Fallback strategy - "strict", "json", or "auto". structured_output: Fallback strategy - "strict", "json", or "auto".
verify_ssl: Whether to verify SSL certificates. Set to False for
corporate proxies with self-signed certificates.
""" """
self._client = openai.OpenAI(api_key=api_key, base_url=base_url) # Custom httpx client for SSL control (corporate proxies often use self-signed certs)
http_client = httpx.Client(verify=verify_ssl)
self._client = openai.OpenAI(
api_key=api_key, base_url=base_url, http_client=http_client,
)
self._model = model self._model = model
self._structured_output = structured_output self._structured_output = structured_output
self._safe_url = _sanitize_url(base_url) self._safe_url = _sanitize_url(base_url)
# Suppress OpenAI SDK debug logging which dumps full request bodies
# including prompt content — this is a security requirement
logging.getLogger("openai").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
def extract_json( def extract_json(
self, self,
prompt: str, prompt: str,