agnes-the-ai-analyst/tests/test_openmetadata_client.py
Petr c5c24cb45b Implement OpenMetadata catalog integration (Phase 1)
Add OpenMetadata REST API connector and enricher to merge table/column metadata
from OpenMetadata catalog at sync and query time.

Changes:
- connectors/openmetadata/client.py: HTTP client for OM API
- connectors/openmetadata/enricher.py: Data enrichment with TTL cache
- tests/test_openmetadata_*: Unit tests for client and enricher
- src/config.py: Add catalog_fqn field to TableConfig
- src/data_sync.py: Use enricher in _generate_schema_yaml (catalog > BQ API > data_description.md)
- webapp/app.py: Initialize enricher, enrich catalog data with tags/tier/owners/url
- config/instance.yaml.example: Document openmetadata section

Features:
- FQN auto-derivation: bigquery.{table.id}
- TTL cache (default 1h) to avoid repeated API calls
- Graceful degradation: disabled if token missing, silent on HTTP errors
- Column description priority: catalog > BQ API > (none)
- Table description priority: catalog > data_description.md
2026-03-12 14:07:13 +01:00

157 lines
5.3 KiB
Python

"""
Tests for OpenMetadata client
"""
import pytest
import httpx
from unittest.mock import Mock, patch, MagicMock
from connectors.openmetadata.client import OpenMetadataClient
@pytest.fixture
def mock_httpx_client():
    """Patch ``httpx.Client`` as referenced from the OpenMetadata client module.

    Yields:
        The mock object that stands in for
        ``connectors.openmetadata.client.httpx.Client`` while the test runs.
    """
    client_patch = patch("connectors.openmetadata.client.httpx.Client")
    with client_patch as patched_client_cls:
        yield patched_client_cls
def test_client_init(mock_httpx_client):
    """Check that the constructor keeps its arguments and sends a bearer token."""
    init_kwargs = {
        "base_url": "https://catalog.example.com",
        "token": "test-token",
        "timeout": 30,
    }

    client = OpenMetadataClient(**init_kwargs)

    # The constructor arguments must be readable back as attributes.
    assert client.base_url == "https://catalog.example.com"
    assert client.token == "test-token"
    assert client.timeout == 30

    # httpx.Client has to be built exactly once, with the Authorization header.
    mock_httpx_client.assert_called_once()
    _positional, keyword = mock_httpx_client.call_args
    assert keyword["headers"]["Authorization"] == "Bearer test-token"
def test_client_init_strips_trailing_slash():
    """Check that a trailing ``/`` on ``base_url`` is removed by the constructor."""
    url_with_slash = "https://catalog.example.com/"
    expected_url = "https://catalog.example.com"

    with patch("connectors.openmetadata.client.httpx.Client"):
        client = OpenMetadataClient(base_url=url_with_slash, token="test-token")
        assert client.base_url == expected_url
def test_get_table_success():
    """Check that get_table() returns the JSON body and hits the table-by-name path."""
    # Canned API response returned by the mocked HTTP layer.
    table_payload = {
        "id": "table-id",
        "name": "roi_datamart_v2",
        "fullyQualifiedName": "bigquery.project.dataset.table",
        "description": "Test table",
        "columns": [
            {"name": "id", "dataType": "INTEGER", "description": "ID column"},
            {"name": "name", "dataType": "STRING", "description": "Name column"},
        ],
        "tags": [{"name": "important"}],
        "owners": [{"name": "Data Team", "email": "data@example.com"}],
    }

    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        # Wire response -> session -> patched class, innermost first.
        response = MagicMock(**{"json.return_value": table_payload})
        http_session = MagicMock(**{"get.return_value": response})
        httpx_client_cls.return_value = http_session

        client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="test-token",
        )
        result = client.get_table("bigquery.project.dataset.table")

        assert result["name"] == "roi_datamart_v2"
        assert len(result["columns"]) == 2

        # Exactly one GET, and its recorded call mentions the expected endpoint.
        http_session.get.assert_called_once()
        recorded_call = http_session.get.call_args
        assert (
            "/api/v1/tables/name/bigquery.project.dataset.table"
            in str(recorded_call)
        )
def test_get_table_http_error():
    """Check that get_table() raises ``httpx.HTTPStatusError`` on a failing response.

    The mocked response is configured so that ``raise_for_status()`` raises a
    401 error; the test expects that error type to reach the caller.
    """
    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        unauthorized = httpx.HTTPStatusError(
            "401 Unauthorized",
            request=MagicMock(),
            response=MagicMock(status_code=401),
        )
        failing_response = MagicMock(
            **{"raise_for_status.side_effect": unauthorized}
        )
        http_session = MagicMock(**{"get.return_value": failing_response})
        httpx_client_cls.return_value = http_session

        client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="invalid-token",
        )

        with pytest.raises(httpx.HTTPStatusError):
            client.get_table("bigquery.project.dataset.table")
def test_get_metrics_success():
    """Check that get_metrics() returns the two metrics from the mocked response."""
    # Canned list response; the metric entries sit under the "data" key.
    metrics_payload = {
        "data": [
            {
                "id": "metric-1",
                "name": "revenue",
                "fullyQualifiedName": "metrics.revenue",
                "description": "Total revenue",
                "expression": "SUM(amount)",
            },
            {
                "id": "metric-2",
                "name": "users",
                "fullyQualifiedName": "metrics.users",
                "description": "Active users",
                "expression": "COUNT(DISTINCT user_id)",
            },
        ]
    }

    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        response = MagicMock(**{"json.return_value": metrics_payload})
        http_session = MagicMock(**{"get.return_value": response})
        httpx_client_cls.return_value = http_session

        client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="test-token",
        )
        result = client.get_metrics(limit=10)

        assert len(result) == 2
        # Entries must come back in the same order as in the payload.
        for position, expected_name in enumerate(("revenue", "users")):
            assert result[position]["name"] == expected_name
def test_context_manager():
    """Check that the client works in a ``with`` block and closes its HTTP session."""
    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        http_session = MagicMock()
        httpx_client_cls.return_value = http_session

        client_kwargs = {
            "base_url": "https://catalog.example.com",
            "token": "test-token",
        }
        with OpenMetadataClient(**client_kwargs) as client:
            assert client is not None

        # Leaving the block must have closed the underlying session exactly once.
        http_session.close.assert_called_once()