Add OpenMetadata REST API connector and enricher to merge table/column metadata
from OpenMetadata catalog at sync and query time.
Changes:
- connectors/openmetadata/client.py: HTTP client for OM API
- connectors/openmetadata/enricher.py: Data enrichment with TTL cache
- tests/test_openmetadata_*: Unit tests for client and enricher
- src/config.py: Add catalog_fqn field to TableConfig
- src/data_sync.py: Use enricher in _generate_schema_yaml (catalog > BQ API > data_description.md)
- webapp/app.py: Initialize enricher, enrich catalog data with tags/tier/owners/url
- config/instance.yaml.example: Document openmetadata section
Features:
- FQN auto-derivation: bigquery.{table.id}
- TTL cache (default 1h) to avoid repeated API calls
- Graceful degradation: disabled if token missing, silent on HTTP errors
- Column description priority: catalog > BQ API > (none)
- Table description priority: catalog > data_description.md
157 lines
5.3 KiB
Python
157 lines
5.3 KiB
Python
"""
|
|
Tests for OpenMetadata client
|
|
"""
|
|
|
|
import pytest
|
|
import httpx
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
|
|
from connectors.openmetadata.client import OpenMetadataClient
|
|
|
|
|
|
@pytest.fixture
def mock_httpx_client():
    """Yield a patched stand-in for ``httpx.Client`` in the client module.

    The patch is started before the requesting test runs and is always
    stopped afterwards, even if the test fails.
    """
    patcher = patch("connectors.openmetadata.client.httpx.Client")
    patched_cls = patcher.start()
    try:
        yield patched_cls
    finally:
        patcher.stop()
|
|
|
|
|
|
def test_client_init(mock_httpx_client):
    """Constructor keeps its arguments and builds httpx.Client with a bearer header."""
    om_client = OpenMetadataClient(
        base_url="https://catalog.example.com",
        token="test-token",
        timeout=30,
    )

    # The constructor arguments are exposed unchanged as attributes.
    assert om_client.base_url == "https://catalog.example.com"
    assert om_client.token == "test-token"
    assert om_client.timeout == 30

    # Exactly one httpx.Client was created, and it carries the auth header.
    mock_httpx_client.assert_called_once()
    _positional, keyword_args = mock_httpx_client.call_args
    sent_headers = keyword_args["headers"]
    assert sent_headers["Authorization"] == "Bearer test-token"
|
|
|
|
|
|
def test_client_init_strips_trailing_slash():
    """A trailing slash on base_url is removed by the constructor."""
    url_with_slash = "https://catalog.example.com/"

    with patch("connectors.openmetadata.client.httpx.Client"):
        om_client = OpenMetadataClient(base_url=url_with_slash, token="test-token")
        assert om_client.base_url == "https://catalog.example.com"
|
|
|
|
|
|
def test_get_table_success():
    """get_table() returns the JSON body and requests the by-name tables endpoint."""
    table_fqn = "bigquery.project.dataset.table"
    table_payload = {
        "id": "table-id",
        "name": "roi_datamart_v2",
        "fullyQualifiedName": "bigquery.project.dataset.table",
        "description": "Test table",
        "columns": [
            {"name": "id", "dataType": "INTEGER", "description": "ID column"},
            {"name": "name", "dataType": "STRING", "description": "Name column"},
        ],
        "tags": [{"name": "important"}],
        "owners": [{"name": "Data Team", "email": "data@example.com"}],
    }

    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        # Whatever the client constructs from httpx.Client is this mock.
        http = httpx_client_cls.return_value
        fake_response = MagicMock()
        fake_response.json.return_value = table_payload
        http.get.return_value = fake_response

        om_client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="test-token",
        )
        table = om_client.get_table(table_fqn)

        assert table["name"] == "roi_datamart_v2"
        assert len(table["columns"]) == 2

        # One GET was issued, and the endpoint path appears in its arguments.
        http.get.assert_called_once()
        recorded_call = str(http.get.call_args)
        assert "/api/v1/tables/name/bigquery.project.dataset.table" in recorded_call
|
|
|
|
|
|
def test_get_table_http_error():
    """get_table() lets an HTTPStatusError from raise_for_status() propagate."""
    unauthorized = httpx.HTTPStatusError(
        "401 Unauthorized",
        request=MagicMock(),
        response=MagicMock(status_code=401),
    )

    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        http = httpx_client_cls.return_value
        failing_response = MagicMock()
        # The error fires when the client checks the response status.
        failing_response.raise_for_status.side_effect = unauthorized
        http.get.return_value = failing_response

        om_client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="invalid-token",
        )

        with pytest.raises(httpx.HTTPStatusError):
            om_client.get_table("bigquery.project.dataset.table")
|
|
|
|
|
|
def test_get_metrics_success():
    """get_metrics() returns the entries listed under the response's "data" key."""
    revenue_metric = {
        "id": "metric-1",
        "name": "revenue",
        "fullyQualifiedName": "metrics.revenue",
        "description": "Total revenue",
        "expression": "SUM(amount)",
    }
    users_metric = {
        "id": "metric-2",
        "name": "users",
        "fullyQualifiedName": "metrics.users",
        "description": "Active users",
        "expression": "COUNT(DISTINCT user_id)",
    }

    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        http = httpx_client_cls.return_value
        fake_response = MagicMock()
        fake_response.json.return_value = {"data": [revenue_metric, users_metric]}
        http.get.return_value = fake_response

        om_client = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="test-token",
        )
        metrics = om_client.get_metrics(limit=10)

        # Both metrics come back, in the order the API listed them.
        assert len(metrics) == 2
        returned_names = [metrics[0]["name"], metrics[1]["name"]]
        assert returned_names == ["revenue", "users"]
|
|
|
|
|
|
def test_context_manager():
    """Leaving a ``with OpenMetadataClient(...)`` block closes the HTTP client."""
    with patch("connectors.openmetadata.client.httpx.Client") as httpx_client_cls:
        http = httpx_client_cls.return_value

        managed = OpenMetadataClient(
            base_url="https://catalog.example.com",
            token="test-token",
        )
        with managed as om_client:
            # Entering the context must hand back a usable object.
            assert om_client is not None

        # Exiting the block must close the underlying client exactly once.
        http.close.assert_called_once()
|