Phase 2: Replace demo YAML metrics with OpenMetadata catalog data

- Add get_metric_by_fqn() to OpenMetadataClient
- Add get_metrics() to CatalogEnricher with TTL caching
- Implement _parse_om_metric() to extract category/grain from OpenMetadata tags
- Implement _load_metrics_from_catalog() to fetch and categorize metrics
- Implement _build_om_metric_detail() to convert OpenMetadata format to MetricParser JSON
- Add /api/catalog/metrics/<fqn> endpoint for metric detail modal
- Update _load_metrics_data() to prefer catalog over YAML fallback
- Update metric_modal.js to route catalog:{fqn} to catalog API endpoint
- Delete 10 demo YAML files from docs/metrics/
- Replace metric tests with new unit tests for catalog parsing functions (19 tests)

Catalog metrics provide a single source of truth, so demo YAML files no longer need to be maintained.
The UI remains unchanged - only the data source changes from YAML to the OpenMetadata catalog.
This commit is contained in:
Petr 2026-03-12 15:10:42 +01:00
parent be58e63394
commit 5fc9526627
17 changed files with 654 additions and 608 deletions

View file

@ -112,6 +112,32 @@ class OpenMetadataClient:
data = response.json()
return data.get("data", [])
def get_metric_by_fqn(self, fqn: str) -> Dict[str, Any]:
    """
    Fetch a specific metric by FQN from OpenMetadata.

    The FQN is percent-encoded before it is placed in the URL path, so
    characters such as "/", "?", "#", spaces or quotes inside the name
    cannot alter which endpoint is requested.

    Args:
        fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

    Returns:
        The decoded JSON body of the response. The request asks for the
        fields description, expression, owners, tags and displayName.

    Raises:
        httpx.HTTPStatusError: If request fails (non-2xx status)
    """
    # Function-scope import keeps this method self-contained.
    from urllib.parse import quote

    # safe="" also encodes "/" so the FQN always stays a single path segment.
    encoded_fqn = quote(fqn, safe="")
    url = f"/api/v1/metrics/name/{encoded_fqn}"
    params = {
        "fields": "description,expression,owners,tags,displayName",
    }

    response = self._client.get(url, params=params)
    response.raise_for_status()
    return response.json()
def close(self) -> None:
    """Close HTTP client session.

    Delegates to ``self._client.close()``; takes no arguments and has no
    return value.
    """
    self._client.close()

View file

@ -301,6 +301,45 @@ class CatalogEnricher:
"fetched_at": datetime.now(),
}
def get_metrics(self, limit: int = 200) -> List[Dict[str, Any]]:
    """
    Fetch list of business metrics from OpenMetadata catalog.

    Results are cached per ``limit`` value, so a list cached for a small
    limit is never returned for a later call that asks for more metrics.

    Args:
        limit: Maximum number of metrics to fetch (default: 200)

    Returns:
        List of metric dictionaries as returned by the catalog client.
        Returns empty list if:
        - enricher is disabled or has no client
        - any exception is raised while reading the cache or fetching

    Never raises exception (graceful degradation).
    """
    if not self.enabled or not self._client:
        return []

    # The limit is part of the key: the cached value depends on it.
    cache_key = f"__metrics_list__:{limit}"

    try:
        # Check cache first
        cached = self._get_from_cache(cache_key)
        if cached is not None:
            logger.debug("Catalog cache hit: metrics list")
            return cached

        # Fetch from API
        logger.debug(f"Fetching {limit} metrics from catalog")
        metrics = self._client.get_metrics(limit=limit)

        # Store the result; expiry is left to the cache helpers.
        self._cache_entry(cache_key, metrics)
        logger.info(f"Loaded {len(metrics)} metrics from catalog")
        return metrics
    except Exception as e:
        # Deliberate best-effort boundary: the UI falls back when this is empty.
        logger.warning(f"Failed to fetch metrics from catalog: {e}")
        return []
def clear_cache(self) -> None:
    """Manually clear all cached entries.

    Empties ``self._cache`` in place by calling its ``clear()`` method;
    has no return value.
    """
    self._cache.clear()

View file

@ -1,39 +0,0 @@
- name: customer_count
display_name: Customer Count
category: customers
type: count_distinct
unit: customers
grain: monthly
time_column: created_at
table: customers
expression: "COUNT(DISTINCT customer_id)"
description: "Total number of unique customers. Tracks customer base growth over time. Counts distinct customer records based on registration date."
dimensions:
- segment
- region
- acquisition_channel
notes:
- "Counts only active customers (not deleted or merged)"
- "A customer is counted in the month of their first registration"
- "Segment is assigned based on lifetime spend thresholds"
synonyms:
- total_customers
- customer_base
- active_customers
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
COUNT(DISTINCT customer_id) AS new_customers
FROM customers
WHERE status = 'active'
GROUP BY 1
ORDER BY 1
sql_by_segment: |
SELECT
segment,
COUNT(DISTINCT customer_id) AS customer_count,
AVG(lifetime_value) AS avg_ltv
FROM customers
WHERE status = 'active'
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,66 +0,0 @@
- name: repeat_purchase_rate
display_name: Repeat Purchase Rate
category: customers
type: ratio
unit: "%"
grain: monthly
time_column: order_date
table: orders
tables:
- orders
- customers
expression: "COUNT(DISTINCT CASE WHEN order_number > 1 THEN customer_id END) / COUNT(DISTINCT customer_id)"
description: "Percentage of customers who made more than one purchase. Key loyalty and retention indicator. Higher rates signal strong product-market fit and customer satisfaction."
dimensions:
- customer_segment
- acquisition_channel
- product_category
notes:
- "Calculated over a rolling 12-month window by default"
- "Joins orders to customers via customer_id"
- "Order numbering is based on chronological order per customer"
- "Excludes cancelled and fully refunded orders"
synonyms:
- retention_rate
- repurchase_rate
- customer_loyalty_rate
sql: |
WITH customer_orders AS (
SELECT
customer_id,
COUNT(*) AS order_count
FROM orders
WHERE status = 'completed'
AND order_date >= CURRENT_DATE - INTERVAL '12 months'
GROUP BY 1
)
SELECT
ROUND(
COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
/ COUNT(*), 2
) AS repeat_purchase_rate_pct,
COUNT(*) AS total_customers,
COUNT(CASE WHEN order_count > 1 THEN 1 END) AS repeat_customers
FROM customer_orders
sql_by_channel: |
WITH customer_orders AS (
SELECT
o.customer_id,
c.acquisition_channel,
COUNT(*) AS order_count
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'completed'
AND o.order_date >= CURRENT_DATE - INTERVAL '12 months'
GROUP BY 1, 2
)
SELECT
acquisition_channel,
ROUND(
COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
/ COUNT(*), 2
) AS repeat_rate_pct,
COUNT(*) AS total_customers
FROM customer_orders
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,55 +0,0 @@
- name: campaign_roi
display_name: Campaign ROI
category: marketing
type: ratio
unit: "%"
grain: monthly
time_column: start_date
table: campaigns
tables:
- campaigns
- orders
- web_leads
expression: "(SUM(attributed_revenue) - SUM(spend)) / NULLIF(SUM(spend), 0) * 100"
description: "Return on investment for marketing campaigns. Measures revenue generated relative to campaign spend. Negative ROI indicates underperforming campaigns that need optimization."
dimensions:
- campaign_type
- channel
- target_segment
notes:
- "Attribution uses last-touch model by default"
- "Joins campaigns to orders via utm_campaign tracking codes"
- "Web leads are attributed to campaigns via landing page tracking"
- "ROI above 300% is considered excellent for e-commerce"
synonyms:
- marketing_roi
- campaign_return
- roas
sql: |
SELECT
c.campaign_name,
c.campaign_type,
c.spend,
SUM(o.total_amount) AS attributed_revenue,
ROUND(
(SUM(o.total_amount) - c.spend) / NULLIF(c.spend, 0) * 100, 2
) AS roi_pct
FROM campaigns c
LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
AND o.status = 'completed'
GROUP BY 1, 2, 3
ORDER BY 5 DESC
sql_by_type: |
SELECT
c.campaign_type,
SUM(c.spend) AS total_spend,
SUM(o.total_amount) AS total_revenue,
ROUND(
(SUM(o.total_amount) - SUM(c.spend))
/ NULLIF(SUM(c.spend), 0) * 100, 2
) AS roi_pct
FROM campaigns c
LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
AND o.status = 'completed'
GROUP BY 1
ORDER BY 4 DESC

View file

@ -1,53 +0,0 @@
- name: cost_per_acquisition
display_name: Cost per Acquisition
category: marketing
type: ratio
unit: USD
grain: monthly
time_column: start_date
table: campaigns
tables:
- campaigns
- customers
expression: "SUM(spend) / NULLIF(COUNT(DISTINCT new_customer_id), 0)"
description: "Average cost to acquire one new customer through marketing campaigns. Compares total campaign spend to the number of new customer registrations attributed to those campaigns."
dimensions:
- campaign_type
- channel
- region
notes:
- "Only counts first-time customers (no repeat purchasers)"
- "Joins campaigns to customers via attribution tracking"
- "CPA below customer lifetime value indicates sustainable growth"
synonyms:
- cpa
- customer_acquisition_cost
- cac
sql: |
SELECT
DATE_TRUNC('month', c.start_date) AS month,
SUM(c.spend) AS total_spend,
COUNT(DISTINCT cust.customer_id) AS new_customers,
ROUND(
SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
) AS cost_per_acquisition
FROM campaigns c
LEFT JOIN customers cust
ON cust.attribution_campaign = c.campaign_id
AND cust.is_first_purchase = true
GROUP BY 1
ORDER BY 1
sql_by_channel: |
SELECT
c.channel,
SUM(c.spend) AS total_spend,
COUNT(DISTINCT cust.customer_id) AS new_customers,
ROUND(
SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
) AS cpa
FROM campaigns c
LEFT JOIN customers cust
ON cust.attribution_campaign = c.campaign_id
AND cust.is_first_purchase = true
GROUP BY 1
ORDER BY 4

View file

@ -1,46 +0,0 @@
- name: lead_conversion_rate
display_name: Lead Conversion Rate
category: marketing
type: ratio
unit: "%"
grain: monthly
time_column: created_at
table: web_leads
expression: "COUNT(CASE WHEN status = 'converted' THEN 1 END) / COUNT(*) * 100"
description: "Percentage of web leads that convert to paying customers. Measures the effectiveness of the sales funnel from initial lead capture through purchase completion."
dimensions:
- source
- landing_page
- lead_score_tier
notes:
- "A lead is 'converted' when they complete their first purchase"
- "Conversion window is 90 days from lead creation"
- "Duplicate leads (same email) are deduplicated by earliest creation"
synonyms:
- conversion_rate
- lead_to_customer_rate
- funnel_conversion
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
COUNT(*) AS total_leads,
COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
ROUND(
COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
/ COUNT(*), 2
) AS conversion_rate_pct
FROM web_leads
GROUP BY 1
ORDER BY 1
sql_by_source: |
SELECT
source,
COUNT(*) AS total_leads,
COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
ROUND(
COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
/ COUNT(*), 2
) AS conversion_rate_pct
FROM web_leads
GROUP BY 1
ORDER BY 4 DESC

View file

@ -1,45 +0,0 @@
- name: average_order_value
display_name: Average Order Value
category: revenue
type: average
unit: USD
grain: monthly
time_column: order_date
table: orders
tables:
- orders
- customers
expression: "AVG(total_amount)"
description: "Average monetary value per order. Key indicator of customer purchasing behavior and pricing effectiveness. Joins to customers for segmentation."
dimensions:
- channel
- customer_segment
- product_category
- is_first_order
notes:
- "Calculated only on completed orders"
- "Joins to customers table via customer_id for segment analysis"
- "Useful to compare AOV by new vs returning customers"
synonyms:
- aov
- avg_basket_size
sql: |
SELECT
DATE_TRUNC('month', o.order_date) AS month,
AVG(o.total_amount) AS avg_order_value,
COUNT(*) AS order_count
FROM orders o
WHERE o.status = 'completed'
GROUP BY 1
ORDER BY 1
sql_by_segment: |
SELECT
DATE_TRUNC('month', o.order_date) AS month,
c.segment AS customer_segment,
AVG(o.total_amount) AS avg_order_value,
COUNT(*) AS order_count
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC

View file

@ -1,41 +0,0 @@
- name: revenue_by_channel
display_name: Revenue by Channel
category: revenue
type: sum
unit: USD
grain: monthly
time_column: order_date
table: orders
expression: "SUM(total_amount) GROUP BY channel"
description: "Revenue breakdown by sales channel (web, mobile, in-store, marketplace). Identifies highest-performing channels and guides marketing spend allocation."
dimensions:
- channel
- region
- product_category
notes:
- "Channel is assigned at order creation and does not change"
- "Marketplace channel includes all third-party platforms (Amazon, eBay, etc.)"
- "Cross-channel attribution is not applied; each order is counted once"
synonyms:
- channel_revenue
- sales_by_channel
sql: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
SUM(total_amount) AS revenue,
COUNT(*) AS order_count
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC
sql_by_region: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
region,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2, 3
ORDER BY 1, 4 DESC

View file

@ -1,40 +0,0 @@
- name: total_revenue
display_name: Total Revenue
category: revenue
type: sum
unit: USD
grain: monthly
time_column: order_date
table: orders
expression: "SUM(total_amount)"
description: "Total revenue from all orders. Primary top-line metric tracking overall business performance across all channels and product categories."
dimensions:
- channel
- product_category
- region
- payment_method
notes:
- "Includes all completed orders, excludes cancelled and refunded"
- "Revenue is recognized at order completion date, not payment date"
- "Multi-currency orders are converted to USD at daily exchange rate"
synonyms:
- gross_revenue
- total_sales
- top_line_revenue
sql: |
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total_amount) AS total_revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1
ORDER BY 1
sql_by_channel: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC

View file

@ -1,47 +0,0 @@
- name: avg_resolution_hours
display_name: Average Resolution Time
category: support
type: average
unit: hours
grain: monthly
time_column: created_at
table: support_tickets
expression: "AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600)"
description: "Average time in hours from ticket creation to resolution. Key support team performance metric. Lower values indicate more efficient support operations."
dimensions:
- priority
- category
- agent
- channel
notes:
- "Only includes resolved tickets (excludes open and escalated)"
- "Business hours calculation is not applied; uses wall-clock time"
- "Outliers above 720 hours (30 days) are excluded from average"
synonyms:
- resolution_time
- time_to_resolve
- ttr
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
ROUND(
AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
) AS avg_resolution_hours,
COUNT(*) AS resolved_tickets
FROM support_tickets
WHERE resolved_at IS NOT NULL
AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
GROUP BY 1
ORDER BY 1
sql_by_priority: |
SELECT
priority,
ROUND(
AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
) AS avg_resolution_hours,
COUNT(*) AS ticket_count
FROM support_tickets
WHERE resolved_at IS NOT NULL
AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
GROUP BY 1
ORDER BY 2

View file

@ -1,46 +0,0 @@
- name: satisfaction_score
display_name: Customer Satisfaction Score
category: support
type: average
unit: score (1-5)
grain: monthly
time_column: created_at
table: support_tickets
expression: "AVG(satisfaction_score)"
description: "Average customer satisfaction rating on a 1-5 scale collected after ticket resolution. Measures customer perception of support quality and identifies areas for improvement."
dimensions:
- priority
- category
- agent
- resolution_type
notes:
- "Score is collected via post-resolution survey email"
- "Response rate is typically 25-35% of resolved tickets"
- "Score of 4+ is considered 'satisfied', below 3 is 'unsatisfied'"
- "Only tickets with a satisfaction response are included"
synonyms:
- csat
- customer_satisfaction
- satisfaction_rating
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
COUNT(*) AS responses,
ROUND(
COUNT(CASE WHEN satisfaction_score >= 4 THEN 1 END) * 100.0
/ COUNT(*), 1
) AS pct_satisfied
FROM support_tickets
WHERE satisfaction_score IS NOT NULL
GROUP BY 1
ORDER BY 1
sql_by_category: |
SELECT
category,
ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
COUNT(*) AS responses
FROM support_tickets
WHERE satisfaction_score IS NOT NULL
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,151 +1,352 @@
"""Tests for business metric YAML definitions and parser."""
"""Tests for OpenMetadata catalog metrics and parsing functions."""
import yaml
import pytest
from pathlib import Path
from webapp.utils.metric_parser import MetricParser
from unittest.mock import Mock, MagicMock, patch
from webapp.app import _parse_om_metric, _load_metrics_from_catalog, _build_om_metric_detail, METRIC_CATEGORY_META
METRICS_DIR = Path(__file__).parent.parent / "docs" / "metrics"
class TestParseOmMetric:
"""Unit tests for _parse_om_metric() function."""
REQUIRED_FIELDS = [
"name", "display_name", "category", "type", "unit",
"grain", "time_column", "table", "description", "expression",
]
def test_parse_metric_basic_fields(self):
"""Extract basic fields from raw metric."""
raw = {
"fullyQualifiedName": "catalog.metrics.total_revenue",
"name": "total_revenue",
"displayName": "Total Revenue",
"description": "Total revenue from all orders",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["name"] == "total_revenue"
assert result["display_name"] == "Total Revenue"
assert result["description"] == "Total revenue from all orders"
assert result["path"] == "catalog:catalog.metrics.total_revenue"
def test_parse_metric_with_category_tag(self):
"""Extract category from MetricCategory.* tag."""
raw = {
"fullyQualifiedName": "catalog.metrics.revenue_metric",
"name": "revenue_metric",
"displayName": "Revenue",
"description": "Test",
"tags": [
{"tagFQN": "MetricCategory.finance"},
{"tagFQN": "Grain.monthly"},
],
}
result = _parse_om_metric(raw)
assert result["category"] == "finance"
assert result["grain"] == "monthly"
def test_parse_metric_with_category_legacy_tag(self):
"""Extract category from Category.* tag (legacy)."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"tags": [
{"tagFQN": "Category.marketing"},
],
}
result = _parse_om_metric(raw)
assert result["category"] == "marketing"
def test_parse_metric_fallback_to_general(self):
"""Default to 'general' category if no category tag."""
raw = {
"fullyQualifiedName": "catalog.metrics.unknown",
"name": "unknown",
"displayName": "Unknown",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["category"] == "general"
def test_parse_metric_display_name_fallback(self):
"""Use name as display_name if displayName not provided."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test_metric",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["display_name"] == "test_metric"
def test_parse_metric_path_has_catalog_prefix(self):
"""Path field includes catalog: prefix for JS routing."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["path"].startswith("catalog:")
def _get_all_metric_files():
"""Return list of all metric YAML files."""
return sorted(METRICS_DIR.glob("*/*.yml"))
class TestLoadMetricsFromCatalog:
"""Tests for _load_metrics_from_catalog() with mocked enricher."""
@patch('webapp.app._catalog_enricher')
def test_returns_empty_list_if_enricher_disabled(self, mock_enricher):
"""Return empty list if enricher not enabled."""
mock_enricher.enabled = False
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_returns_empty_list_if_enricher_none(self, mock_enricher):
"""Return empty list if enricher is None."""
with patch('webapp.app._catalog_enricher', None):
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_groups_metrics_by_category(self, mock_enricher):
"""Group metrics by category key."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.finance_metric",
"name": "finance_metric",
"displayName": "Finance Metric",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.finance"}],
},
{
"fullyQualifiedName": "catalog.metrics.marketing_metric",
"name": "marketing_metric",
"displayName": "Marketing Metric",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.marketing"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# Should have at least one of the known categories from METRIC_CATEGORY_META
assert len(result) >= 1
keys = [c["key"] for c in result]
assert "finance" in keys or "marketing" in keys
assert all(len(c["metrics"]) > 0 for c in result)
@patch('webapp.app._catalog_enricher')
def test_uses_metric_category_meta_order(self, mock_enricher):
"""Result categories ordered by METRIC_CATEGORY_META."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.m1",
"name": "m1",
"displayName": "M1",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.revenue"}],
},
{
"fullyQualifiedName": "catalog.metrics.m2",
"name": "m2",
"displayName": "M2",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.customers"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# revenue should come before customers per METRIC_CATEGORY_META order
keys = [c["key"] for c in result]
if "revenue" in keys and "customers" in keys:
revenue_idx = keys.index("revenue")
customers_idx = keys.index("customers")
assert revenue_idx < customers_idx
@patch('webapp.app._catalog_enricher')
def test_uses_category_label_from_meta(self, mock_enricher):
"""Category label comes from METRIC_CATEGORY_META."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.m1",
"name": "m1",
"displayName": "M1",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.revenue"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# Verify that a known category gets its label from METRIC_CATEGORY_META
assert len(result) >= 1
revenue_cat = [c for c in result if c["key"] == "revenue"]
if revenue_cat:
assert revenue_cat[0]["label"] == METRIC_CATEGORY_META["revenue"]["label"]
assert revenue_cat[0]["css"] == METRIC_CATEGORY_META["revenue"]["css"]
@patch('webapp.app._catalog_enricher')
def test_graceful_failure_on_exception(self, mock_enricher):
"""Return empty list on exception (graceful degradation)."""
mock_enricher.enabled = True
mock_enricher.get_metrics.side_effect = Exception("API error")
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_empty_metrics_list(self, mock_enricher):
"""Return empty list when catalog has no metrics."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = []
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
assert result == []
class TestMetricYAMLValidity:
"""Validate all metric YAML files have required fields."""
class TestBuildOmMetricDetail:
"""Tests for _build_om_metric_detail() function."""
def test_metrics_directory_exists(self):
assert METRICS_DIR.exists(), f"Metrics directory not found: {METRICS_DIR}"
def test_build_basic_structure(self):
"""Build MetricParser-compatible structure from raw metric."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test_metric",
"displayName": "Test Metric",
"description": "A test metric",
"expression": "COUNT(*)",
"owners": [{"name": "data_team"}],
"tags": [],
}
def test_at_least_one_metric_exists(self):
files = _get_all_metric_files()
assert len(files) > 0, "No metric YAML files found"
result = _build_om_metric_detail(raw)
@pytest.mark.parametrize("metric_file", _get_all_metric_files(), ids=lambda f: f.relative_to(METRICS_DIR).as_posix())
def test_all_metric_yamls_valid(self, metric_file):
"""Every metric YAML must have all required fields."""
with open(metric_file) as f:
raw = yaml.safe_load(f)
assert result["name"] == "test_metric"
assert result["display_name"] == "Test Metric"
assert result["category"] == "general"
assert result["metadata"]["type"] == ""
assert result["metadata"]["unit"] == ""
assert result["metadata"]["grain"] == ""
assert result["overview"]["description"] == "A test metric"
assert isinstance(raw, list), f"{metric_file.name}: expected YAML list, got {type(raw).__name__}"
assert len(raw) >= 1, f"{metric_file.name}: YAML list is empty"
def test_extract_metadata_from_tags(self):
"""Extract type, unit, grain from tags."""
raw = {
"fullyQualifiedName": "catalog.metrics.revenue",
"name": "revenue",
"displayName": "Revenue",
"description": "Test",
"expression": "SUM(amount)",
"owners": [],
"tags": [
{"tagFQN": "MetricType.sum"},
{"tagFQN": "Unit.usd"},
{"tagFQN": "Grain.monthly"},
{"tagFQN": "MetricCategory.finance"},
],
}
metric = raw[0]
assert isinstance(metric, dict), f"{metric_file.name}: first item is not a dict"
result = _build_om_metric_detail(raw)
missing = [field for field in REQUIRED_FIELDS if field not in metric]
assert not missing, f"{metric_file.name}: missing required fields: {missing}"
assert result["metadata"]["type"] == "sum"
assert result["metadata"]["unit"] == "usd"
assert result["metadata"]["grain"] == "monthly"
assert result["category"] == "finance"
# Category must match parent directory name
expected_category = metric_file.parent.name
assert metric["category"] == expected_category, (
f"{metric_file.name}: category '{metric['category']}' != directory '{expected_category}'"
)
def test_extract_dimensions_from_tags(self):
"""Extract dimension names from Dimension.* tags."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "SELECT",
"owners": [],
"tags": [
{"tagFQN": "Dimension.region"},
{"tagFQN": "Dimension.channel"},
],
}
result = _build_om_metric_detail(raw)
class TestMetricCategoriesInParser:
"""Verify CATEGORY_COLORS has entries for all used categories."""
assert "region" in result["dimensions"]
assert "channel" in result["dimensions"]
def test_all_categories_have_colors(self):
files = _get_all_metric_files()
categories_used = set()
for f in files:
with open(f) as fh:
raw = yaml.safe_load(fh)
if isinstance(raw, list) and raw:
categories_used.add(raw[0].get("category", ""))
def test_expression_in_sql_examples(self):
"""Expression field goes into sql_examples for modal display."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "SELECT COUNT(*) FROM users",
"owners": [],
"tags": [],
}
parser = MetricParser(METRICS_DIR)
missing = categories_used - set(parser.CATEGORY_COLORS.keys())
assert not missing, f"CATEGORY_COLORS missing entries for: {missing}"
result = _build_om_metric_detail(raw)
assert "expression" in result["sql_examples"]
assert result["sql_examples"]["expression"]["query"] == "SELECT COUNT(*) FROM users"
assert result["sql_examples"]["expression"]["title"] == "Metric Expression"
class TestMetricParserParsesSample:
"""Parse one metric and verify structured output."""
def test_extract_owner_names(self):
"""Extract owner names from owners list."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "SELECT",
"owners": [
{"name": "alice", "email": "alice@example.com"},
{"name": "bob"},
],
"tags": [],
}
def test_parse_total_revenue(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
result = _build_om_metric_detail(raw)
assert data["name"] == "total_revenue"
assert data["display_name"] == "Total Revenue"
assert data["category"] == "revenue"
assert data["category_color"] == "#0073D1"
assert data["metadata"]["unit"] == "USD"
assert data["metadata"]["grain"] == "monthly"
assert len(data["dimensions"]) > 0
assert "sql" in data["sql_examples"]
assert data["technical"]["table"] == "orders"
assert data["technical"]["expression"] == "SUM(total_amount)"
# Owner names go to notes.all
assert len(result["notes"]["all"]) == 0 # We don't populate this from owners yet
def test_parse_metric_with_tables_field(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/average_order_value.yml")
def test_empty_expression_no_sql_example(self):
"""Don't add empty expression to sql_examples."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "",
"owners": [],
"tags": [],
}
assert data["name"] == "average_order_value"
assert "sql_by_segment" in data["sql_examples"]
result = _build_om_metric_detail(raw)
class TestLoadMetricsData:
"""Verify _load_metrics_data returns correct structure."""
def test_returns_four_categories(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
assert isinstance(result, list)
assert len(result) == 4
category_keys = [c["key"] for c in result]
assert "revenue" in category_keys
assert "customers" in category_keys
assert "marketing" in category_keys
assert "support" in category_keys
def test_total_metrics_count(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
total = sum(len(c["metrics"]) for c in result)
assert total == 10
def test_metric_has_required_fields(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
for cat in result:
for m in cat["metrics"]:
assert "name" in m
assert "display_name" in m
assert "description" in m
assert "grain" in m
assert "path" in m
class TestDynamicSqlFields:
"""Verify sql_by_* fields are auto-discovered by parser."""
def test_dynamic_sql_fields_discovered(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
# sql_by_channel should be found via dynamic discovery
assert "sql_by_channel" in data["sql_examples"]
assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel"
def test_dynamic_sql_title_generation(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("customers/repeat_purchase_rate.yml")
# sql_by_channel should be found via dynamic discovery
assert "sql_by_channel" in data["sql_examples"]
assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel"
def test_static_sql_still_works(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
assert "sql" in data["sql_examples"]
assert data["sql_examples"]["sql"]["title"] == "Basic Query"
assert result["sql_examples"] == {}

View file

@ -56,6 +56,12 @@ except ImportError:
_CATALOG_ENRICHER_AVAILABLE = False
CatalogEnricher = None
# Metric parser for modal detail rendering
try:
from webapp.utils.metric_parser import MetricParser
except ImportError:
MetricParser = None
# Configure logging
logging.basicConfig(
level=logging.INFO,
@ -458,9 +464,17 @@ METRIC_CATEGORY_META = {
def _load_metrics_data():
"""Load business metric definitions for catalog display.
Prefers metrics from OpenMetadata catalog. Falls back to YAML files if catalog unavailable.
Returns list of category dicts ordered by METRIC_CATEGORY_META:
[{'key': 'revenue', 'label': 'Revenue', 'css': 'sales', 'metrics': [...]}, ...]
[{'key': 'finance', 'label': 'Finance...', 'css': '...', 'metrics': [...]}, ...]
"""
# Try catalog first (Phase 2)
catalog_metrics = _load_metrics_from_catalog()
if catalog_metrics:
return catalog_metrics
# Fallback to YAML files if catalog unavailable
# Try production path first, fall back to local dev path
metrics_dir = Path("/data/docs/metrics")
if not metrics_dir.exists():
@ -520,6 +534,214 @@ def _load_metrics_data():
return result
def _parse_om_metric(raw_metric: dict) -> dict:
"""
Parse raw OpenMetadata metric dict into format for metric list display.
Extracts category, grain from tags with standard prefixes:
- Category: tagFQN like "MetricCategory.finance" or "Category.marketing"
- Grain: tagFQN like "Grain.monthly"
Args:
raw_metric: Raw metric dict from OpenMetadata (id, fullyQualifiedName, description, tags, etc.)
Returns:
Dict with keys: name, display_name, description, grain, path
(path = "catalog:{fullyQualifiedName}" for JS routing)
"""
fqn = raw_metric.get("fullyQualifiedName", "")
name = raw_metric.get("name", "")
display_name = raw_metric.get("displayName", name)
description = raw_metric.get("description", "") or ""
# Extract category and grain from tags
tags = raw_metric.get("tags", [])
category = "general"
grain = ""
for tag in tags:
tag_fqn = tag.get("tagFQN", "")
# Extract category from MetricCategory.* or Category.* tags
if tag_fqn.startswith("MetricCategory."):
category = tag_fqn.split(".", 1)[1]
elif tag_fqn.startswith("Category."):
category = tag_fqn.split(".", 1)[1]
# Extract grain from Grain.* tags
if tag_fqn.startswith("Grain."):
grain = tag_fqn.split(".", 1)[1]
return {
"name": name,
"display_name": display_name,
"description": description,
"grain": grain,
"category": category,
"path": f"catalog:{fqn}", # Special prefix for JS routing
}
def _load_metrics_from_catalog() -> list:
    """
    Build the categorized metric list from the OpenMetadata catalog.

    Each raw metric returned by the catalog enricher is parsed with
    _parse_om_metric() and bucketed by its "category" value. Categories that
    appear in METRIC_CATEGORY_META come first, ordered by their "order" entry
    and using the configured label/css; any other category follows in
    alphabetical order with a label derived from its key.

    Returns:
        List of category dicts, same shape as _load_metrics_data():
        [
            {'key': 'finance', 'label': '...', 'css': '...', 'metrics': [...]},
            {'key': 'marketing', 'label': '...', 'css': '...', 'metrics': [...]}
        ]
        An empty list when the enricher is missing/disabled, when the catalog
        returns no metrics, or when loading fails (the failure is logged).
    """
    global _catalog_enricher

    if not (_catalog_enricher and _catalog_enricher.enabled):
        return []

    try:
        raw_metrics = _catalog_enricher.get_metrics()
        if not raw_metrics:
            logger.debug("No metrics found in catalog")
            return []

        # Bucket parsed metrics by category; one bad record is logged and
        # skipped without aborting the whole load.
        by_category = {}
        for raw in raw_metrics:
            try:
                parsed = _parse_om_metric(raw)
                by_category.setdefault(parsed["category"], []).append(parsed)
            except Exception as e:
                logger.warning(f"Failed to parse metric {raw.get('name', '?')}: {e}")

        # Known categories first, in their configured display order.
        configured = sorted(
            METRIC_CATEGORY_META.items(), key=lambda item: item[1]["order"]
        )
        result = [
            {
                "key": key,
                "label": meta["label"],
                "css": meta["css"],
                "metrics": by_category[key],
            }
            for key, meta in configured
            if key in by_category
        ]

        # Categories without configured metadata go last, alphabetically.
        result.extend(
            {
                "key": key,
                "label": key.replace("_", " ").title(),
                "css": key,
                "metrics": by_category[key],
            }
            for key in sorted(by_category)
            if key not in METRIC_CATEGORY_META
        )

        logger.info(f"Loaded {sum(len(c['metrics']) for c in result)} metrics from catalog")
        return result
    except Exception as e:
        logger.warning(f"Failed to load metrics from catalog: {e}")
        return []
def _build_om_metric_detail(raw_metric: dict) -> dict:
"""
Convert raw OpenMetadata metric into MetricParser-compatible JSON for modal.
Maps OpenMetadata fields to MetricParser structure (name, display_name, category, metadata, etc.).
Extracts type, unit, grain from tags with standard prefixes.
Args:
raw_metric: Raw metric dict from OpenMetadata
Returns:
Dict matching MetricParser._structure_metric_data() format
"""
fqn = raw_metric.get("fullyQualifiedName", "")
name = raw_metric.get("name", "")
display_name = raw_metric.get("displayName", name)
description = raw_metric.get("description", "") or ""
expression = raw_metric.get("expression", "") or ""
owners = raw_metric.get("owners", [])
# Extract metadata from tags
tags = raw_metric.get("tags", [])
metric_type = ""
unit = ""
grain = ""
category = "general"
dimensions = []
for tag in tags:
tag_fqn = tag.get("tagFQN", "")
if tag_fqn.startswith("MetricType."):
metric_type = tag_fqn.split(".", 1)[1]
elif tag_fqn.startswith("Unit."):
unit = tag_fqn.split(".", 1)[1]
elif tag_fqn.startswith("Grain."):
grain = tag_fqn.split(".", 1)[1]
elif tag_fqn.startswith("MetricCategory."):
category = tag_fqn.split(".", 1)[1]
elif tag_fqn.startswith("Dimension."):
dimensions.append(tag_fqn.split(".", 1)[1])
# Extract owner names
owner_names = []
for owner in owners:
name_val = owner.get("name") or owner.get("displayName", "")
if name_val:
owner_names.append(name_val)
# Build MetricParser-compatible structure
return {
"name": name,
"display_name": display_name,
"category": category,
"category_color": MetricParser.CATEGORY_COLORS.get(category, "#6B7280"),
"metadata": {
"type": metric_type,
"unit": unit,
"grain": grain,
"time_column": "", # Not available in OpenMetadata
},
"overview": {
"description": description.strip(),
"key_insights": [], # Not available in OpenMetadata
},
"validation": None, # Not available in OpenMetadata
"dimensions": dimensions,
"notes": {
"all": [], # Not available in OpenMetadata
"key_insights": [],
},
"sql_examples": {
"expression": {
"title": "Metric Expression",
"query": expression,
"complexity": "simple",
}
} if expression else {},
"technical": {
"table": "", # Not available in OpenMetadata
"expression": expression,
"synonyms": [],
"data_sources": [],
},
"special_sections": {},
}
def _send_welcome_message(username: str) -> None:
"""Send a welcome message to the user via bot socket after linking."""
try:
@ -787,6 +1009,36 @@ def register_routes(app: Flask) -> None:
logger.error(f"Error parsing metric {metric_path}: {e}")
return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500
@app.route("/api/catalog/metrics/<path:metric_fqn>")
@login_required
def api_catalog_metric(metric_fqn):
    """
    API endpoint to serve metric from OpenMetadata catalog as structured JSON.

    Args:
        metric_fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

    Returns:
        JSON matching MetricParser format for modal rendering, or a JSON
        {"error": ...} body with status:
        - 503 when the catalog enricher is missing or disabled
        - 404 when the catalog reports that the metric does not exist
        - 500 for any other failure while fetching or converting the metric
    """
    global _catalog_enricher

    if not _catalog_enricher or not _catalog_enricher.enabled:
        return jsonify({"error": "Catalog not available"}), 503

    try:
        # Fetch metric from catalog
        raw = _catalog_enricher._client.get_metric_by_fqn(metric_fqn)

        # Convert to MetricParser format
        metric_data = _build_om_metric_detail(raw)
        return jsonify(metric_data)

    except Exception as e:
        # The client raises an HTTP status error carrying the upstream
        # response on non-2xx replies. Read the status via getattr so this
        # module does not have to import the HTTP library just to tell
        # "unknown metric" apart from a real failure.
        upstream_status = getattr(getattr(e, "response", None), "status_code", None)
        if upstream_status == 404:
            logger.info(f"Catalog metric not found: {metric_fqn}")
            return jsonify({"error": f"Metric not found: {metric_fqn}"}), 404

        logger.error(f"Error fetching catalog metric {metric_fqn}: {e}")
        return jsonify({"error": f"Failed to fetch metric: {str(e)}"}), 500
@app.route("/docs/metrics/<path:metric_path>")
@login_required
def serve_metric(metric_path):

View file

@ -9,7 +9,7 @@ let currentMetricData = null;
/**
* Open metric modal and load data
* @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml')
* @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') or catalog FQN (e.g., 'catalog:...')
*/
function openMetricModal(metricPath) {
currentMetricPath = metricPath;
@ -23,8 +23,13 @@ function openMetricModal(metricPath) {
// Show loading state
body.innerHTML = '<div class="metric-loading"><div class="metric-loading-spinner"></div><div class="metric-loading-text">Loading metric...</div></div>';
// Route based on prefix: catalog:FQN uses /api/catalog/metrics, YAML paths use /api/metrics
const url = metricPath.startsWith('catalog:')
? `/api/catalog/metrics/${metricPath.slice(8)}` // Remove 'catalog:' prefix
: `/api/metrics/${metricPath}`;
// Fetch metric data
fetch(`/api/metrics/${metricPath}`)
fetch(url)
.then(response => {
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);

View file

@ -33,6 +33,7 @@
--font-medium: 500;
--font-semibold: 600;
--font-bold: 700;
--font-extrabold: 800;
/* Spacing */
--space-1: 4px;
@ -194,7 +195,7 @@ body {
.welcome-v2 h2 {
font-size: var(--text-xl);
font-weight: var(--font-semibold);
font-weight: var(--font-extrabold);
color: var(--text-primary);
margin-bottom: var(--space-2);
}

View file

@ -118,7 +118,7 @@
.page-title h1 {
font-size: 24px;
font-weight: 600;
font-weight: 800;
color: var(--text-primary);
margin-bottom: 4px;
}
@ -186,7 +186,7 @@
.source-card-name {
font-size: 16px;
font-weight: 600;
font-weight: 700;
color: var(--text-primary);
margin-bottom: 2px;
}