Phase 2: Replace demo YAML metrics with OpenMetadata catalog data

- Add get_metric_by_fqn() to OpenMetadataClient
- Add get_metrics() to CatalogEnricher with TTL caching
- Implement _parse_om_metric() to extract category/grain from OpenMetadata tags
- Implement _load_metrics_from_catalog() to fetch and categorize metrics
- Implement _build_om_metric_detail() to convert OpenMetadata format to MetricParser JSON
- Add /api/catalog/metrics/<fqn> endpoint for metric detail modal
- Update _load_metrics_data() to prefer catalog over YAML fallback
- Update metric_modal.js to route catalog:{fqn} to catalog API endpoint
- Delete 10 demo YAML files from docs/metrics/
- Replace metric tests with new unit tests for catalog parsing functions (19 tests)

Catalog metrics provide a single source of truth, replacing the separately maintained demo YAML files.
The UI remains unchanged - only the data source changes from YAML to the OpenMetadata catalog.
This commit is contained in:
Petr 2026-03-12 15:10:42 +01:00
parent be58e63394
commit 5fc9526627
17 changed files with 654 additions and 608 deletions

View file

@ -112,6 +112,32 @@ class OpenMetadataClient:
data = response.json() data = response.json()
return data.get("data", []) return data.get("data", [])
def get_metric_by_fqn(self, fqn: str) -> Dict[str, Any]:
    """
    Fetch a specific metric by FQN from OpenMetadata.

    The FQN is percent-encoded before it is placed in the URL path, so
    names containing reserved characters ("/", "?", "#", spaces, quotes)
    address the intended metric instead of being parsed as extra path
    segments or as a query string.

    Args:
        fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

    Returns:
        Dictionary with metric metadata:
        - id, name, fullyQualifiedName
        - description, expression
        - owners, tags

    Raises:
        httpx.HTTPStatusError: If request fails (non-2xx status)
    """
    # Local import keeps this method self-contained; urllib is stdlib.
    from urllib.parse import quote

    # safe="" also escapes "/", which would otherwise split the FQN into
    # several path segments. Plain dotted names pass through unchanged.
    url = f"/api/v1/metrics/name/{quote(fqn, safe='')}"
    params = {
        "fields": "description,expression,owners,tags,displayName",
    }
    response = self._client.get(url, params=params)
    response.raise_for_status()
    return response.json()
def close(self): def close(self):
"""Close HTTP client session.""" """Close HTTP client session."""
self._client.close() self._client.close()

View file

@ -301,6 +301,45 @@ class CatalogEnricher:
"fetched_at": datetime.now(), "fetched_at": datetime.now(),
} }
def get_metrics(self, limit: int = 200) -> List[Dict[str, Any]]:
    """
    Fetch list of business metrics from OpenMetadata catalog.

    Results are stored through the enricher's cache helpers under a key
    that includes ``limit``. A list fetched with a small limit is therefore
    never served to a later caller that asks for more metrics.

    Args:
        limit: Maximum number of metrics to fetch (default: 200)

    Returns:
        List of metric dictionaries as returned by the client's
        get_metrics() call.

        Returns empty list if:
        - enricher is disabled or has no client
        - the cache lookup or the client call raises

    Never raises exception (graceful degradation).
    """
    if not self.enabled or not self._client:
        return []

    # Key the cache by limit: a shared key would hand a truncated list to
    # callers requesting a larger limit until the entry expired.
    cache_key = f"__metrics_list__:{limit}"

    try:
        # Check cache first. An empty list is a valid cached value, hence
        # the explicit None comparison.
        cached = self._get_from_cache(cache_key)
        if cached is not None:
            logger.debug("Catalog cache hit: metrics list")
            return cached

        # Fetch from API
        logger.debug("Fetching %s metrics from catalog", limit)
        metrics = self._client.get_metrics(limit=limit)

        # Cache the result (expiry is presumably handled by _cache_entry /
        # _get_from_cache - not visible from here).
        self._cache_entry(cache_key, metrics)
        logger.info("Loaded %s metrics from catalog", len(metrics))
        return metrics
    except Exception as e:
        # Deliberate broad catch: the catalog is optional, so any failure
        # degrades to "no metrics" instead of breaking the caller.
        logger.warning("Failed to fetch metrics from catalog: %s", e)
        return []
def clear_cache(self): def clear_cache(self):
"""Manually clear all cached entries.""" """Manually clear all cached entries."""
self._cache.clear() self._cache.clear()

View file

@ -1,39 +0,0 @@
- name: customer_count
display_name: Customer Count
category: customers
type: count_distinct
unit: customers
grain: monthly
time_column: created_at
table: customers
expression: "COUNT(DISTINCT customer_id)"
description: "Total number of unique customers. Tracks customer base growth over time. Counts distinct customer records based on registration date."
dimensions:
- segment
- region
- acquisition_channel
notes:
- "Counts only active customers (not deleted or merged)"
- "A customer is counted in the month of their first registration"
- "Segment is assigned based on lifetime spend thresholds"
synonyms:
- total_customers
- customer_base
- active_customers
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
COUNT(DISTINCT customer_id) AS new_customers
FROM customers
WHERE status = 'active'
GROUP BY 1
ORDER BY 1
sql_by_segment: |
SELECT
segment,
COUNT(DISTINCT customer_id) AS customer_count,
AVG(lifetime_value) AS avg_ltv
FROM customers
WHERE status = 'active'
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,66 +0,0 @@
- name: repeat_purchase_rate
display_name: Repeat Purchase Rate
category: customers
type: ratio
unit: "%"
grain: monthly
time_column: order_date
table: orders
tables:
- orders
- customers
expression: "COUNT(DISTINCT CASE WHEN order_number > 1 THEN customer_id END) / COUNT(DISTINCT customer_id)"
description: "Percentage of customers who made more than one purchase. Key loyalty and retention indicator. Higher rates signal strong product-market fit and customer satisfaction."
dimensions:
- customer_segment
- acquisition_channel
- product_category
notes:
- "Calculated over a rolling 12-month window by default"
- "Joins orders to customers via customer_id"
- "Order numbering is based on chronological order per customer"
- "Excludes cancelled and fully refunded orders"
synonyms:
- retention_rate
- repurchase_rate
- customer_loyalty_rate
sql: |
WITH customer_orders AS (
SELECT
customer_id,
COUNT(*) AS order_count
FROM orders
WHERE status = 'completed'
AND order_date >= CURRENT_DATE - INTERVAL '12 months'
GROUP BY 1
)
SELECT
ROUND(
COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
/ COUNT(*), 2
) AS repeat_purchase_rate_pct,
COUNT(*) AS total_customers,
COUNT(CASE WHEN order_count > 1 THEN 1 END) AS repeat_customers
FROM customer_orders
sql_by_channel: |
WITH customer_orders AS (
SELECT
o.customer_id,
c.acquisition_channel,
COUNT(*) AS order_count
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'completed'
AND o.order_date >= CURRENT_DATE - INTERVAL '12 months'
GROUP BY 1, 2
)
SELECT
acquisition_channel,
ROUND(
COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
/ COUNT(*), 2
) AS repeat_rate_pct,
COUNT(*) AS total_customers
FROM customer_orders
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,55 +0,0 @@
- name: campaign_roi
display_name: Campaign ROI
category: marketing
type: ratio
unit: "%"
grain: monthly
time_column: start_date
table: campaigns
tables:
- campaigns
- orders
- web_leads
expression: "(SUM(attributed_revenue) - SUM(spend)) / NULLIF(SUM(spend), 0) * 100"
description: "Return on investment for marketing campaigns. Measures revenue generated relative to campaign spend. Negative ROI indicates underperforming campaigns that need optimization."
dimensions:
- campaign_type
- channel
- target_segment
notes:
- "Attribution uses last-touch model by default"
- "Joins campaigns to orders via utm_campaign tracking codes"
- "Web leads are attributed to campaigns via landing page tracking"
- "ROI above 300% is considered excellent for e-commerce"
synonyms:
- marketing_roi
- campaign_return
- roas
sql: |
SELECT
c.campaign_name,
c.campaign_type,
c.spend,
SUM(o.total_amount) AS attributed_revenue,
ROUND(
(SUM(o.total_amount) - c.spend) / NULLIF(c.spend, 0) * 100, 2
) AS roi_pct
FROM campaigns c
LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
AND o.status = 'completed'
GROUP BY 1, 2, 3
ORDER BY 5 DESC
sql_by_type: |
SELECT
c.campaign_type,
SUM(c.spend) AS total_spend,
SUM(o.total_amount) AS total_revenue,
ROUND(
(SUM(o.total_amount) - SUM(c.spend))
/ NULLIF(SUM(c.spend), 0) * 100, 2
) AS roi_pct
FROM campaigns c
LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
AND o.status = 'completed'
GROUP BY 1
ORDER BY 4 DESC

View file

@ -1,53 +0,0 @@
- name: cost_per_acquisition
display_name: Cost per Acquisition
category: marketing
type: ratio
unit: USD
grain: monthly
time_column: start_date
table: campaigns
tables:
- campaigns
- customers
expression: "SUM(spend) / NULLIF(COUNT(DISTINCT new_customer_id), 0)"
description: "Average cost to acquire one new customer through marketing campaigns. Compares total campaign spend to the number of new customer registrations attributed to those campaigns."
dimensions:
- campaign_type
- channel
- region
notes:
- "Only counts first-time customers (no repeat purchasers)"
- "Joins campaigns to customers via attribution tracking"
- "CPA below customer lifetime value indicates sustainable growth"
synonyms:
- cpa
- customer_acquisition_cost
- cac
sql: |
SELECT
DATE_TRUNC('month', c.start_date) AS month,
SUM(c.spend) AS total_spend,
COUNT(DISTINCT cust.customer_id) AS new_customers,
ROUND(
SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
) AS cost_per_acquisition
FROM campaigns c
LEFT JOIN customers cust
ON cust.attribution_campaign = c.campaign_id
AND cust.is_first_purchase = true
GROUP BY 1
ORDER BY 1
sql_by_channel: |
SELECT
c.channel,
SUM(c.spend) AS total_spend,
COUNT(DISTINCT cust.customer_id) AS new_customers,
ROUND(
SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
) AS cpa
FROM campaigns c
LEFT JOIN customers cust
ON cust.attribution_campaign = c.campaign_id
AND cust.is_first_purchase = true
GROUP BY 1
ORDER BY 4

View file

@ -1,46 +0,0 @@
- name: lead_conversion_rate
display_name: Lead Conversion Rate
category: marketing
type: ratio
unit: "%"
grain: monthly
time_column: created_at
table: web_leads
expression: "COUNT(CASE WHEN status = 'converted' THEN 1 END) / COUNT(*) * 100"
description: "Percentage of web leads that convert to paying customers. Measures the effectiveness of the sales funnel from initial lead capture through purchase completion."
dimensions:
- source
- landing_page
- lead_score_tier
notes:
- "A lead is 'converted' when they complete their first purchase"
- "Conversion window is 90 days from lead creation"
- "Duplicate leads (same email) are deduplicated by earliest creation"
synonyms:
- conversion_rate
- lead_to_customer_rate
- funnel_conversion
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
COUNT(*) AS total_leads,
COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
ROUND(
COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
/ COUNT(*), 2
) AS conversion_rate_pct
FROM web_leads
GROUP BY 1
ORDER BY 1
sql_by_source: |
SELECT
source,
COUNT(*) AS total_leads,
COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
ROUND(
COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
/ COUNT(*), 2
) AS conversion_rate_pct
FROM web_leads
GROUP BY 1
ORDER BY 4 DESC

View file

@ -1,45 +0,0 @@
- name: average_order_value
display_name: Average Order Value
category: revenue
type: average
unit: USD
grain: monthly
time_column: order_date
table: orders
tables:
- orders
- customers
expression: "AVG(total_amount)"
description: "Average monetary value per order. Key indicator of customer purchasing behavior and pricing effectiveness. Joins to customers for segmentation."
dimensions:
- channel
- customer_segment
- product_category
- is_first_order
notes:
- "Calculated only on completed orders"
- "Joins to customers table via customer_id for segment analysis"
- "Useful to compare AOV by new vs returning customers"
synonyms:
- aov
- avg_basket_size
sql: |
SELECT
DATE_TRUNC('month', o.order_date) AS month,
AVG(o.total_amount) AS avg_order_value,
COUNT(*) AS order_count
FROM orders o
WHERE o.status = 'completed'
GROUP BY 1
ORDER BY 1
sql_by_segment: |
SELECT
DATE_TRUNC('month', o.order_date) AS month,
c.segment AS customer_segment,
AVG(o.total_amount) AS avg_order_value,
COUNT(*) AS order_count
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC

View file

@ -1,41 +0,0 @@
- name: revenue_by_channel
display_name: Revenue by Channel
category: revenue
type: sum
unit: USD
grain: monthly
time_column: order_date
table: orders
expression: "SUM(total_amount) GROUP BY channel"
description: "Revenue breakdown by sales channel (web, mobile, in-store, marketplace). Identifies highest-performing channels and guides marketing spend allocation."
dimensions:
- channel
- region
- product_category
notes:
- "Channel is assigned at order creation and does not change"
- "Marketplace channel includes all third-party platforms (Amazon, eBay, etc.)"
- "Cross-channel attribution is not applied; each order is counted once"
synonyms:
- channel_revenue
- sales_by_channel
sql: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
SUM(total_amount) AS revenue,
COUNT(*) AS order_count
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC
sql_by_region: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
region,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2, 3
ORDER BY 1, 4 DESC

View file

@ -1,40 +0,0 @@
- name: total_revenue
display_name: Total Revenue
category: revenue
type: sum
unit: USD
grain: monthly
time_column: order_date
table: orders
expression: "SUM(total_amount)"
description: "Total revenue from all orders. Primary top-line metric tracking overall business performance across all channels and product categories."
dimensions:
- channel
- product_category
- region
- payment_method
notes:
- "Includes all completed orders, excludes cancelled and refunded"
- "Revenue is recognized at order completion date, not payment date"
- "Multi-currency orders are converted to USD at daily exchange rate"
synonyms:
- gross_revenue
- total_sales
- top_line_revenue
sql: |
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total_amount) AS total_revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1
ORDER BY 1
sql_by_channel: |
SELECT
DATE_TRUNC('month', order_date) AS month,
channel,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY 1, 2
ORDER BY 1, 3 DESC

View file

@ -1,47 +0,0 @@
- name: avg_resolution_hours
display_name: Average Resolution Time
category: support
type: average
unit: hours
grain: monthly
time_column: created_at
table: support_tickets
expression: "AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600)"
description: "Average time in hours from ticket creation to resolution. Key support team performance metric. Lower values indicate more efficient support operations."
dimensions:
- priority
- category
- agent
- channel
notes:
- "Only includes resolved tickets (excludes open and escalated)"
- "Business hours calculation is not applied; uses wall-clock time"
- "Outliers above 720 hours (30 days) are excluded from average"
synonyms:
- resolution_time
- time_to_resolve
- ttr
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
ROUND(
AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
) AS avg_resolution_hours,
COUNT(*) AS resolved_tickets
FROM support_tickets
WHERE resolved_at IS NOT NULL
AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
GROUP BY 1
ORDER BY 1
sql_by_priority: |
SELECT
priority,
ROUND(
AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
) AS avg_resolution_hours,
COUNT(*) AS ticket_count
FROM support_tickets
WHERE resolved_at IS NOT NULL
AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
GROUP BY 1
ORDER BY 2

View file

@ -1,46 +0,0 @@
- name: satisfaction_score
display_name: Customer Satisfaction Score
category: support
type: average
unit: score (1-5)
grain: monthly
time_column: created_at
table: support_tickets
expression: "AVG(satisfaction_score)"
description: "Average customer satisfaction rating on a 1-5 scale collected after ticket resolution. Measures customer perception of support quality and identifies areas for improvement."
dimensions:
- priority
- category
- agent
- resolution_type
notes:
- "Score is collected via post-resolution survey email"
- "Response rate is typically 25-35% of resolved tickets"
- "Score of 4+ is considered 'satisfied', below 3 is 'unsatisfied'"
- "Only tickets with a satisfaction response are included"
synonyms:
- csat
- customer_satisfaction
- satisfaction_rating
sql: |
SELECT
DATE_TRUNC('month', created_at) AS month,
ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
COUNT(*) AS responses,
ROUND(
COUNT(CASE WHEN satisfaction_score >= 4 THEN 1 END) * 100.0
/ COUNT(*), 1
) AS pct_satisfied
FROM support_tickets
WHERE satisfaction_score IS NOT NULL
GROUP BY 1
ORDER BY 1
sql_by_category: |
SELECT
category,
ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
COUNT(*) AS responses
FROM support_tickets
WHERE satisfaction_score IS NOT NULL
GROUP BY 1
ORDER BY 2 DESC

View file

@ -1,151 +1,352 @@
"""Tests for business metric YAML definitions and parser.""" """Tests for OpenMetadata catalog metrics and parsing functions."""
import yaml
import pytest import pytest
from pathlib import Path from unittest.mock import Mock, MagicMock, patch
from webapp.app import _parse_om_metric, _load_metrics_from_catalog, _build_om_metric_detail, METRIC_CATEGORY_META
from webapp.utils.metric_parser import MetricParser
METRICS_DIR = Path(__file__).parent.parent / "docs" / "metrics" class TestParseOmMetric:
"""Unit tests for _parse_om_metric() function."""
REQUIRED_FIELDS = [ def test_parse_metric_basic_fields(self):
"name", "display_name", "category", "type", "unit", """Extract basic fields from raw metric."""
"grain", "time_column", "table", "description", "expression", raw = {
] "fullyQualifiedName": "catalog.metrics.total_revenue",
"name": "total_revenue",
"displayName": "Total Revenue",
"description": "Total revenue from all orders",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["name"] == "total_revenue"
assert result["display_name"] == "Total Revenue"
assert result["description"] == "Total revenue from all orders"
assert result["path"] == "catalog:catalog.metrics.total_revenue"
def test_parse_metric_with_category_tag(self):
"""Extract category from MetricCategory.* tag."""
raw = {
"fullyQualifiedName": "catalog.metrics.revenue_metric",
"name": "revenue_metric",
"displayName": "Revenue",
"description": "Test",
"tags": [
{"tagFQN": "MetricCategory.finance"},
{"tagFQN": "Grain.monthly"},
],
}
result = _parse_om_metric(raw)
assert result["category"] == "finance"
assert result["grain"] == "monthly"
def test_parse_metric_with_category_legacy_tag(self):
"""Extract category from Category.* tag (legacy)."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"tags": [
{"tagFQN": "Category.marketing"},
],
}
result = _parse_om_metric(raw)
assert result["category"] == "marketing"
def test_parse_metric_fallback_to_general(self):
"""Default to 'general' category if no category tag."""
raw = {
"fullyQualifiedName": "catalog.metrics.unknown",
"name": "unknown",
"displayName": "Unknown",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["category"] == "general"
def test_parse_metric_display_name_fallback(self):
"""Use name as display_name if displayName not provided."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test_metric",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["display_name"] == "test_metric"
def test_parse_metric_path_has_catalog_prefix(self):
"""Path field includes catalog: prefix for JS routing."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"tags": [],
}
result = _parse_om_metric(raw)
assert result["path"].startswith("catalog:")
def _get_all_metric_files(): class TestLoadMetricsFromCatalog:
"""Return list of all metric YAML files.""" """Tests for _load_metrics_from_catalog() with mocked enricher."""
return sorted(METRICS_DIR.glob("*/*.yml"))
@patch('webapp.app._catalog_enricher')
def test_returns_empty_list_if_enricher_disabled(self, mock_enricher):
"""Return empty list if enricher not enabled."""
mock_enricher.enabled = False
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_returns_empty_list_if_enricher_none(self, mock_enricher):
"""Return empty list if enricher is None."""
with patch('webapp.app._catalog_enricher', None):
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_groups_metrics_by_category(self, mock_enricher):
"""Group metrics by category key."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.finance_metric",
"name": "finance_metric",
"displayName": "Finance Metric",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.finance"}],
},
{
"fullyQualifiedName": "catalog.metrics.marketing_metric",
"name": "marketing_metric",
"displayName": "Marketing Metric",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.marketing"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# Should have at least one of the known categories from METRIC_CATEGORY_META
assert len(result) >= 1
keys = [c["key"] for c in result]
assert "finance" in keys or "marketing" in keys
assert all(len(c["metrics"]) > 0 for c in result)
@patch('webapp.app._catalog_enricher')
def test_uses_metric_category_meta_order(self, mock_enricher):
"""Result categories ordered by METRIC_CATEGORY_META."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.m1",
"name": "m1",
"displayName": "M1",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.revenue"}],
},
{
"fullyQualifiedName": "catalog.metrics.m2",
"name": "m2",
"displayName": "M2",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.customers"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# revenue should come before customers per METRIC_CATEGORY_META order
keys = [c["key"] for c in result]
if "revenue" in keys and "customers" in keys:
revenue_idx = keys.index("revenue")
customers_idx = keys.index("customers")
assert revenue_idx < customers_idx
@patch('webapp.app._catalog_enricher')
def test_uses_category_label_from_meta(self, mock_enricher):
"""Category label comes from METRIC_CATEGORY_META."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = [
{
"fullyQualifiedName": "catalog.metrics.m1",
"name": "m1",
"displayName": "M1",
"description": "Test",
"tags": [{"tagFQN": "MetricCategory.revenue"}],
},
]
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
# Verify that a known category gets its label from METRIC_CATEGORY_META
assert len(result) >= 1
revenue_cat = [c for c in result if c["key"] == "revenue"]
if revenue_cat:
assert revenue_cat[0]["label"] == METRIC_CATEGORY_META["revenue"]["label"]
assert revenue_cat[0]["css"] == METRIC_CATEGORY_META["revenue"]["css"]
@patch('webapp.app._catalog_enricher')
def test_graceful_failure_on_exception(self, mock_enricher):
"""Return empty list on exception (graceful degradation)."""
mock_enricher.enabled = True
mock_enricher.get_metrics.side_effect = Exception("API error")
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
assert result == []
@patch('webapp.app._catalog_enricher')
def test_empty_metrics_list(self, mock_enricher):
"""Return empty list when catalog has no metrics."""
mock_enricher.enabled = True
mock_enricher.get_metrics.return_value = []
with patch('webapp.app._catalog_enricher', mock_enricher):
result = _load_metrics_from_catalog()
assert result == []
class TestMetricYAMLValidity: class TestBuildOmMetricDetail:
"""Validate all metric YAML files have required fields.""" """Tests for _build_om_metric_detail() function."""
def test_metrics_directory_exists(self): def test_build_basic_structure(self):
assert METRICS_DIR.exists(), f"Metrics directory not found: {METRICS_DIR}" """Build MetricParser-compatible structure from raw metric."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test_metric",
"displayName": "Test Metric",
"description": "A test metric",
"expression": "COUNT(*)",
"owners": [{"name": "data_team"}],
"tags": [],
}
def test_at_least_one_metric_exists(self): result = _build_om_metric_detail(raw)
files = _get_all_metric_files()
assert len(files) > 0, "No metric YAML files found"
@pytest.mark.parametrize("metric_file", _get_all_metric_files(), ids=lambda f: f.relative_to(METRICS_DIR).as_posix()) assert result["name"] == "test_metric"
def test_all_metric_yamls_valid(self, metric_file): assert result["display_name"] == "Test Metric"
"""Every metric YAML must have all required fields.""" assert result["category"] == "general"
with open(metric_file) as f: assert result["metadata"]["type"] == ""
raw = yaml.safe_load(f) assert result["metadata"]["unit"] == ""
assert result["metadata"]["grain"] == ""
assert result["overview"]["description"] == "A test metric"
assert isinstance(raw, list), f"{metric_file.name}: expected YAML list, got {type(raw).__name__}" def test_extract_metadata_from_tags(self):
assert len(raw) >= 1, f"{metric_file.name}: YAML list is empty" """Extract type, unit, grain from tags."""
raw = {
"fullyQualifiedName": "catalog.metrics.revenue",
"name": "revenue",
"displayName": "Revenue",
"description": "Test",
"expression": "SUM(amount)",
"owners": [],
"tags": [
{"tagFQN": "MetricType.sum"},
{"tagFQN": "Unit.usd"},
{"tagFQN": "Grain.monthly"},
{"tagFQN": "MetricCategory.finance"},
],
}
metric = raw[0] result = _build_om_metric_detail(raw)
assert isinstance(metric, dict), f"{metric_file.name}: first item is not a dict"
missing = [field for field in REQUIRED_FIELDS if field not in metric] assert result["metadata"]["type"] == "sum"
assert not missing, f"{metric_file.name}: missing required fields: {missing}" assert result["metadata"]["unit"] == "usd"
assert result["metadata"]["grain"] == "monthly"
assert result["category"] == "finance"
# Category must match parent directory name def test_extract_dimensions_from_tags(self):
expected_category = metric_file.parent.name """Extract dimension names from Dimension.* tags."""
assert metric["category"] == expected_category, ( raw = {
f"{metric_file.name}: category '{metric['category']}' != directory '{expected_category}'" "fullyQualifiedName": "catalog.metrics.test",
) "name": "test",
"displayName": "Test",
"description": "Test",
"expression": "SELECT",
"owners": [],
"tags": [
{"tagFQN": "Dimension.region"},
{"tagFQN": "Dimension.channel"},
],
}
result = _build_om_metric_detail(raw)
class TestMetricCategoriesInParser: assert "region" in result["dimensions"]
"""Verify CATEGORY_COLORS has entries for all used categories.""" assert "channel" in result["dimensions"]
def test_all_categories_have_colors(self): def test_expression_in_sql_examples(self):
files = _get_all_metric_files() """Expression field goes into sql_examples for modal display."""
categories_used = set() raw = {
for f in files: "fullyQualifiedName": "catalog.metrics.test",
with open(f) as fh: "name": "test",
raw = yaml.safe_load(fh) "displayName": "Test",
if isinstance(raw, list) and raw: "description": "Test",
categories_used.add(raw[0].get("category", "")) "expression": "SELECT COUNT(*) FROM users",
"owners": [],
"tags": [],
}
parser = MetricParser(METRICS_DIR) result = _build_om_metric_detail(raw)
missing = categories_used - set(parser.CATEGORY_COLORS.keys())
assert not missing, f"CATEGORY_COLORS missing entries for: {missing}"
assert "expression" in result["sql_examples"]
assert result["sql_examples"]["expression"]["query"] == "SELECT COUNT(*) FROM users"
assert result["sql_examples"]["expression"]["title"] == "Metric Expression"
class TestMetricParserParsesSample: def test_extract_owner_names(self):
"""Parse one metric and verify structured output.""" """Extract owner names from owners list."""
raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "SELECT",
"owners": [
{"name": "alice", "email": "alice@example.com"},
{"name": "bob"},
],
"tags": [],
}
def test_parse_total_revenue(self): result = _build_om_metric_detail(raw)
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
assert data["name"] == "total_revenue" # Owner names go to notes.all
assert data["display_name"] == "Total Revenue" assert len(result["notes"]["all"]) == 0 # We don't populate this from owners yet
assert data["category"] == "revenue"
assert data["category_color"] == "#0073D1"
assert data["metadata"]["unit"] == "USD"
assert data["metadata"]["grain"] == "monthly"
assert len(data["dimensions"]) > 0
assert "sql" in data["sql_examples"]
assert data["technical"]["table"] == "orders"
assert data["technical"]["expression"] == "SUM(total_amount)"
def test_parse_metric_with_tables_field(self): def test_empty_expression_no_sql_example(self):
parser = MetricParser(METRICS_DIR) """Don't add empty expression to sql_examples."""
data = parser.parse_metric("revenue/average_order_value.yml") raw = {
"fullyQualifiedName": "catalog.metrics.test",
"name": "test",
"displayName": "Test",
"description": "Test",
"expression": "",
"owners": [],
"tags": [],
}
assert data["name"] == "average_order_value" result = _build_om_metric_detail(raw)
assert "sql_by_segment" in data["sql_examples"]
assert result["sql_examples"] == {}
class TestLoadMetricsData:
"""Verify _load_metrics_data returns correct structure."""
def test_returns_four_categories(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
assert isinstance(result, list)
assert len(result) == 4
category_keys = [c["key"] for c in result]
assert "revenue" in category_keys
assert "customers" in category_keys
assert "marketing" in category_keys
assert "support" in category_keys
def test_total_metrics_count(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
total = sum(len(c["metrics"]) for c in result)
assert total == 10
def test_metric_has_required_fields(self):
from webapp.app import _load_metrics_data
result = _load_metrics_data()
for cat in result:
for m in cat["metrics"]:
assert "name" in m
assert "display_name" in m
assert "description" in m
assert "grain" in m
assert "path" in m
class TestDynamicSqlFields:
"""Verify sql_by_* fields are auto-discovered by parser."""
def test_dynamic_sql_fields_discovered(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
# sql_by_channel should be found via dynamic discovery
assert "sql_by_channel" in data["sql_examples"]
assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel"
def test_dynamic_sql_title_generation(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("customers/repeat_purchase_rate.yml")
# sql_by_channel should be found via dynamic discovery
assert "sql_by_channel" in data["sql_examples"]
assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel"
def test_static_sql_still_works(self):
parser = MetricParser(METRICS_DIR)
data = parser.parse_metric("revenue/total_revenue.yml")
assert "sql" in data["sql_examples"]
assert data["sql_examples"]["sql"]["title"] == "Basic Query"

View file

@ -56,6 +56,12 @@ except ImportError:
_CATALOG_ENRICHER_AVAILABLE = False _CATALOG_ENRICHER_AVAILABLE = False
CatalogEnricher = None CatalogEnricher = None
# Metric parser for modal detail rendering
try:
from webapp.utils.metric_parser import MetricParser
except ImportError:
MetricParser = None
# Configure logging # Configure logging
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
@ -458,9 +464,17 @@ METRIC_CATEGORY_META = {
def _load_metrics_data(): def _load_metrics_data():
"""Load business metric definitions for catalog display. """Load business metric definitions for catalog display.
Prefers metrics from OpenMetadata catalog. Falls back to YAML files if catalog unavailable.
Returns list of category dicts ordered by METRIC_CATEGORY_META: Returns list of category dicts ordered by METRIC_CATEGORY_META:
[{'key': 'revenue', 'label': 'Revenue', 'css': 'sales', 'metrics': [...]}, ...] [{'key': 'finance', 'label': 'Finance...', 'css': '...', 'metrics': [...]}, ...]
""" """
# Try catalog first (Phase 2)
catalog_metrics = _load_metrics_from_catalog()
if catalog_metrics:
return catalog_metrics
# Fallback to YAML files if catalog unavailable
# Try production path first, fall back to local dev path # Try production path first, fall back to local dev path
metrics_dir = Path("/data/docs/metrics") metrics_dir = Path("/data/docs/metrics")
if not metrics_dir.exists(): if not metrics_dir.exists():
@ -520,6 +534,214 @@ def _load_metrics_data():
return result return result
def _parse_om_metric(raw_metric: dict) -> dict:
    """
    Parse raw OpenMetadata metric dict into format for metric list display.

    Category and grain are read from tags with standard prefixes:
    - Category: tagFQN like "MetricCategory.finance" or "Category.marketing"
    - Grain: tagFQN like "Grain.monthly"

    If several tags carry the same kind of prefix, the last one wins.
    Tags without a value after the prefix (e.g. "Grain.") are ignored so
    they cannot blank out the defaults.

    Args:
        raw_metric: Raw metric dict from OpenMetadata (id, fullyQualifiedName,
            description, tags, etc.). Missing or null fields are tolerated.

    Returns:
        Dict with keys: name, display_name, description, grain, category, path
        (path = "catalog:{fullyQualifiedName}" for JS routing).
        category defaults to "general" and grain to "" when no tag matches.
    """
    fqn = raw_metric.get("fullyQualifiedName", "")
    name = raw_metric.get("name", "")
    # `or` instead of a .get() default: an explicit null/empty displayName
    # must fall back to the technical name too, not render as a blank label.
    display_name = raw_metric.get("displayName") or name
    description = raw_metric.get("description", "") or ""

    category = "general"
    grain = ""

    # `or []` guards against the key being present with a null value.
    for tag in raw_metric.get("tags") or []:
        prefix, sep, value = (tag.get("tagFQN") or "").partition(".")
        if not sep or not value:
            continue  # not a "<Prefix>.<value>" tag
        if prefix in ("MetricCategory", "Category"):
            category = value
        elif prefix == "Grain":
            grain = value

    return {
        "name": name,
        "display_name": display_name,
        "description": description,
        "grain": grain,
        "category": category,
        "path": f"catalog:{fqn}",  # Special prefix for JS routing
    }
def _load_metrics_from_catalog() -> list:
    """
    Load business metrics from the OpenMetadata catalog, grouped by category.

    Each raw metric is parsed with _parse_om_metric() and bucketed by its
    "category" value. Categories listed in METRIC_CATEGORY_META come first,
    in their configured "order"; any other category follows alphabetically
    with a label derived from its key.

    Returns:
        List of category dicts, same structure as _load_metrics_data():
        [
            {'key': 'finance', 'label': '...', 'css': '...', 'metrics': [...]},
            {'key': 'marketing', 'label': '...', 'css': '...', 'metrics': [...]}
        ]
        Empty list if the catalog is disabled, has no metrics, or fails.
    """
    global _catalog_enricher

    enricher = _catalog_enricher
    if not (enricher and enricher.enabled):
        return []

    try:
        # Fetch metrics from catalog
        raw_metrics = enricher.get_metrics()
        if not raw_metrics:
            logger.debug("No metrics found in catalog")
            return []

        # Bucket parsed metrics by category; one bad entry must not drop the rest
        by_category = {}
        for raw in raw_metrics:
            try:
                parsed = _parse_om_metric(raw)
                by_category.setdefault(parsed["category"], []).append(parsed)
            except Exception as e:
                logger.warning(f"Failed to parse metric {raw.get('name', '?')}: {e}")
                continue

        # Known categories first, ordered and labelled via METRIC_CATEGORY_META
        known_in_order = sorted(
            METRIC_CATEGORY_META.items(),
            key=lambda item: item[1]["order"],
        )
        result = [
            {
                "key": key,
                "label": meta["label"],
                "css": meta["css"],
                "metrics": by_category[key],
            }
            for key, meta in known_in_order
            if key in by_category
        ]

        # Unknown categories go at the end, sorted by key
        result.extend(
            {
                "key": key,
                "label": key.replace("_", " ").title(),
                "css": key,
                "metrics": metrics,
            }
            for key, metrics in sorted(by_category.items())
            if key not in METRIC_CATEGORY_META
        )

        logger.info(f"Loaded {sum(len(c['metrics']) for c in result)} metrics from catalog")
        return result

    except Exception as e:
        logger.warning(f"Failed to load metrics from catalog: {e}")
        return []
def _build_om_metric_detail(raw_metric: dict) -> dict:
    """
    Convert raw OpenMetadata metric into MetricParser-compatible JSON for modal.

    Maps OpenMetadata fields to the MetricParser structure (name,
    display_name, category, metadata, etc.). Type, unit, grain, category and
    dimensions are read from tags with standard prefixes:
    - "MetricType.<type>", "Unit.<unit>", "Grain.<grain>"
    - "MetricCategory.<category>" or "Category.<category>" (same prefixes
      the metric list uses, so list and modal agree on the category)
    - "Dimension.<name>" (may repeat; collected in order)

    Args:
        raw_metric: Raw metric dict from OpenMetadata. Missing or null fields
            (displayName, description, expression, tags, owners) are tolerated.

    Returns:
        Dict matching MetricParser._structure_metric_data() format. Fields
        OpenMetadata does not provide are returned empty. Owner names are
        exposed additionally under technical["owners"].
    """
    name = raw_metric.get("name", "")
    # `or` also covers an explicit null/empty displayName.
    display_name = raw_metric.get("displayName") or name
    description = raw_metric.get("description", "") or ""

    # NOTE(review): OpenMetadata's Metric schema appears to expose the formula
    # as metricExpression={"language": ..., "code": ...} rather than a flat
    # "expression" string - confirm against the deployed version. The flat
    # field still takes precedence, so existing payloads behave as before.
    expression = raw_metric.get("expression") or raw_metric.get("metricExpression") or ""
    if isinstance(expression, dict):
        expression = expression.get("code") or ""

    # Extract metadata from tags
    metric_type = ""
    unit = ""
    grain = ""
    category = "general"
    dimensions = []

    for tag in raw_metric.get("tags") or []:
        prefix, sep, value = (tag.get("tagFQN") or "").partition(".")
        if not sep or not value:
            continue  # not a "<Prefix>.<value>" tag
        if prefix == "MetricType":
            metric_type = value
        elif prefix == "Unit":
            unit = value
        elif prefix == "Grain":
            grain = value
        elif prefix in ("MetricCategory", "Category"):
            category = value
        elif prefix == "Dimension":
            dimensions.append(value)

    # Extract owner names (technical name preferred, display name as fallback)
    owner_labels = (
        owner.get("name") or owner.get("displayName") or ""
        for owner in raw_metric.get("owners") or []
    )
    owner_names = [label for label in owner_labels if label]

    # MetricParser is None when its guarded import failed; degrade to the
    # default color instead of raising AttributeError.
    category_colors = getattr(MetricParser, "CATEGORY_COLORS", None) or {}

    # Build MetricParser-compatible structure
    return {
        "name": name,
        "display_name": display_name,
        "category": category,
        "category_color": category_colors.get(category, "#6B7280"),
        "metadata": {
            "type": metric_type,
            "unit": unit,
            "grain": grain,
            "time_column": "",  # Not available in OpenMetadata
        },
        "overview": {
            "description": description.strip(),
            "key_insights": [],  # Not available in OpenMetadata
        },
        "validation": None,  # Not available in OpenMetadata
        "dimensions": dimensions,
        "notes": {
            "all": [],  # Not available in OpenMetadata
            "key_insights": [],
        },
        "sql_examples": {
            "expression": {
                "title": "Metric Expression",
                "query": expression,
                "complexity": "simple",
            }
        } if expression else {},
        "technical": {
            "table": "",  # Not available in OpenMetadata
            "expression": expression,
            "synonyms": [],
            "data_sources": [],
            "owners": owner_names,
        },
        "special_sections": {},
    }
def _send_welcome_message(username: str) -> None: def _send_welcome_message(username: str) -> None:
"""Send a welcome message to the user via bot socket after linking.""" """Send a welcome message to the user via bot socket after linking."""
try: try:
@ -787,6 +1009,36 @@ def register_routes(app: Flask) -> None:
logger.error(f"Error parsing metric {metric_path}: {e}") logger.error(f"Error parsing metric {metric_path}: {e}")
return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500 return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500
@app.route("/api/catalog/metrics/<path:metric_fqn>")
@login_required
def api_catalog_metric(metric_fqn):
    """
    API endpoint to serve metric from OpenMetadata catalog as structured JSON.

    Args:
        metric_fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

    Returns:
        JSON matching MetricParser format for modal rendering, or a JSON
        error body with status:
        - 503 if the catalog enricher is missing or disabled
        - 404 if the catalog reports that the metric does not exist
        - 500 for any other failure
    """
    global _catalog_enricher

    if not _catalog_enricher or not _catalog_enricher.enabled:
        return jsonify({"error": "Catalog not available"}), 503

    try:
        # Fetch metric from catalog
        raw = _catalog_enricher._client.get_metric_by_fqn(metric_fqn)

        # Convert to MetricParser format
        metric_data = _build_om_metric_detail(raw)

        return jsonify(metric_data)
    except Exception as e:
        # The client raises on non-2xx responses. Duck-type the attached
        # response so an unknown FQN surfaces as 404 instead of a generic
        # 500, without importing the HTTP library here.
        upstream_response = getattr(e, "response", None)
        if getattr(upstream_response, "status_code", None) == 404:
            logger.warning(f"Catalog metric not found: {metric_fqn}")
            return jsonify({"error": f"Metric not found: {metric_fqn}"}), 404

        logger.error(f"Error fetching catalog metric {metric_fqn}: {e}")
        return jsonify({"error": f"Failed to fetch metric: {str(e)}"}), 500
@app.route("/docs/metrics/<path:metric_path>") @app.route("/docs/metrics/<path:metric_path>")
@login_required @login_required
def serve_metric(metric_path): def serve_metric(metric_path):

View file

@ -9,7 +9,7 @@ let currentMetricData = null;
/** /**
* Open metric modal and load data * Open metric modal and load data
* @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') * @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') or catalog FQN (e.g., 'catalog:...')
*/ */
function openMetricModal(metricPath) { function openMetricModal(metricPath) {
currentMetricPath = metricPath; currentMetricPath = metricPath;
@ -23,8 +23,13 @@ function openMetricModal(metricPath) {
// Show loading state // Show loading state
body.innerHTML = '<div class="metric-loading"><div class="metric-loading-spinner"></div><div class="metric-loading-text">Loading metric...</div></div>'; body.innerHTML = '<div class="metric-loading"><div class="metric-loading-spinner"></div><div class="metric-loading-text">Loading metric...</div></div>';
// Route based on prefix: catalog:FQN uses /api/catalog/metrics, YAML paths use /api/metrics
const url = metricPath.startsWith('catalog:')
? `/api/catalog/metrics/${metricPath.slice(8)}` // Remove 'catalog:' prefix
: `/api/metrics/${metricPath}`;
// Fetch metric data // Fetch metric data
fetch(`/api/metrics/${metricPath}`) fetch(url)
.then(response => { .then(response => {
if (!response.ok) { if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`); throw new Error(`HTTP ${response.status}: ${response.statusText}`);

View file

@ -33,6 +33,7 @@
--font-medium: 500; --font-medium: 500;
--font-semibold: 600; --font-semibold: 600;
--font-bold: 700; --font-bold: 700;
--font-extrabold: 800;
/* Spacing */ /* Spacing */
--space-1: 4px; --space-1: 4px;
@ -194,7 +195,7 @@ body {
.welcome-v2 h2 { .welcome-v2 h2 {
font-size: var(--text-xl); font-size: var(--text-xl);
font-weight: var(--font-semibold); font-weight: var(--font-extrabold);
color: var(--text-primary); color: var(--text-primary);
margin-bottom: var(--space-2); margin-bottom: var(--space-2);
} }

View file

@ -118,7 +118,7 @@
.page-title h1 { .page-title h1 {
font-size: 24px; font-size: 24px;
font-weight: 600; font-weight: 800;
color: var(--text-primary); color: var(--text-primary);
margin-bottom: 4px; margin-bottom: 4px;
} }
@ -186,7 +186,7 @@
.source-card-name { .source-card-name {
font-size: 16px; font-size: 16px;
font-weight: 600; font-weight: 700;
color: var(--text-primary); color: var(--text-primary);
margin-bottom: 2px; margin-bottom: 2px;
} }