Phase 2: Replace demo YAML metrics with OpenMetadata catalog data

- Add get_metric_by_fqn() to OpenMetadataClient
- Add get_metrics() to CatalogEnricher with TTL caching
- Implement _parse_om_metric() to extract category/grain from OpenMetadata tags
- Implement _load_metrics_from_catalog() to fetch and categorize metrics
- Implement _build_om_metric_detail() to convert OpenMetadata format to MetricParser JSON
- Add /api/catalog/metrics/<fqn> endpoint for metric detail modal
- Update _load_metrics_data() to prefer catalog over YAML fallback
- Update metric_modal.js to route catalog:{fqn} to catalog API endpoint
- Delete 10 demo YAML files from docs/metrics/
- Replace metric tests with new unit tests for catalog parsing functions (19 tests)

Catalog metrics provide a single source of truth, replacing the separately maintained demo YAML files. The UI remains unchanged; only the data source changes from YAML to the OpenMetadata catalog.
2026-03-12 15:10:42 +01:00 · 2026-03-12 15:10:42 +01:00 · 5fc9526627
commit 5fc9526627
parent be58e63394
17 changed files with 654 additions and 608 deletions
--- a/connectors/openmetadata/client.py
+++ b/connectors/openmetadata/client.py
@ -112,6 +112,32 @@ class OpenMetadataClient:
        data = response.json()
        return data.get("data", [])
    def get_metric_by_fqn(self, fqn: str) -> Dict[str, Any]:
        """
        Fetch a specific metric by FQN from OpenMetadata.

        The FQN is percent-encoded before it is placed in the URL path, so
        names containing quotes, spaces, slashes or other reserved characters
        address the intended metric instead of producing a different or
        malformed path.

        Args:
            fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

        Returns:
            Dictionary with metric metadata:
            - id, name, fullyQualifiedName
            - description, expression
            - owners, tags

        Raises:
            httpx.HTTPStatusError: If request fails (non-2xx status)
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import quote

        # safe="" also escapes "/", which would otherwise be read as a path
        # separator and split the FQN into several path segments.
        encoded_fqn = quote(fqn, safe="")
        url = f"/api/v1/metrics/name/{encoded_fqn}"
        params = {
            "fields": "description,expression,owners,tags,displayName",
        }
        response = self._client.get(url, params=params)
        response.raise_for_status()
        return response.json()
    def close(self):
        """Shut down the underlying HTTP client session."""
        http_client = self._client
        http_client.close()
--- a/connectors/openmetadata/enricher.py
+++ b/connectors/openmetadata/enricher.py
@ -301,6 +301,45 @@ class CatalogEnricher:
            "fetched_at": datetime.now(),
        }
    def get_metrics(self, limit: int = 200) -> List[Dict[str, Any]]:
        """
        Fetch list of business metrics from OpenMetadata catalog.

        Results are cached per requested ``limit``: a list fetched with a
        small limit is never served to a later call asking for more metrics
        (and vice versa).

        Args:
            limit: Maximum number of metrics to fetch (default: 200)

        Returns:
            List of metric dictionaries with id, name, fullyQualifiedName, description, etc.
            Returns empty list if:
            - enricher is disabled
            - catalog unavailable
            - HTTP request fails
            Any exception raised while reading the cache or calling the API
            is logged as a warning and converted to an empty list
            (graceful degradation).
        """
        if not self.enabled or not self._client:
            return []

        # The limit is part of the key so differently sized requests do not
        # share (and truncate or over-fill) each other's cached result.
        cache_key = f"__metrics_list__:{limit}"
        try:
            # Check cache first
            cached = self._get_from_cache(cache_key)
            if cached is not None:
                logger.debug("Catalog cache hit: metrics list")
                return cached

            # Fetch from API
            logger.debug(f"Fetching {limit} metrics from catalog")
            metrics = self._client.get_metrics(limit=limit)

            # Cache the result (with TTL)
            self._cache_entry(cache_key, metrics)
            logger.info(f"Loaded {len(metrics)} metrics from catalog")
            return metrics
        except Exception as e:
            # Deliberately broad: the caller falls back to other data
            # sources when the catalog cannot be reached.
            logger.warning(f"Failed to fetch metrics from catalog: {e}")
            return []
    def clear_cache(self):
        """Drop every entry currently held in the enricher's cache."""
        cache = self._cache
        cache.clear()
--- a/docs/metrics/customers/customer_count.yml
+++ b/docs/metrics/customers/customer_count.yml
@ -1,39 +0,0 @@
 - name: customer_count
  display_name: Customer Count
  category: customers
  type: count_distinct
  unit: customers
  grain: monthly
  time_column: created_at
  table: customers
  expression: "COUNT(DISTINCT customer_id)"
  description: "Total number of unique customers. Tracks customer base growth over time. Counts distinct customer records based on registration date."
  dimensions:
    - segment
    - region
    - acquisition_channel
  notes:
    - "Counts only active customers (not deleted or merged)"
    - "A customer is counted in the month of their first registration"
    - "Segment is assigned based on lifetime spend thresholds"
  synonyms:
    - total_customers
    - customer_base
    - active_customers
  sql: |
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        COUNT(DISTINCT customer_id) AS new_customers
    FROM customers
    WHERE status = 'active'
    GROUP BY 1
    ORDER BY 1
  sql_by_segment: |
    SELECT
        segment,
        COUNT(DISTINCT customer_id) AS customer_count,
        AVG(lifetime_value) AS avg_ltv
    FROM customers
    WHERE status = 'active'
    GROUP BY 1
    ORDER BY 2 DESC
--- a/docs/metrics/customers/repeat_purchase_rate.yml
+++ b/docs/metrics/customers/repeat_purchase_rate.yml
@ -1,66 +0,0 @@
 - name: repeat_purchase_rate
  display_name: Repeat Purchase Rate
  category: customers
  type: ratio
  unit: "%"
  grain: monthly
  time_column: order_date
  table: orders
  tables:
    - orders
    - customers
  expression: "COUNT(DISTINCT CASE WHEN order_number > 1 THEN customer_id END) / COUNT(DISTINCT customer_id)"
  description: "Percentage of customers who made more than one purchase. Key loyalty and retention indicator. Higher rates signal strong product-market fit and customer satisfaction."
  dimensions:
    - customer_segment
    - acquisition_channel
    - product_category
  notes:
    - "Calculated over a rolling 12-month window by default"
    - "Joins orders to customers via customer_id"
    - "Order numbering is based on chronological order per customer"
    - "Excludes cancelled and fully refunded orders"
  synonyms:
    - retention_rate
    - repurchase_rate
    - customer_loyalty_rate
  sql: |
    WITH customer_orders AS (
        SELECT
            customer_id,
            COUNT(*) AS order_count
        FROM orders
        WHERE status = 'completed'
            AND order_date >= CURRENT_DATE - INTERVAL '12 months'
        GROUP BY 1
    )
    SELECT
        ROUND(
            COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
            / COUNT(*), 2
        ) AS repeat_purchase_rate_pct,
        COUNT(*) AS total_customers,
        COUNT(CASE WHEN order_count > 1 THEN 1 END) AS repeat_customers
    FROM customer_orders
  sql_by_channel: |
    WITH customer_orders AS (
        SELECT
            o.customer_id,
            c.acquisition_channel,
            COUNT(*) AS order_count
        FROM orders o
        JOIN customers c ON o.customer_id = c.customer_id
        WHERE o.status = 'completed'
            AND o.order_date >= CURRENT_DATE - INTERVAL '12 months'
        GROUP BY 1, 2
    )
    SELECT
        acquisition_channel,
        ROUND(
            COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0
            / COUNT(*), 2
        ) AS repeat_rate_pct,
        COUNT(*) AS total_customers
    FROM customer_orders
    GROUP BY 1
    ORDER BY 2 DESC
--- a/docs/metrics/marketing/campaign_roi.yml
+++ b/docs/metrics/marketing/campaign_roi.yml
@ -1,55 +0,0 @@
 - name: campaign_roi
  display_name: Campaign ROI
  category: marketing
  type: ratio
  unit: "%"
  grain: monthly
  time_column: start_date
  table: campaigns
  tables:
    - campaigns
    - orders
    - web_leads
  expression: "(SUM(attributed_revenue) - SUM(spend)) / NULLIF(SUM(spend), 0) * 100"
  description: "Return on investment for marketing campaigns. Measures revenue generated relative to campaign spend. Negative ROI indicates underperforming campaigns that need optimization."
  dimensions:
    - campaign_type
    - channel
    - target_segment
  notes:
    - "Attribution uses last-touch model by default"
    - "Joins campaigns to orders via utm_campaign tracking codes"
    - "Web leads are attributed to campaigns via landing page tracking"
    - "ROI above 300% is considered excellent for e-commerce"
  synonyms:
    - marketing_roi
    - campaign_return
    - roas
  sql: |
    SELECT
        c.campaign_name,
        c.campaign_type,
        c.spend,
        SUM(o.total_amount) AS attributed_revenue,
        ROUND(
            (SUM(o.total_amount) - c.spend) / NULLIF(c.spend, 0) * 100, 2
        ) AS roi_pct
    FROM campaigns c
    LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
        AND o.status = 'completed'
    GROUP BY 1, 2, 3
    ORDER BY 5 DESC
  sql_by_type: |
    SELECT
        c.campaign_type,
        SUM(c.spend) AS total_spend,
        SUM(o.total_amount) AS total_revenue,
        ROUND(
            (SUM(o.total_amount) - SUM(c.spend))
            / NULLIF(SUM(c.spend), 0) * 100, 2
        ) AS roi_pct
    FROM campaigns c
    LEFT JOIN orders o ON o.utm_campaign = c.campaign_id
        AND o.status = 'completed'
    GROUP BY 1
    ORDER BY 4 DESC
--- a/docs/metrics/marketing/cost_per_acquisition.yml
+++ b/docs/metrics/marketing/cost_per_acquisition.yml
@ -1,53 +0,0 @@
 - name: cost_per_acquisition
  display_name: Cost per Acquisition
  category: marketing
  type: ratio
  unit: USD
  grain: monthly
  time_column: start_date
  table: campaigns
  tables:
    - campaigns
    - customers
  expression: "SUM(spend) / NULLIF(COUNT(DISTINCT new_customer_id), 0)"
  description: "Average cost to acquire one new customer through marketing campaigns. Compares total campaign spend to the number of new customer registrations attributed to those campaigns."
  dimensions:
    - campaign_type
    - channel
    - region
  notes:
    - "Only counts first-time customers (no repeat purchasers)"
    - "Joins campaigns to customers via attribution tracking"
    - "CPA below customer lifetime value indicates sustainable growth"
  synonyms:
    - cpa
    - customer_acquisition_cost
    - cac
  sql: |
    SELECT
        DATE_TRUNC('month', c.start_date) AS month,
        SUM(c.spend) AS total_spend,
        COUNT(DISTINCT cust.customer_id) AS new_customers,
        ROUND(
            SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
        ) AS cost_per_acquisition
    FROM campaigns c
    LEFT JOIN customers cust
        ON cust.attribution_campaign = c.campaign_id
        AND cust.is_first_purchase = true
    GROUP BY 1
    ORDER BY 1
  sql_by_channel: |
    SELECT
        c.channel,
        SUM(c.spend) AS total_spend,
        COUNT(DISTINCT cust.customer_id) AS new_customers,
        ROUND(
            SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2
        ) AS cpa
    FROM campaigns c
    LEFT JOIN customers cust
        ON cust.attribution_campaign = c.campaign_id
        AND cust.is_first_purchase = true
    GROUP BY 1
    ORDER BY 4
--- a/docs/metrics/marketing/lead_conversion_rate.yml
+++ b/docs/metrics/marketing/lead_conversion_rate.yml
@ -1,46 +0,0 @@
 - name: lead_conversion_rate
  display_name: Lead Conversion Rate
  category: marketing
  type: ratio
  unit: "%"
  grain: monthly
  time_column: created_at
  table: web_leads
  expression: "COUNT(CASE WHEN status = 'converted' THEN 1 END) / COUNT(*) * 100"
  description: "Percentage of web leads that convert to paying customers. Measures the effectiveness of the sales funnel from initial lead capture through purchase completion."
  dimensions:
    - source
    - landing_page
    - lead_score_tier
  notes:
    - "A lead is 'converted' when they complete their first purchase"
    - "Conversion window is 90 days from lead creation"
    - "Duplicate leads (same email) are deduplicated by earliest creation"
  synonyms:
    - conversion_rate
    - lead_to_customer_rate
    - funnel_conversion
  sql: |
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        COUNT(*) AS total_leads,
        COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
        ROUND(
            COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
            / COUNT(*), 2
        ) AS conversion_rate_pct
    FROM web_leads
    GROUP BY 1
    ORDER BY 1
  sql_by_source: |
    SELECT
        source,
        COUNT(*) AS total_leads,
        COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted,
        ROUND(
            COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0
            / COUNT(*), 2
        ) AS conversion_rate_pct
    FROM web_leads
    GROUP BY 1
    ORDER BY 4 DESC
--- a/docs/metrics/revenue/average_order_value.yml
+++ b/docs/metrics/revenue/average_order_value.yml
@ -1,45 +0,0 @@
 - name: average_order_value
  display_name: Average Order Value
  category: revenue
  type: average
  unit: USD
  grain: monthly
  time_column: order_date
  table: orders
  tables:
    - orders
    - customers
  expression: "AVG(total_amount)"
  description: "Average monetary value per order. Key indicator of customer purchasing behavior and pricing effectiveness. Joins to customers for segmentation."
  dimensions:
    - channel
    - customer_segment
    - product_category
    - is_first_order
  notes:
    - "Calculated only on completed orders"
    - "Joins to customers table via customer_id for segment analysis"
    - "Useful to compare AOV by new vs returning customers"
  synonyms:
    - aov
    - avg_basket_size
  sql: |
    SELECT
        DATE_TRUNC('month', o.order_date) AS month,
        AVG(o.total_amount) AS avg_order_value,
        COUNT(*) AS order_count
    FROM orders o
    WHERE o.status = 'completed'
    GROUP BY 1
    ORDER BY 1
  sql_by_segment: |
    SELECT
        DATE_TRUNC('month', o.order_date) AS month,
        c.segment AS customer_segment,
        AVG(o.total_amount) AS avg_order_value,
        COUNT(*) AS order_count
    FROM orders o
    JOIN customers c ON o.customer_id = c.customer_id
    WHERE o.status = 'completed'
    GROUP BY 1, 2
    ORDER BY 1, 3 DESC
--- a/docs/metrics/revenue/revenue_by_channel.yml
+++ b/docs/metrics/revenue/revenue_by_channel.yml
@ -1,41 +0,0 @@
 - name: revenue_by_channel
  display_name: Revenue by Channel
  category: revenue
  type: sum
  unit: USD
  grain: monthly
  time_column: order_date
  table: orders
  expression: "SUM(total_amount) GROUP BY channel"
  description: "Revenue breakdown by sales channel (web, mobile, in-store, marketplace). Identifies highest-performing channels and guides marketing spend allocation."
  dimensions:
    - channel
    - region
    - product_category
  notes:
    - "Channel is assigned at order creation and does not change"
    - "Marketplace channel includes all third-party platforms (Amazon, eBay, etc.)"
    - "Cross-channel attribution is not applied; each order is counted once"
  synonyms:
    - channel_revenue
    - sales_by_channel
  sql: |
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        channel,
        SUM(total_amount) AS revenue,
        COUNT(*) AS order_count
    FROM orders
    WHERE status = 'completed'
    GROUP BY 1, 2
    ORDER BY 1, 3 DESC
  sql_by_region: |
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        channel,
        region,
        SUM(total_amount) AS revenue
    FROM orders
    WHERE status = 'completed'
    GROUP BY 1, 2, 3
    ORDER BY 1, 4 DESC
--- a/docs/metrics/revenue/total_revenue.yml
+++ b/docs/metrics/revenue/total_revenue.yml
@ -1,40 +0,0 @@
 - name: total_revenue
  display_name: Total Revenue
  category: revenue
  type: sum
  unit: USD
  grain: monthly
  time_column: order_date
  table: orders
  expression: "SUM(total_amount)"
  description: "Total revenue from all orders. Primary top-line metric tracking overall business performance across all channels and product categories."
  dimensions:
    - channel
    - product_category
    - region
    - payment_method
  notes:
    - "Includes all completed orders, excludes cancelled and refunded"
    - "Revenue is recognized at order completion date, not payment date"
    - "Multi-currency orders are converted to USD at daily exchange rate"
  synonyms:
    - gross_revenue
    - total_sales
    - top_line_revenue
  sql: |
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        SUM(total_amount) AS total_revenue
    FROM orders
    WHERE status = 'completed'
    GROUP BY 1
    ORDER BY 1
  sql_by_channel: |
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        channel,
        SUM(total_amount) AS revenue
    FROM orders
    WHERE status = 'completed'
    GROUP BY 1, 2
    ORDER BY 1, 3 DESC
--- a/docs/metrics/support/avg_resolution_hours.yml
+++ b/docs/metrics/support/avg_resolution_hours.yml
@ -1,47 +0,0 @@
 - name: avg_resolution_hours
  display_name: Average Resolution Time
  category: support
  type: average
  unit: hours
  grain: monthly
  time_column: created_at
  table: support_tickets
  expression: "AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600)"
  description: "Average time in hours from ticket creation to resolution. Key support team performance metric. Lower values indicate more efficient support operations."
  dimensions:
    - priority
    - category
    - agent
    - channel
  notes:
    - "Only includes resolved tickets (excludes open and escalated)"
    - "Business hours calculation is not applied; uses wall-clock time"
    - "Outliers above 720 hours (30 days) are excluded from average"
  synonyms:
    - resolution_time
    - time_to_resolve
    - ttr
  sql: |
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        ROUND(
            AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
        ) AS avg_resolution_hours,
        COUNT(*) AS resolved_tickets
    FROM support_tickets
    WHERE resolved_at IS NOT NULL
        AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
    GROUP BY 1
    ORDER BY 1
  sql_by_priority: |
    SELECT
        priority,
        ROUND(
            AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1
        ) AS avg_resolution_hours,
        COUNT(*) AS ticket_count
    FROM support_tickets
    WHERE resolved_at IS NOT NULL
        AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720
    GROUP BY 1
    ORDER BY 2
--- a/docs/metrics/support/satisfaction_score.yml
+++ b/docs/metrics/support/satisfaction_score.yml
@ -1,46 +0,0 @@
 - name: satisfaction_score
  display_name: Customer Satisfaction Score
  category: support
  type: average
  unit: score (1-5)
  grain: monthly
  time_column: created_at
  table: support_tickets
  expression: "AVG(satisfaction_score)"
  description: "Average customer satisfaction rating on a 1-5 scale collected after ticket resolution. Measures customer perception of support quality and identifies areas for improvement."
  dimensions:
    - priority
    - category
    - agent
    - resolution_type
  notes:
    - "Score is collected via post-resolution survey email"
    - "Response rate is typically 25-35% of resolved tickets"
    - "Score of 4+ is considered 'satisfied', below 3 is 'unsatisfied'"
    - "Only tickets with a satisfaction response are included"
  synonyms:
    - csat
    - customer_satisfaction
    - satisfaction_rating
  sql: |
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
        COUNT(*) AS responses,
        ROUND(
            COUNT(CASE WHEN satisfaction_score >= 4 THEN 1 END) * 100.0
            / COUNT(*), 1
        ) AS pct_satisfied
    FROM support_tickets
    WHERE satisfaction_score IS NOT NULL
    GROUP BY 1
    ORDER BY 1
  sql_by_category: |
    SELECT
        category,
        ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction,
        COUNT(*) AS responses
    FROM support_tickets
    WHERE satisfaction_score IS NOT NULL
    GROUP BY 1
    ORDER BY 2 DESC
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@ -1,151 +1,352 @@
-"""Tests for business metric YAML definitions and parser."""
+"""Tests for OpenMetadata catalog metrics and parsing functions."""
 import yaml
 import pytest
-from pathlib import Path
+from unittest.mock import Mock, MagicMock, patch
-
+from webapp.app import _parse_om_metric, _load_metrics_from_catalog, _build_om_metric_detail, METRIC_CATEGORY_META
 from webapp.utils.metric_parser import MetricParser
-METRICS_DIR = Path(__file__).parent.parent / "docs" / "metrics"
+class TestParseOmMetric:
    """Unit tests for _parse_om_metric() function."""
-REQUIRED_FIELDS = [
+    def test_parse_metric_basic_fields(self):
-    "name", "display_name", "category", "type", "unit",
+        """Extract basic fields from raw metric."""
-    "grain", "time_column", "table", "description", "expression",
+        raw = {
-]
+            "fullyQualifiedName": "catalog.metrics.total_revenue",
            "name": "total_revenue",
            "displayName": "Total Revenue",
            "description": "Total revenue from all orders",
            "tags": [],
        }
        result = _parse_om_metric(raw)
        assert result["name"] == "total_revenue"
        assert result["display_name"] == "Total Revenue"
        assert result["description"] == "Total revenue from all orders"
        assert result["path"] == "catalog:catalog.metrics.total_revenue"
    def test_parse_metric_with_category_tag(self):
        """A MetricCategory.* tag yields the category; a Grain.* tag yields the grain."""
        category_tag = {"tagFQN": "MetricCategory.finance"}
        grain_tag = {"tagFQN": "Grain.monthly"}
        payload = dict(
            fullyQualifiedName="catalog.metrics.revenue_metric",
            name="revenue_metric",
            displayName="Revenue",
            description="Test",
            tags=[category_tag, grain_tag],
        )
        parsed = _parse_om_metric(payload)
        assert parsed["category"] == "finance"
        assert parsed["grain"] == "monthly"
    def test_parse_metric_with_category_legacy_tag(self):
        """The legacy Category.* tag prefix is also accepted as a category source."""
        legacy_tag = {"tagFQN": "Category.marketing"}
        payload = dict(
            fullyQualifiedName="catalog.metrics.test",
            name="test",
            displayName="Test",
            description="Test",
            tags=[legacy_tag],
        )
        parsed = _parse_om_metric(payload)
        assert parsed["category"] == "marketing"
    def test_parse_metric_fallback_to_general(self):
        """Without any category tag the metric lands in the 'general' category."""
        untagged = dict(
            fullyQualifiedName="catalog.metrics.unknown",
            name="unknown",
            displayName="Unknown",
            description="Test",
            tags=[],
        )
        parsed = _parse_om_metric(untagged)
        assert parsed["category"] == "general"
    def test_parse_metric_display_name_fallback(self):
        """When displayName is absent, display_name falls back to name."""
        # The "displayName" key is left out on purpose.
        without_display_name = dict(
            fullyQualifiedName="catalog.metrics.test",
            name="test_metric",
            description="Test",
            tags=[],
        )
        parsed = _parse_om_metric(without_display_name)
        assert parsed["display_name"] == "test_metric"
    def test_parse_metric_path_has_catalog_prefix(self):
        """The path field carries the catalog: prefix used for JS routing."""
        payload = dict(
            fullyQualifiedName="catalog.metrics.test",
            name="test",
            displayName="Test",
            description="Test",
            tags=[],
        )
        path = _parse_om_metric(payload)["path"]
        assert path.startswith("catalog:")
-def _get_all_metric_files():
+class TestLoadMetricsFromCatalog:
-    """Return list of all metric YAML files."""
+    """Tests for _load_metrics_from_catalog() with mocked enricher."""
-    return sorted(METRICS_DIR.glob("*/*.yml"))
+
    @patch('webapp.app._catalog_enricher')
    def test_returns_empty_list_if_enricher_disabled(self, mock_enricher):
        """A disabled enricher produces an empty category list."""
        mock_enricher.configure_mock(enabled=False)
        categories = _load_metrics_from_catalog()
        assert categories == []
    @patch('webapp.app._catalog_enricher')
    def test_returns_empty_list_if_enricher_none(self, mock_enricher):
        """With the module-level enricher set to None, no categories are returned."""
        # NOTE(review): the mock injected by the decorator is never used; the
        # inner patch below replaces the attribute with None for the call.
        none_patch = patch('webapp.app._catalog_enricher', None)
        with none_patch:
            categories = _load_metrics_from_catalog()
            assert categories == []
    @patch('webapp.app._catalog_enricher')
    def test_groups_metrics_by_category(self, mock_enricher):
        """Metrics returned by the enricher are grouped under category keys."""
        finance_raw = {
            "fullyQualifiedName": "catalog.metrics.finance_metric",
            "name": "finance_metric",
            "displayName": "Finance Metric",
            "description": "Test",
            "tags": [{"tagFQN": "MetricCategory.finance"}],
        }
        marketing_raw = {
            "fullyQualifiedName": "catalog.metrics.marketing_metric",
            "name": "marketing_metric",
            "displayName": "Marketing Metric",
            "description": "Test",
            "tags": [{"tagFQN": "MetricCategory.marketing"}],
        }
        mock_enricher.enabled = True
        mock_enricher.get_metrics.return_value = [finance_raw, marketing_raw]

        with patch('webapp.app._catalog_enricher', mock_enricher):
            categories = _load_metrics_from_catalog()

        # Should have at least one of the known categories from METRIC_CATEGORY_META
        # NOTE(review): only one of the two categories has to be present for
        # this test to pass; it does not prove both groups were built.
        assert len(categories) >= 1
        category_keys = [entry["key"] for entry in categories]
        assert any(wanted in category_keys for wanted in ("finance", "marketing"))
        assert all(len(entry["metrics"]) > 0 for entry in categories)
    @patch('webapp.app._catalog_enricher')
    def test_uses_metric_category_meta_order(self, mock_enricher):
        """Categories in the result follow the order of METRIC_CATEGORY_META."""
        revenue_raw = {
            "fullyQualifiedName": "catalog.metrics.m1",
            "name": "m1",
            "displayName": "M1",
            "description": "Test",
            "tags": [{"tagFQN": "MetricCategory.revenue"}],
        }
        customers_raw = {
            "fullyQualifiedName": "catalog.metrics.m2",
            "name": "m2",
            "displayName": "M2",
            "description": "Test",
            "tags": [{"tagFQN": "MetricCategory.customers"}],
        }
        mock_enricher.enabled = True
        mock_enricher.get_metrics.return_value = [revenue_raw, customers_raw]

        with patch('webapp.app._catalog_enricher', mock_enricher):
            categories = _load_metrics_from_catalog()

        # revenue should come before customers per METRIC_CATEGORY_META order
        # NOTE(review): when either category is missing from the result the
        # ordering check is skipped and the test passes without asserting.
        category_keys = [entry["key"] for entry in categories]
        if "revenue" not in category_keys or "customers" not in category_keys:
            return
        assert category_keys.index("revenue") < category_keys.index("customers")
    @patch('webapp.app._catalog_enricher')
    def test_uses_category_label_from_meta(self, mock_enricher):
        """A known category takes its label and css from METRIC_CATEGORY_META."""
        revenue_raw = {
            "fullyQualifiedName": "catalog.metrics.m1",
            "name": "m1",
            "displayName": "M1",
            "description": "Test",
            "tags": [{"tagFQN": "MetricCategory.revenue"}],
        }
        mock_enricher.enabled = True
        mock_enricher.get_metrics.return_value = [revenue_raw]

        with patch('webapp.app._catalog_enricher', mock_enricher):
            categories = _load_metrics_from_catalog()

        # Verify that a known category gets its label from METRIC_CATEGORY_META
        assert len(categories) >= 1
        revenue_entries = [entry for entry in categories if entry["key"] == "revenue"]
        # NOTE(review): if no "revenue" entry is returned, the label/css
        # checks below are skipped and the test still passes.
        if not revenue_entries:
            return
        revenue_entry = revenue_entries[0]
        assert revenue_entry["label"] == METRIC_CATEGORY_META["revenue"]["label"]
        assert revenue_entry["css"] == METRIC_CATEGORY_META["revenue"]["css"]
    @patch('webapp.app._catalog_enricher')
    def test_graceful_failure_on_exception(self, mock_enricher):
        """An exception raised by the enricher degrades to an empty list."""
        api_failure = Exception("API error")
        mock_enricher.enabled = True
        mock_enricher.get_metrics.side_effect = api_failure

        with patch('webapp.app._catalog_enricher', mock_enricher):
            categories = _load_metrics_from_catalog()

        assert categories == []
    @patch('webapp.app._catalog_enricher')
    def test_empty_metrics_list(self, mock_enricher):
        """An empty catalog yields an empty category list."""
        no_metrics = []
        mock_enricher.enabled = True
        mock_enricher.get_metrics.return_value = no_metrics

        with patch('webapp.app._catalog_enricher', mock_enricher):
            categories = _load_metrics_from_catalog()

        assert categories == []
-class TestMetricYAMLValidity:
+class TestBuildOmMetricDetail:
-    """Validate all metric YAML files have required fields."""
+    """Tests for _build_om_metric_detail() function."""
-    def test_metrics_directory_exists(self):
+    def test_build_basic_structure(self):
-        assert METRICS_DIR.exists(), f"Metrics directory not found: {METRICS_DIR}"
+        """Build MetricParser-compatible structure from raw metric."""
        raw = {
            "fullyQualifiedName": "catalog.metrics.test",
            "name": "test_metric",
            "displayName": "Test Metric",
            "description": "A test metric",
            "expression": "COUNT(*)",
            "owners": [{"name": "data_team"}],
            "tags": [],
        }
-    def test_at_least_one_metric_exists(self):
+        result = _build_om_metric_detail(raw)
        files = _get_all_metric_files()
        assert len(files) > 0, "No metric YAML files found"
-    @pytest.mark.parametrize("metric_file", _get_all_metric_files(), ids=lambda f: f.relative_to(METRICS_DIR).as_posix())
+        assert result["name"] == "test_metric"
-    def test_all_metric_yamls_valid(self, metric_file):
+        assert result["display_name"] == "Test Metric"
-        """Every metric YAML must have all required fields."""
+        assert result["category"] == "general"
-        with open(metric_file) as f:
+        assert result["metadata"]["type"] == ""
-            raw = yaml.safe_load(f)
+        assert result["metadata"]["unit"] == ""
        assert result["metadata"]["grain"] == ""
        assert result["overview"]["description"] == "A test metric"
-        assert isinstance(raw, list), f"{metric_file.name}: expected YAML list, got {type(raw).__name__}"
+    def test_extract_metadata_from_tags(self):
-        assert len(raw) >= 1, f"{metric_file.name}: YAML list is empty"
+        """Extract type, unit, grain from tags."""
        raw = {
            "fullyQualifiedName": "catalog.metrics.revenue",
            "name": "revenue",
            "displayName": "Revenue",
            "description": "Test",
            "expression": "SUM(amount)",
            "owners": [],
            "tags": [
                {"tagFQN": "MetricType.sum"},
                {"tagFQN": "Unit.usd"},
                {"tagFQN": "Grain.monthly"},
                {"tagFQN": "MetricCategory.finance"},
            ],
        }
-        metric = raw[0]
+        result = _build_om_metric_detail(raw)
        assert isinstance(metric, dict), f"{metric_file.name}: first item is not a dict"
-        missing = [field for field in REQUIRED_FIELDS if field not in metric]
+        assert result["metadata"]["type"] == "sum"
-        assert not missing, f"{metric_file.name}: missing required fields: {missing}"
+        assert result["metadata"]["unit"] == "usd"
        assert result["metadata"]["grain"] == "monthly"
        assert result["category"] == "finance"
-        # Category must match parent directory name
+    def test_extract_dimensions_from_tags(self):
-        expected_category = metric_file.parent.name
+        """Extract dimension names from Dimension.* tags."""
-        assert metric["category"] == expected_category, (
+        raw = {
-            f"{metric_file.name}: category '{metric['category']}' != directory '{expected_category}'"
+            "fullyQualifiedName": "catalog.metrics.test",
-        )
+            "name": "test",
            "displayName": "Test",
            "description": "Test",
            "expression": "SELECT",
            "owners": [],
            "tags": [
                {"tagFQN": "Dimension.region"},
                {"tagFQN": "Dimension.channel"},
            ],
        }
        result = _build_om_metric_detail(raw)
-class TestMetricCategoriesInParser:
+        assert "region" in result["dimensions"]
-    """Verify CATEGORY_COLORS has entries for all used categories."""
+        assert "channel" in result["dimensions"]
-    def test_all_categories_have_colors(self):
+    def test_expression_in_sql_examples(self):
-        files = _get_all_metric_files()
+        """Expression field goes into sql_examples for modal display."""
-        categories_used = set()
+        raw = {
-        for f in files:
+            "fullyQualifiedName": "catalog.metrics.test",
-            with open(f) as fh:
+            "name": "test",
-                raw = yaml.safe_load(fh)
+            "displayName": "Test",
-            if isinstance(raw, list) and raw:
+            "description": "Test",
-                categories_used.add(raw[0].get("category", ""))
+            "expression": "SELECT COUNT(*) FROM users",
            "owners": [],
            "tags": [],
        }
-        parser = MetricParser(METRICS_DIR)
+        result = _build_om_metric_detail(raw)
        missing = categories_used - set(parser.CATEGORY_COLORS.keys())
        assert not missing, f"CATEGORY_COLORS missing entries for: {missing}"
        assert "expression" in result["sql_examples"]
        assert result["sql_examples"]["expression"]["query"] == "SELECT COUNT(*) FROM users"
        assert result["sql_examples"]["expression"]["title"] == "Metric Expression"
-class TestMetricParserParsesSample:
+    def test_extract_owner_names(self):
-    """Parse one metric and verify structured output."""
+        """Extract owner names from owners list."""
        raw = {
            "fullyQualifiedName": "catalog.metrics.test",
            "name": "test",
            "displayName": "Test",
            "description": "Test",
            "expression": "SELECT",
            "owners": [
                {"name": "alice", "email": "alice@example.com"},
                {"name": "bob"},
            ],
            "tags": [],
        }
-    def test_parse_total_revenue(self):
+        result = _build_om_metric_detail(raw)
        parser = MetricParser(METRICS_DIR)
        data = parser.parse_metric("revenue/total_revenue.yml")
-        assert data["name"] == "total_revenue"
+        # Owner names go to notes.all
-        assert data["display_name"] == "Total Revenue"
+        assert len(result["notes"]["all"]) == 0  # We don't populate this from owners yet
        assert data["category"] == "revenue"
        assert data["category_color"] == "#0073D1"
        assert data["metadata"]["unit"] == "USD"
        assert data["metadata"]["grain"] == "monthly"
        assert len(data["dimensions"]) > 0
        assert "sql" in data["sql_examples"]
        assert data["technical"]["table"] == "orders"
        assert data["technical"]["expression"] == "SUM(total_amount)"
-    def test_parse_metric_with_tables_field(self):
+    def test_empty_expression_no_sql_example(self):
-        parser = MetricParser(METRICS_DIR)
+        """Don't add empty expression to sql_examples."""
-        data = parser.parse_metric("revenue/average_order_value.yml")
+        raw = {
            "fullyQualifiedName": "catalog.metrics.test",
            "name": "test",
            "displayName": "Test",
            "description": "Test",
            "expression": "",
            "owners": [],
            "tags": [],
        }
-        assert data["name"] == "average_order_value"
+        result = _build_om_metric_detail(raw)
        assert "sql_by_segment" in data["sql_examples"]
-
+        assert result["sql_examples"] == {}
 class TestLoadMetricsData:
    """Check the shape of the category list produced by _load_metrics_data."""
    def test_returns_four_categories(self):
        # Imported lazily so collecting this module does not import the webapp.
        from webapp.app import _load_metrics_data
        categories = _load_metrics_data()
        assert isinstance(categories, list)
        assert len(categories) == 4
        keys = [entry["key"] for entry in categories]
        for expected_key in ("revenue", "customers", "marketing", "support"):
            assert expected_key in keys
    def test_total_metrics_count(self):
        from webapp.app import _load_metrics_data
        categories = _load_metrics_data()
        metric_counts = [len(entry["metrics"]) for entry in categories]
        assert sum(metric_counts) == 10
    def test_metric_has_required_fields(self):
        from webapp.app import _load_metrics_data
        required = ("name", "display_name", "description", "grain", "path")
        for entry in _load_metrics_data():
            for metric in entry["metrics"]:
                # Every list entry must carry all keys the catalog page reads.
                for field in required:
                    assert field in metric
 class TestDynamicSqlFields:
    """Check that the parser picks up sql_by_* fields alongside the static sql field."""
    def test_dynamic_sql_fields_discovered(self):
        sql_examples = MetricParser(METRICS_DIR).parse_metric("revenue/total_revenue.yml")["sql_examples"]
        # sql_by_channel is not a fixed field; it must come from dynamic discovery.
        assert "sql_by_channel" in sql_examples
        assert sql_examples["sql_by_channel"]["title"] == "By Channel"
    def test_dynamic_sql_title_generation(self):
        sql_examples = MetricParser(METRICS_DIR).parse_metric("customers/repeat_purchase_rate.yml")["sql_examples"]
        # The title is expected to be derived from the sql_by_channel field name.
        assert "sql_by_channel" in sql_examples
        assert sql_examples["sql_by_channel"]["title"] == "By Channel"
    def test_static_sql_still_works(self):
        sql_examples = MetricParser(METRICS_DIR).parse_metric("revenue/total_revenue.yml")["sql_examples"]
        assert "sql" in sql_examples
        assert sql_examples["sql"]["title"] == "Basic Query"
--- a/webapp/app.py
+++ b/webapp/app.py
@ -56,6 +56,12 @@ except ImportError:
    _CATALOG_ENRICHER_AVAILABLE = False
    CatalogEnricher = None
 # Metric parser for modal detail rendering
 try:
    from webapp.utils.metric_parser import MetricParser
 except ImportError:
    MetricParser = None
 # Configure logging
 logging.basicConfig(
    level=logging.INFO,
@ -458,9 +464,17 @@ METRIC_CATEGORY_META = {
 def _load_metrics_data():
    """Load business metric definitions for catalog display.
    Prefers metrics from OpenMetadata catalog. Falls back to YAML files if catalog unavailable.
    Returns list of category dicts ordered by METRIC_CATEGORY_META:
-    [{'key': 'revenue', 'label': 'Revenue', 'css': 'sales', 'metrics': [...]}, ...]
+    [{'key': 'finance', 'label': 'Finance...', 'css': '...', 'metrics': [...]}, ...]
    """
    # Try catalog first (Phase 2)
    catalog_metrics = _load_metrics_from_catalog()
    if catalog_metrics:
        return catalog_metrics
    # Fallback to YAML files if catalog unavailable
    # Try production path first, fall back to local dev path
    metrics_dir = Path("/data/docs/metrics")
    if not metrics_dir.exists():
@ -520,6 +534,214 @@ def _load_metrics_data():
    return result
 def _parse_om_metric(raw_metric: dict) -> dict:
    """
    Parse raw OpenMetadata metric dict into format for metric list display.
    Extracts category, grain from tags with standard prefixes:
    - Category: tagFQN like "MetricCategory.finance" or "Category.marketing"
    - Grain: tagFQN like "Grain.monthly"
    Args:
        raw_metric: Raw metric dict from OpenMetadata (id, fullyQualifiedName, description, tags, etc.)
    Returns:
        Dict with keys: name, display_name, description, grain, path
        (path = "catalog:{fullyQualifiedName}" for JS routing)
    """
    fqn = raw_metric.get("fullyQualifiedName", "")
    name = raw_metric.get("name", "")
    display_name = raw_metric.get("displayName", name)
    description = raw_metric.get("description", "") or ""
    # Extract category and grain from tags
    tags = raw_metric.get("tags", [])
    category = "general"
    grain = ""
    for tag in tags:
        tag_fqn = tag.get("tagFQN", "")
        # Extract category from MetricCategory.* or Category.* tags
        if tag_fqn.startswith("MetricCategory."):
            category = tag_fqn.split(".", 1)[1]
        elif tag_fqn.startswith("Category."):
            category = tag_fqn.split(".", 1)[1]
        # Extract grain from Grain.* tags
        if tag_fqn.startswith("Grain."):
            grain = tag_fqn.split(".", 1)[1]
    return {
        "name": name,
        "display_name": display_name,
        "description": description,
        "grain": grain,
        "category": category,
        "path": f"catalog:{fqn}",  # Special prefix for JS routing
    }
 def _load_metrics_from_catalog() -> list:
    """
    Build the categorized metric list from the OpenMetadata catalog.

    Each raw metric is parsed with _parse_om_metric() and bucketed by its
    category. Categories known to METRIC_CATEGORY_META come first, in their
    configured order and with their configured label/css; any other category
    follows in alphabetical order with a label derived from its key.

    Returns:
        List of category dicts in the same shape as _load_metrics_data():
        [
            {'key': 'finance', 'label': '...', 'css': '...', 'metrics': [...]},
            {'key': 'marketing', 'label': '...', 'css': '...', 'metrics': [...]}
        ]
        Empty list when the catalog is disabled, returns no metrics, or fails.
    """
    enricher = _catalog_enricher
    if not (enricher and enricher.enabled):
        return []

    try:
        fetched = enricher.get_metrics()
        if not fetched:
            logger.debug("No metrics found in catalog")
            return []

        # Bucket parsed metrics by category; one bad metric must not drop the rest.
        by_category = {}
        for raw in fetched:
            try:
                parsed = _parse_om_metric(raw)
                by_category.setdefault(parsed["category"], []).append(parsed)
            except Exception as e:
                logger.warning(f"Failed to parse metric {raw.get('name', '?')}: {e}")

        # Known categories first, in the order configured in METRIC_CATEGORY_META.
        known_in_order = sorted(METRIC_CATEGORY_META.items(), key=lambda x: x[1]["order"])
        result = [
            {
                "key": key,
                "label": meta["label"],
                "css": meta["css"],
                "metrics": by_category[key],
            }
            for key, meta in known_in_order
            if key in by_category
        ]

        # Categories without configured metadata go last, sorted by key.
        result.extend(
            {
                "key": key,
                "label": key.replace("_", " ").title(),
                "css": key,
                "metrics": members,
            }
            for key, members in sorted(by_category.items())
            if key not in METRIC_CATEGORY_META
        )

        logger.info(f"Loaded {sum(len(c['metrics']) for c in result)} metrics from catalog")
        return result
    except Exception as e:
        logger.warning(f"Failed to load metrics from catalog: {e}")
        return []
 def _build_om_metric_detail(raw_metric: dict) -> dict:
    """
    Convert a raw OpenMetadata metric into MetricParser-compatible JSON for the modal.

    Maps OpenMetadata fields onto the MetricParser structure (name,
    display_name, category, metadata, overview, sql_examples, technical, ...).
    Metadata is read from tags with standard prefixes:
    - MetricType.* -> metadata.type
    - Unit.*       -> metadata.unit
    - Grain.*      -> metadata.grain
    - MetricCategory.* or Category.* -> category (same prefixes the metric
      list uses, so list and modal agree on the category)
    - Dimension.*  -> appended to dimensions
    For single-valued fields the last matching tag wins.

    Fields that are missing or explicitly null are treated alike: text fields
    become "", displayName falls back to name, a null tag list is empty.
    Owners in the payload are not surfaced in the result yet.

    Args:
        raw_metric: Raw metric dict from OpenMetadata

    Returns:
        Dict in the MetricParser detail format. Sections that have no
        OpenMetadata source are present but empty; sql_examples contains a
        single "expression" entry only when the metric has a non-empty
        expression.
    """
    name = raw_metric.get("name") or ""
    # `or` (not a .get default) so an explicit null/empty displayName falls back too.
    display_name = raw_metric.get("displayName") or name
    description = raw_metric.get("description") or ""
    expression = raw_metric.get("expression") or ""

    metric_type = ""
    unit = ""
    grain = ""
    category = "general"
    dimensions = []
    for tag in raw_metric.get("tags") or []:
        prefix, sep, value = (tag.get("tagFQN") or "").partition(".")
        if not sep or not value:
            # Not a "<Prefix>.<value>" tag, or nothing after the dot.
            continue
        if prefix == "MetricType":
            metric_type = value
        elif prefix == "Unit":
            unit = value
        elif prefix == "Grain":
            grain = value
        elif prefix in ("MetricCategory", "Category"):
            category = value
        elif prefix == "Dimension":
            dimensions.append(value)

    # MetricParser is None when its import failed at module load; fall back to
    # the neutral default color instead of raising AttributeError.
    category_colors = getattr(MetricParser, "CATEGORY_COLORS", None) or {}

    sql_examples = {}
    if expression:
        sql_examples["expression"] = {
            "title": "Metric Expression",
            "query": expression,
            "complexity": "simple",
        }

    # Build MetricParser-compatible structure
    return {
        "name": name,
        "display_name": display_name,
        "category": category,
        "category_color": category_colors.get(category, "#6B7280"),
        "metadata": {
            "type": metric_type,
            "unit": unit,
            "grain": grain,
            "time_column": "",  # Not available in OpenMetadata
        },
        "overview": {
            "description": description.strip(),
            "key_insights": [],  # Not available in OpenMetadata
        },
        "validation": None,  # Not available in OpenMetadata
        "dimensions": dimensions,
        "notes": {
            "all": [],  # Not available in OpenMetadata
            "key_insights": [],
        },
        "sql_examples": sql_examples,
        "technical": {
            "table": "",  # Not available in OpenMetadata
            "expression": expression,
            "synonyms": [],
            "data_sources": [],
        },
        "special_sections": {},
    }
 def _send_welcome_message(username: str) -> None:
    """Send a welcome message to the user via bot socket after linking."""
    try:
@ -787,6 +1009,36 @@ def register_routes(app: Flask) -> None:
            logger.error(f"Error parsing metric {metric_path}: {e}")
            return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500
    @app.route("/api/catalog/metrics/<path:metric_fqn>")
    @login_required
    def api_catalog_metric(metric_fqn):
        """
        API endpoint to serve a metric from the OpenMetadata catalog as structured JSON.

        Args:
            metric_fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue")

        Returns:
            JSON matching MetricParser format for modal rendering, or a JSON
            error object with status:
            - 503 when the catalog enricher is missing or disabled
            - 404 when the catalog reports that the metric does not exist
            - 500 for any other failure while fetching or converting
        """
        global _catalog_enricher
        if not _catalog_enricher or not _catalog_enricher.enabled:
            return jsonify({"error": "Catalog not available"}), 503
        try:
            # Fetch metric from catalog
            # NOTE(review): reaches into the enricher's private _client; a
            # public accessor on CatalogEnricher would be cleaner.
            raw = _catalog_enricher._client.get_metric_by_fqn(metric_fqn)
            # Convert to MetricParser format
            metric_data = _build_om_metric_detail(raw)
            return jsonify(metric_data)
        except Exception as e:
            # The client raises an HTTP status error for non-2xx responses;
            # read the upstream status by duck-typing so this module needs no
            # extra import. An unknown FQN is a client error, not a server fault.
            upstream_status = getattr(getattr(e, "response", None), "status_code", None)
            if upstream_status == 404:
                logger.warning(f"Catalog metric not found: {metric_fqn}")
                return jsonify({"error": f"Metric not found: {metric_fqn}"}), 404
            logger.error(f"Error fetching catalog metric {metric_fqn}: {e}")
            return jsonify({"error": f"Failed to fetch metric: {str(e)}"}), 500
    @app.route("/docs/metrics/<path:metric_path>")
    @login_required
    def serve_metric(metric_path):
--- a/webapp/static/js/metric_modal.js
+++ b/webapp/static/js/metric_modal.js
@ -9,7 +9,7 @@ let currentMetricData = null;
 /**
 * Open metric modal and load data
- * @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml')
+ * @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') or catalog FQN (e.g., 'catalog:...')
 */
 function openMetricModal(metricPath) {
    currentMetricPath = metricPath;
@ -23,8 +23,13 @@ function openMetricModal(metricPath) {
    // Show loading state
    body.innerHTML = '<div class="metric-loading"><div class="metric-loading-spinner"></div><div class="metric-loading-text">Loading metric...</div></div>';
    // Route based on prefix: catalog:FQN uses /api/catalog/metrics, YAML paths use /api/metrics
    const url = metricPath.startsWith('catalog:')
        ? `/api/catalog/metrics/${metricPath.slice(8)}`  // Remove 'catalog:' prefix
        : `/api/metrics/${metricPath}`;
    // Fetch metric data
-    fetch(`/api/metrics/${metricPath}`)
+    fetch(url)
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
--- a/webapp/static/style-custom.css
+++ b/webapp/static/style-custom.css
@ -33,6 +33,7 @@
    --font-medium: 500;
    --font-semibold: 600;
    --font-bold: 700;
    --font-extrabold: 800;
    /* Spacing */
    --space-1: 4px;
@ -194,7 +195,7 @@ body {
 .welcome-v2 h2 {
    font-size: var(--text-xl);
-    font-weight: var(--font-semibold);
+    font-weight: var(--font-extrabold);
    color: var(--text-primary);
    margin-bottom: var(--space-2);
 }
--- a/webapp/templates/catalog.html
+++ b/webapp/templates/catalog.html
@ -118,7 +118,7 @@
        .page-title h1 {
            font-size: 24px;
-            font-weight: 600;
+            font-weight: 800;
            color: var(--text-primary);
            margin-bottom: 4px;
        }
@ -186,7 +186,7 @@
        .source-card-name {
            font-size: 16px;
-            font-weight: 600;
+            font-weight: 700;
            color: var(--text-primary);
            margin-bottom: 2px;
        }