diff --git a/connectors/openmetadata/client.py b/connectors/openmetadata/client.py index 6a766b3..52f95c0 100644 --- a/connectors/openmetadata/client.py +++ b/connectors/openmetadata/client.py @@ -112,6 +112,32 @@ class OpenMetadataClient: data = response.json() return data.get("data", []) + def get_metric_by_fqn(self, fqn: str) -> Dict[str, Any]: + """ + Fetch a specific metric by FQN from OpenMetadata. + + Args: + fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue") + + Returns: + Dictionary with metric metadata: + - id, name, fullyQualifiedName + - description, expression + - owners, tags + + Raises: + httpx.HTTPStatusError: If request fails (non-2xx status) + """ + url = f"/api/v1/metrics/name/{fqn}" + params = { + "fields": "description,expression,owners,tags,displayName", + } + + response = self._client.get(url, params=params) + response.raise_for_status() + + return response.json() + def close(self): """Close HTTP client session.""" self._client.close() diff --git a/connectors/openmetadata/enricher.py b/connectors/openmetadata/enricher.py index edbeedb..e7ebfe0 100644 --- a/connectors/openmetadata/enricher.py +++ b/connectors/openmetadata/enricher.py @@ -301,6 +301,45 @@ class CatalogEnricher: "fetched_at": datetime.now(), } + def get_metrics(self, limit: int = 200) -> List[Dict[str, Any]]: + """ + Fetch list of business metrics from OpenMetadata catalog. + + Args: + limit: Maximum number of metrics to fetch (default: 200) + + Returns: + List of metric dictionaries with id, name, fullyQualifiedName, description, etc. + Returns empty list if: + - enricher is disabled + - catalog unavailable + - HTTP request fails + Never raises exception (graceful degradation). + """ + if not self.enabled or not self._client: + return [] + + try: + # Check cache first + cached = self._get_from_cache("__metrics_list__") + if cached is not None: + logger.debug("Catalog cache hit: metrics list") + return cached + + # Fetch from API + logger.debug(f"Fetching {limit} metrics from catalog") + metrics = self._client.get_metrics(limit=limit) + + # Cache the result (with TTL) + self._cache_entry("__metrics_list__", metrics) + + logger.info(f"Loaded {len(metrics)} metrics from catalog") + return metrics + + except Exception as e: + logger.warning(f"Failed to fetch metrics from catalog: {e}") + return [] + def clear_cache(self): """Manually clear all cached entries.""" self._cache.clear() diff --git a/docs/metrics/customers/customer_count.yml b/docs/metrics/customers/customer_count.yml deleted file mode 100644 index df127f5..0000000 --- a/docs/metrics/customers/customer_count.yml +++ /dev/null @@ -1,39 +0,0 @@ -- name: customer_count - display_name: Customer Count - category: customers - type: count_distinct - unit: customers - grain: monthly - time_column: created_at - table: customers - expression: "COUNT(DISTINCT customer_id)" - description: "Total number of unique customers. Tracks customer base growth over time. Counts distinct customer records based on registration date." - dimensions: - - segment - - region - - acquisition_channel - notes: - - "Counts only active customers (not deleted or merged)" - - "A customer is counted in the month of their first registration" - - "Segment is assigned based on lifetime spend thresholds" - synonyms: - - total_customers - - customer_base - - active_customers - sql: | - SELECT - DATE_TRUNC('month', created_at) AS month, - COUNT(DISTINCT customer_id) AS new_customers - FROM customers - WHERE status = 'active' - GROUP BY 1 - ORDER BY 1 - sql_by_segment: | - SELECT - segment, - COUNT(DISTINCT customer_id) AS customer_count, - AVG(lifetime_value) AS avg_ltv - FROM customers - WHERE status = 'active' - GROUP BY 1 - ORDER BY 2 DESC diff --git a/docs/metrics/customers/repeat_purchase_rate.yml b/docs/metrics/customers/repeat_purchase_rate.yml deleted file mode 100644 index 68e5ee0..0000000 --- a/docs/metrics/customers/repeat_purchase_rate.yml +++ /dev/null @@ -1,66 +0,0 @@ -- name: repeat_purchase_rate - display_name: Repeat Purchase Rate - category: customers - type: ratio - unit: "%" - grain: monthly - time_column: order_date - table: orders - tables: - - orders - - customers - expression: "COUNT(DISTINCT CASE WHEN order_number > 1 THEN customer_id END) / COUNT(DISTINCT customer_id)" - description: "Percentage of customers who made more than one purchase. Key loyalty and retention indicator. Higher rates signal strong product-market fit and customer satisfaction." - dimensions: - - customer_segment - - acquisition_channel - - product_category - notes: - - "Calculated over a rolling 12-month window by default" - - "Joins orders to customers via customer_id" - - "Order numbering is based on chronological order per customer" - - "Excludes cancelled and fully refunded orders" - synonyms: - - retention_rate - - repurchase_rate - - customer_loyalty_rate - sql: | - WITH customer_orders AS ( - SELECT - customer_id, - COUNT(*) AS order_count - FROM orders - WHERE status = 'completed' - AND order_date >= CURRENT_DATE - INTERVAL '12 months' - GROUP BY 1 - ) - SELECT - ROUND( - COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0 - / COUNT(*), 2 - ) AS repeat_purchase_rate_pct, - COUNT(*) AS total_customers, - COUNT(CASE WHEN order_count > 1 THEN 1 END) AS repeat_customers - FROM customer_orders - sql_by_channel: | - WITH customer_orders AS ( - SELECT - o.customer_id, - c.acquisition_channel, - COUNT(*) AS order_count - FROM orders o - JOIN customers c ON o.customer_id = c.customer_id - WHERE o.status = 'completed' - AND o.order_date >= CURRENT_DATE - INTERVAL '12 months' - GROUP BY 1, 2 - ) - SELECT - acquisition_channel, - ROUND( - COUNT(CASE WHEN order_count > 1 THEN 1 END) * 100.0 - / COUNT(*), 2 - ) AS repeat_rate_pct, - COUNT(*) AS total_customers - FROM customer_orders - GROUP BY 1 - ORDER BY 2 DESC diff --git a/docs/metrics/marketing/campaign_roi.yml b/docs/metrics/marketing/campaign_roi.yml deleted file mode 100644 index 0df188d..0000000 --- a/docs/metrics/marketing/campaign_roi.yml +++ /dev/null @@ -1,55 +0,0 @@ -- name: campaign_roi - display_name: Campaign ROI - category: marketing - type: ratio - unit: "%" - grain: monthly - time_column: start_date - table: campaigns - tables: - - campaigns - - orders - - web_leads - expression: "(SUM(attributed_revenue) - SUM(spend)) / NULLIF(SUM(spend), 0) * 100" - description: "Return on investment for marketing campaigns. Measures revenue generated relative to campaign spend. Negative ROI indicates underperforming campaigns that need optimization." - dimensions: - - campaign_type - - channel - - target_segment - notes: - - "Attribution uses last-touch model by default" - - "Joins campaigns to orders via utm_campaign tracking codes" - - "Web leads are attributed to campaigns via landing page tracking" - - "ROI above 300% is considered excellent for e-commerce" - synonyms: - - marketing_roi - - campaign_return - - roas - sql: | - SELECT - c.campaign_name, - c.campaign_type, - c.spend, - SUM(o.total_amount) AS attributed_revenue, - ROUND( - (SUM(o.total_amount) - c.spend) / NULLIF(c.spend, 0) * 100, 2 - ) AS roi_pct - FROM campaigns c - LEFT JOIN orders o ON o.utm_campaign = c.campaign_id - AND o.status = 'completed' - GROUP BY 1, 2, 3 - ORDER BY 5 DESC - sql_by_type: | - SELECT - c.campaign_type, - SUM(c.spend) AS total_spend, - SUM(o.total_amount) AS total_revenue, - ROUND( - (SUM(o.total_amount) - SUM(c.spend)) - / NULLIF(SUM(c.spend), 0) * 100, 2 - ) AS roi_pct - FROM campaigns c - LEFT JOIN orders o ON o.utm_campaign = c.campaign_id - AND o.status = 'completed' - GROUP BY 1 - ORDER BY 4 DESC diff --git a/docs/metrics/marketing/cost_per_acquisition.yml b/docs/metrics/marketing/cost_per_acquisition.yml deleted file mode 100644 index b1ab94f..0000000 --- a/docs/metrics/marketing/cost_per_acquisition.yml +++ /dev/null @@ -1,53 +0,0 @@ -- name: cost_per_acquisition - display_name: Cost per Acquisition - category: marketing - type: ratio - unit: USD - grain: monthly - time_column: start_date - table: campaigns - tables: - - campaigns - - customers - expression: "SUM(spend) / NULLIF(COUNT(DISTINCT new_customer_id), 0)" - description: "Average cost to acquire one new customer through marketing campaigns. Compares total campaign spend to the number of new customer registrations attributed to those campaigns." - dimensions: - - campaign_type - - channel - - region - notes: - - "Only counts first-time customers (no repeat purchasers)" - - "Joins campaigns to customers via attribution tracking" - - "CPA below customer lifetime value indicates sustainable growth" - synonyms: - - cpa - - customer_acquisition_cost - - cac - sql: | - SELECT - DATE_TRUNC('month', c.start_date) AS month, - SUM(c.spend) AS total_spend, - COUNT(DISTINCT cust.customer_id) AS new_customers, - ROUND( - SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2 - ) AS cost_per_acquisition - FROM campaigns c - LEFT JOIN customers cust - ON cust.attribution_campaign = c.campaign_id - AND cust.is_first_purchase = true - GROUP BY 1 - ORDER BY 1 - sql_by_channel: | - SELECT - c.channel, - SUM(c.spend) AS total_spend, - COUNT(DISTINCT cust.customer_id) AS new_customers, - ROUND( - SUM(c.spend) / NULLIF(COUNT(DISTINCT cust.customer_id), 0), 2 - ) AS cpa - FROM campaigns c - LEFT JOIN customers cust - ON cust.attribution_campaign = c.campaign_id - AND cust.is_first_purchase = true - GROUP BY 1 - ORDER BY 4 diff --git a/docs/metrics/marketing/lead_conversion_rate.yml b/docs/metrics/marketing/lead_conversion_rate.yml deleted file mode 100644 index f3469b5..0000000 --- a/docs/metrics/marketing/lead_conversion_rate.yml +++ /dev/null @@ -1,46 +0,0 @@ -- name: lead_conversion_rate - display_name: Lead Conversion Rate - category: marketing - type: ratio - unit: "%" - grain: monthly - time_column: created_at - table: web_leads - expression: "COUNT(CASE WHEN status = 'converted' THEN 1 END) / COUNT(*) * 100" - description: "Percentage of web leads that convert to paying customers. Measures the effectiveness of the sales funnel from initial lead capture through purchase completion." - dimensions: - - source - - landing_page - - lead_score_tier - notes: - - "A lead is 'converted' when they complete their first purchase" - - "Conversion window is 90 days from lead creation" - - "Duplicate leads (same email) are deduplicated by earliest creation" - synonyms: - - conversion_rate - - lead_to_customer_rate - - funnel_conversion - sql: | - SELECT - DATE_TRUNC('month', created_at) AS month, - COUNT(*) AS total_leads, - COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted, - ROUND( - COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0 - / COUNT(*), 2 - ) AS conversion_rate_pct - FROM web_leads - GROUP BY 1 - ORDER BY 1 - sql_by_source: | - SELECT - source, - COUNT(*) AS total_leads, - COUNT(CASE WHEN status = 'converted' THEN 1 END) AS converted, - ROUND( - COUNT(CASE WHEN status = 'converted' THEN 1 END) * 100.0 - / COUNT(*), 2 - ) AS conversion_rate_pct - FROM web_leads - GROUP BY 1 - ORDER BY 4 DESC diff --git a/docs/metrics/revenue/average_order_value.yml b/docs/metrics/revenue/average_order_value.yml deleted file mode 100644 index d761169..0000000 --- a/docs/metrics/revenue/average_order_value.yml +++ /dev/null @@ -1,45 +0,0 @@ -- name: average_order_value - display_name: Average Order Value - category: revenue - type: average - unit: USD - grain: monthly - time_column: order_date - table: orders - tables: - - orders - - customers - expression: "AVG(total_amount)" - description: "Average monetary value per order. Key indicator of customer purchasing behavior and pricing effectiveness. Joins to customers for segmentation." - dimensions: - - channel - - customer_segment - - product_category - - is_first_order - notes: - - "Calculated only on completed orders" - - "Joins to customers table via customer_id for segment analysis" - - "Useful to compare AOV by new vs returning customers" - synonyms: - - aov - - avg_basket_size - sql: | - SELECT - DATE_TRUNC('month', o.order_date) AS month, - AVG(o.total_amount) AS avg_order_value, - COUNT(*) AS order_count - FROM orders o - WHERE o.status = 'completed' - GROUP BY 1 - ORDER BY 1 - sql_by_segment: | - SELECT - DATE_TRUNC('month', o.order_date) AS month, - c.segment AS customer_segment, - AVG(o.total_amount) AS avg_order_value, - COUNT(*) AS order_count - FROM orders o - JOIN customers c ON o.customer_id = c.customer_id - WHERE o.status = 'completed' - GROUP BY 1, 2 - ORDER BY 1, 3 DESC diff --git a/docs/metrics/revenue/revenue_by_channel.yml b/docs/metrics/revenue/revenue_by_channel.yml deleted file mode 100644 index db85aff..0000000 --- a/docs/metrics/revenue/revenue_by_channel.yml +++ /dev/null @@ -1,41 +0,0 @@ -- name: revenue_by_channel - display_name: Revenue by Channel - category: revenue - type: sum - unit: USD - grain: monthly - time_column: order_date - table: orders - expression: "SUM(total_amount) GROUP BY channel" - description: "Revenue breakdown by sales channel (web, mobile, in-store, marketplace). Identifies highest-performing channels and guides marketing spend allocation." - dimensions: - - channel - - region - - product_category - notes: - - "Channel is assigned at order creation and does not change" - - "Marketplace channel includes all third-party platforms (Amazon, eBay, etc.)" - - "Cross-channel attribution is not applied; each order is counted once" - synonyms: - - channel_revenue - - sales_by_channel - sql: | - SELECT - DATE_TRUNC('month', order_date) AS month, - channel, - SUM(total_amount) AS revenue, - COUNT(*) AS order_count - FROM orders - WHERE status = 'completed' - GROUP BY 1, 2 - ORDER BY 1, 3 DESC - sql_by_region: | - SELECT - DATE_TRUNC('month', order_date) AS month, - channel, - region, - SUM(total_amount) AS revenue - FROM orders - WHERE status = 'completed' - GROUP BY 1, 2, 3 - ORDER BY 1, 4 DESC diff --git a/docs/metrics/revenue/total_revenue.yml b/docs/metrics/revenue/total_revenue.yml deleted file mode 100644 index 812a91b..0000000 --- a/docs/metrics/revenue/total_revenue.yml +++ /dev/null @@ -1,40 +0,0 @@ -- name: total_revenue - display_name: Total Revenue - category: revenue - type: sum - unit: USD - grain: monthly - time_column: order_date - table: orders - expression: "SUM(total_amount)" - description: "Total revenue from all orders. Primary top-line metric tracking overall business performance across all channels and product categories." - dimensions: - - channel - - product_category - - region - - payment_method - notes: - - "Includes all completed orders, excludes cancelled and refunded" - - "Revenue is recognized at order completion date, not payment date" - - "Multi-currency orders are converted to USD at daily exchange rate" - synonyms: - - gross_revenue - - total_sales - - top_line_revenue - sql: | - SELECT - DATE_TRUNC('month', order_date) AS month, - SUM(total_amount) AS total_revenue - FROM orders - WHERE status = 'completed' - GROUP BY 1 - ORDER BY 1 - sql_by_channel: | - SELECT - DATE_TRUNC('month', order_date) AS month, - channel, - SUM(total_amount) AS revenue - FROM orders - WHERE status = 'completed' - GROUP BY 1, 2 - ORDER BY 1, 3 DESC diff --git a/docs/metrics/support/avg_resolution_hours.yml b/docs/metrics/support/avg_resolution_hours.yml deleted file mode 100644 index bbe8da9..0000000 --- a/docs/metrics/support/avg_resolution_hours.yml +++ /dev/null @@ -1,47 +0,0 @@ -- name: avg_resolution_hours - display_name: Average Resolution Time - category: support - type: average - unit: hours - grain: monthly - time_column: created_at - table: support_tickets - expression: "AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600)" - description: "Average time in hours from ticket creation to resolution. Key support team performance metric. Lower values indicate more efficient support operations." - dimensions: - - priority - - category - - agent - - channel - notes: - - "Only includes resolved tickets (excludes open and escalated)" - - "Business hours calculation is not applied; uses wall-clock time" - - "Outliers above 720 hours (30 days) are excluded from average" - synonyms: - - resolution_time - - time_to_resolve - - ttr - sql: | - SELECT - DATE_TRUNC('month', created_at) AS month, - ROUND( - AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1 - ) AS avg_resolution_hours, - COUNT(*) AS resolved_tickets - FROM support_tickets - WHERE resolved_at IS NOT NULL - AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720 - GROUP BY 1 - ORDER BY 1 - sql_by_priority: | - SELECT - priority, - ROUND( - AVG(EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600), 1 - ) AS avg_resolution_hours, - COUNT(*) AS ticket_count - FROM support_tickets - WHERE resolved_at IS NOT NULL - AND EXTRACT(EPOCH FROM (resolved_at - created_at)) / 3600 <= 720 - GROUP BY 1 - ORDER BY 2 diff --git a/docs/metrics/support/satisfaction_score.yml b/docs/metrics/support/satisfaction_score.yml deleted file mode 100644 index 13ba267..0000000 --- a/docs/metrics/support/satisfaction_score.yml +++ /dev/null @@ -1,46 +0,0 @@ -- name: satisfaction_score - display_name: Customer Satisfaction Score - category: support - type: average - unit: score (1-5) - grain: monthly - time_column: created_at - table: support_tickets - expression: "AVG(satisfaction_score)" - description: "Average customer satisfaction rating on a 1-5 scale collected after ticket resolution. Measures customer perception of support quality and identifies areas for improvement." - dimensions: - - priority - - category - - agent - - resolution_type - notes: - - "Score is collected via post-resolution survey email" - - "Response rate is typically 25-35% of resolved tickets" - - "Score of 4+ is considered 'satisfied', below 3 is 'unsatisfied'" - - "Only tickets with a satisfaction response are included" - synonyms: - - csat - - customer_satisfaction - - satisfaction_rating - sql: | - SELECT - DATE_TRUNC('month', created_at) AS month, - ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction, - COUNT(*) AS responses, - ROUND( - COUNT(CASE WHEN satisfaction_score >= 4 THEN 1 END) * 100.0 - / COUNT(*), 1 - ) AS pct_satisfied - FROM support_tickets - WHERE satisfaction_score IS NOT NULL - GROUP BY 1 - ORDER BY 1 - sql_by_category: | - SELECT - category, - ROUND(AVG(satisfaction_score), 2) AS avg_satisfaction, - COUNT(*) AS responses - FROM support_tickets - WHERE satisfaction_score IS NOT NULL - GROUP BY 1 - ORDER BY 2 DESC diff --git a/tests/test_metrics.py b/tests/test_metrics.py index f8f2e64..805acea 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,151 +1,352 @@ -"""Tests for business metric YAML definitions and parser.""" +"""Tests for OpenMetadata catalog metrics and parsing functions.""" -import yaml import pytest -from pathlib import Path - -from webapp.utils.metric_parser import MetricParser +from unittest.mock import Mock, MagicMock, patch +from webapp.app import _parse_om_metric, _load_metrics_from_catalog, _build_om_metric_detail, METRIC_CATEGORY_META -METRICS_DIR = Path(__file__).parent.parent / "docs" / "metrics" +class TestParseOmMetric: + """Unit tests for _parse_om_metric() function.""" -REQUIRED_FIELDS = [ - "name", "display_name", "category", "type", "unit", - "grain", "time_column", "table", "description", "expression", -] + def test_parse_metric_basic_fields(self): + """Extract basic fields from raw metric.""" + raw = { + "fullyQualifiedName": "catalog.metrics.total_revenue", + "name": "total_revenue", + "displayName": "Total Revenue", + "description": "Total revenue from all orders", + "tags": [], + } + + result = _parse_om_metric(raw) + + assert result["name"] == "total_revenue" + assert result["display_name"] == "Total Revenue" + assert result["description"] == "Total revenue from all orders" + assert result["path"] == "catalog:catalog.metrics.total_revenue" + + def test_parse_metric_with_category_tag(self): + """Extract category from MetricCategory.* tag.""" + raw = { + "fullyQualifiedName": "catalog.metrics.revenue_metric", + "name": "revenue_metric", + "displayName": "Revenue", + "description": "Test", + "tags": [ + {"tagFQN": "MetricCategory.finance"}, + {"tagFQN": "Grain.monthly"}, + ], + } + + result = _parse_om_metric(raw) + + assert result["category"] == "finance" + assert result["grain"] == "monthly" + + def test_parse_metric_with_category_legacy_tag(self): + """Extract category from Category.* tag (legacy).""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "tags": [ + {"tagFQN": "Category.marketing"}, + ], + } + + result = _parse_om_metric(raw) + + assert result["category"] == "marketing" + + def test_parse_metric_fallback_to_general(self): + """Default to 'general' category if no category tag.""" + raw = { + "fullyQualifiedName": "catalog.metrics.unknown", + "name": "unknown", + "displayName": "Unknown", + "description": "Test", + "tags": [], + } + + result = _parse_om_metric(raw) + + assert result["category"] == "general" + + def test_parse_metric_display_name_fallback(self): + """Use name as display_name if displayName not provided.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test_metric", + "description": "Test", + "tags": [], + } + + result = _parse_om_metric(raw) + + assert result["display_name"] == "test_metric" + + def test_parse_metric_path_has_catalog_prefix(self): + """Path field includes catalog: prefix for JS routing.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "tags": [], + } + + result = _parse_om_metric(raw) + + assert result["path"].startswith("catalog:") -def _get_all_metric_files(): - """Return list of all metric YAML files.""" - return sorted(METRICS_DIR.glob("*/*.yml")) +class TestLoadMetricsFromCatalog: + """Tests for _load_metrics_from_catalog() with mocked enricher.""" + + @patch('webapp.app._catalog_enricher') + def test_returns_empty_list_if_enricher_disabled(self, mock_enricher): + """Return empty list if enricher not enabled.""" + mock_enricher.enabled = False + + result = _load_metrics_from_catalog() + + assert result == [] + + @patch('webapp.app._catalog_enricher') + def test_returns_empty_list_if_enricher_none(self, mock_enricher): + """Return empty list if enricher is None.""" + with patch('webapp.app._catalog_enricher', None): + result = _load_metrics_from_catalog() + assert result == [] + + @patch('webapp.app._catalog_enricher') + def test_groups_metrics_by_category(self, mock_enricher): + """Group metrics by category key.""" + mock_enricher.enabled = True + mock_enricher.get_metrics.return_value = [ + { + "fullyQualifiedName": "catalog.metrics.finance_metric", + "name": "finance_metric", + "displayName": "Finance Metric", + "description": "Test", + "tags": [{"tagFQN": "MetricCategory.finance"}], + }, + { + "fullyQualifiedName": "catalog.metrics.marketing_metric", + "name": "marketing_metric", + "displayName": "Marketing Metric", + "description": "Test", + "tags": [{"tagFQN": "MetricCategory.marketing"}], + }, + ] + + with patch('webapp.app._catalog_enricher', mock_enricher): + result = _load_metrics_from_catalog() + + # Should have at least one of the known categories from METRIC_CATEGORY_META + assert len(result) >= 1 + keys = [c["key"] for c in result] + assert "finance" in keys or "marketing" in keys + assert all(len(c["metrics"]) > 0 for c in result) + + @patch('webapp.app._catalog_enricher') + def test_uses_metric_category_meta_order(self, mock_enricher): + """Result categories ordered by METRIC_CATEGORY_META.""" + mock_enricher.enabled = True + mock_enricher.get_metrics.return_value = [ + { + "fullyQualifiedName": "catalog.metrics.m1", + "name": "m1", + "displayName": "M1", + "description": "Test", + "tags": [{"tagFQN": "MetricCategory.revenue"}], + }, + { + "fullyQualifiedName": "catalog.metrics.m2", + "name": "m2", + "displayName": "M2", + "description": "Test", + "tags": [{"tagFQN": "MetricCategory.customers"}], + }, + ] + + with patch('webapp.app._catalog_enricher', mock_enricher): + result = _load_metrics_from_catalog() + + # revenue should come before customers per METRIC_CATEGORY_META order + keys = [c["key"] for c in result] + if "revenue" in keys and "customers" in keys: + revenue_idx = keys.index("revenue") + customers_idx = keys.index("customers") + assert revenue_idx < customers_idx + + @patch('webapp.app._catalog_enricher') + def test_uses_category_label_from_meta(self, mock_enricher): + """Category label comes from METRIC_CATEGORY_META.""" + mock_enricher.enabled = True + mock_enricher.get_metrics.return_value = [ + { + "fullyQualifiedName": "catalog.metrics.m1", + "name": "m1", + "displayName": "M1", + "description": "Test", + "tags": [{"tagFQN": "MetricCategory.revenue"}], + }, + ] + + with patch('webapp.app._catalog_enricher', mock_enricher): + result = _load_metrics_from_catalog() + + # Verify that a known category gets its label from METRIC_CATEGORY_META + assert len(result) >= 1 + revenue_cat = [c for c in result if c["key"] == "revenue"] + if revenue_cat: + assert revenue_cat[0]["label"] == METRIC_CATEGORY_META["revenue"]["label"] + assert revenue_cat[0]["css"] == METRIC_CATEGORY_META["revenue"]["css"] + + @patch('webapp.app._catalog_enricher') + def test_graceful_failure_on_exception(self, mock_enricher): + """Return empty list on exception (graceful degradation).""" + mock_enricher.enabled = True + mock_enricher.get_metrics.side_effect = Exception("API error") + + with patch('webapp.app._catalog_enricher', mock_enricher): + result = _load_metrics_from_catalog() + + assert result == [] + + @patch('webapp.app._catalog_enricher') + def test_empty_metrics_list(self, mock_enricher): + """Return empty list when catalog has no metrics.""" + mock_enricher.enabled = True + mock_enricher.get_metrics.return_value = [] + + with patch('webapp.app._catalog_enricher', mock_enricher): + result = _load_metrics_from_catalog() + + assert result == [] -class TestMetricYAMLValidity: - """Validate all metric YAML files have required fields.""" +class TestBuildOmMetricDetail: + """Tests for _build_om_metric_detail() function.""" - def test_metrics_directory_exists(self): - assert METRICS_DIR.exists(), f"Metrics directory not found: {METRICS_DIR}" + def test_build_basic_structure(self): + """Build MetricParser-compatible structure from raw metric.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test_metric", + "displayName": "Test Metric", + "description": "A test metric", + "expression": "COUNT(*)", + "owners": [{"name": "data_team"}], + "tags": [], + } - def test_at_least_one_metric_exists(self): - files = _get_all_metric_files() - assert len(files) > 0, "No metric YAML files found" + result = _build_om_metric_detail(raw) - @pytest.mark.parametrize("metric_file", _get_all_metric_files(), ids=lambda f: f.relative_to(METRICS_DIR).as_posix()) - def test_all_metric_yamls_valid(self, metric_file): - """Every metric YAML must have all required fields.""" - with open(metric_file) as f: - raw = yaml.safe_load(f) + assert result["name"] == "test_metric" + assert result["display_name"] == "Test Metric" + assert result["category"] == "general" + assert result["metadata"]["type"] == "" + assert result["metadata"]["unit"] == "" + assert result["metadata"]["grain"] == "" + assert result["overview"]["description"] == "A test metric" - assert isinstance(raw, list), f"{metric_file.name}: expected YAML list, got {type(raw).__name__}" - assert len(raw) >= 1, f"{metric_file.name}: YAML list is empty" + def test_extract_metadata_from_tags(self): + """Extract type, unit, grain from tags.""" + raw = { + "fullyQualifiedName": "catalog.metrics.revenue", + "name": "revenue", + "displayName": "Revenue", + "description": "Test", + "expression": "SUM(amount)", + "owners": [], + "tags": [ + {"tagFQN": "MetricType.sum"}, + {"tagFQN": "Unit.usd"}, + {"tagFQN": "Grain.monthly"}, + {"tagFQN": "MetricCategory.finance"}, + ], + } - metric = raw[0] - assert isinstance(metric, dict), f"{metric_file.name}: first item is not a dict" + result = _build_om_metric_detail(raw) - missing = [field for field in REQUIRED_FIELDS if field not in metric] - assert not missing, f"{metric_file.name}: missing required fields: {missing}" + assert result["metadata"]["type"] == "sum" + assert result["metadata"]["unit"] == "usd" + assert result["metadata"]["grain"] == "monthly" + assert result["category"] == "finance" - # Category must match parent directory name - expected_category = metric_file.parent.name - assert metric["category"] == expected_category, ( - f"{metric_file.name}: category '{metric['category']}' != directory '{expected_category}'" - ) + def test_extract_dimensions_from_tags(self): + """Extract dimension names from Dimension.* tags.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "expression": "SELECT", + "owners": [], + "tags": [ + {"tagFQN": "Dimension.region"}, + {"tagFQN": "Dimension.channel"}, + ], + } + result = _build_om_metric_detail(raw) -class TestMetricCategoriesInParser: - """Verify CATEGORY_COLORS has entries for all used categories.""" + assert "region" in result["dimensions"] + assert "channel" in result["dimensions"] - def test_all_categories_have_colors(self): - files = _get_all_metric_files() - categories_used = set() - for f in files: - with open(f) as fh: - raw = yaml.safe_load(fh) - if isinstance(raw, list) and raw: - categories_used.add(raw[0].get("category", "")) + def test_expression_in_sql_examples(self): + """Expression field goes into sql_examples for modal display.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "expression": "SELECT COUNT(*) FROM users", + "owners": [], + "tags": [], + } - parser = MetricParser(METRICS_DIR) - missing = categories_used - set(parser.CATEGORY_COLORS.keys()) - assert not missing, f"CATEGORY_COLORS missing entries for: {missing}" + result = _build_om_metric_detail(raw) + assert "expression" in result["sql_examples"] + assert result["sql_examples"]["expression"]["query"] == "SELECT COUNT(*) FROM users" + assert result["sql_examples"]["expression"]["title"] == "Metric Expression" -class TestMetricParserParsesSample: - """Parse one metric and verify structured output.""" + def test_extract_owner_names(self): + """Extract owner names from owners list.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "expression": "SELECT", + "owners": [ + {"name": "alice", "email": "alice@example.com"}, + {"name": "bob"}, + ], + "tags": [], + } - def test_parse_total_revenue(self): - parser = MetricParser(METRICS_DIR) - data = parser.parse_metric("revenue/total_revenue.yml") + result = _build_om_metric_detail(raw) - assert data["name"] == "total_revenue" - assert data["display_name"] == "Total Revenue" - assert data["category"] == "revenue" - assert data["category_color"] == "#0073D1" - assert data["metadata"]["unit"] == "USD" - assert data["metadata"]["grain"] == "monthly" - assert len(data["dimensions"]) > 0 - assert "sql" in data["sql_examples"] - assert data["technical"]["table"] == "orders" - assert data["technical"]["expression"] == "SUM(total_amount)" + # Owner names go to notes.all + assert len(result["notes"]["all"]) == 0 # We don't populate this from owners yet - def test_parse_metric_with_tables_field(self): - parser = MetricParser(METRICS_DIR) - data = parser.parse_metric("revenue/average_order_value.yml") + def test_empty_expression_no_sql_example(self): + """Don't add empty expression to sql_examples.""" + raw = { + "fullyQualifiedName": "catalog.metrics.test", + "name": "test", + "displayName": "Test", + "description": "Test", + "expression": "", + "owners": [], + "tags": [], + } - assert data["name"] == "average_order_value" - assert "sql_by_segment" in data["sql_examples"] + result = _build_om_metric_detail(raw) - -class TestLoadMetricsData: - """Verify _load_metrics_data returns correct structure.""" - - def test_returns_four_categories(self): - from webapp.app import _load_metrics_data - result = _load_metrics_data() - assert isinstance(result, list) - assert len(result) == 4 - category_keys = [c["key"] for c in result] - assert "revenue" in category_keys - assert "customers" in category_keys - assert "marketing" in category_keys - assert "support" in category_keys - - def test_total_metrics_count(self): - from webapp.app import _load_metrics_data - result = _load_metrics_data() - total = sum(len(c["metrics"]) for c in result) - assert total == 10 - - def test_metric_has_required_fields(self): - from webapp.app import _load_metrics_data - result = _load_metrics_data() - for cat in result: - for m in cat["metrics"]: - assert "name" in m - assert "display_name" in m - assert "description" in m - assert "grain" in m - assert "path" in m - - -class TestDynamicSqlFields: - """Verify sql_by_* fields are auto-discovered by parser.""" - - def test_dynamic_sql_fields_discovered(self): - parser = MetricParser(METRICS_DIR) - data = parser.parse_metric("revenue/total_revenue.yml") - # sql_by_channel should be found via dynamic discovery - assert "sql_by_channel" in data["sql_examples"] - assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel" - - def test_dynamic_sql_title_generation(self): - parser = MetricParser(METRICS_DIR) - data = parser.parse_metric("customers/repeat_purchase_rate.yml") - # sql_by_channel should be found via dynamic discovery - assert "sql_by_channel" in data["sql_examples"] - assert data["sql_examples"]["sql_by_channel"]["title"] == "By Channel" - - def test_static_sql_still_works(self): - parser = MetricParser(METRICS_DIR) - data = parser.parse_metric("revenue/total_revenue.yml") - assert "sql" in data["sql_examples"] - assert data["sql_examples"]["sql"]["title"] == "Basic Query" + assert result["sql_examples"] == {} diff --git a/webapp/app.py b/webapp/app.py index e9810fa..b093738 100644 --- a/webapp/app.py +++ b/webapp/app.py @@ -56,6 +56,12 @@ except ImportError: _CATALOG_ENRICHER_AVAILABLE = False CatalogEnricher = None +# Metric parser for modal detail rendering +try: + from webapp.utils.metric_parser import MetricParser +except ImportError: + MetricParser = None + # Configure logging logging.basicConfig( level=logging.INFO, @@ -458,9 +464,17 @@ METRIC_CATEGORY_META = { def _load_metrics_data(): """Load business metric definitions for catalog display. + Prefers metrics from OpenMetadata catalog. Falls back to YAML files if catalog unavailable. + Returns list of category dicts ordered by METRIC_CATEGORY_META: - [{'key': 'revenue', 'label': 'Revenue', 'css': 'sales', 'metrics': [...]}, ...] + [{'key': 'finance', 'label': 'Finance...', 'css': '...', 'metrics': [...]}, ...] """ + # Try catalog first (Phase 2) + catalog_metrics = _load_metrics_from_catalog() + if catalog_metrics: + return catalog_metrics + + # Fallback to YAML files if catalog unavailable # Try production path first, fall back to local dev path metrics_dir = Path("/data/docs/metrics") if not metrics_dir.exists(): @@ -520,6 +534,214 @@ def _load_metrics_data(): return result +def _parse_om_metric(raw_metric: dict) -> dict: + """ + Parse raw OpenMetadata metric dict into format for metric list display. + + Extracts category, grain from tags with standard prefixes: + - Category: tagFQN like "MetricCategory.finance" or "Category.marketing" + - Grain: tagFQN like "Grain.monthly" + + Args: + raw_metric: Raw metric dict from OpenMetadata (id, fullyQualifiedName, description, tags, etc.) + + Returns: + Dict with keys: name, display_name, description, grain, path + (path = "catalog:{fullyQualifiedName}" for JS routing) + """ + fqn = raw_metric.get("fullyQualifiedName", "") + name = raw_metric.get("name", "") + display_name = raw_metric.get("displayName", name) + description = raw_metric.get("description", "") or "" + + # Extract category and grain from tags + tags = raw_metric.get("tags", []) + category = "general" + grain = "" + + for tag in tags: + tag_fqn = tag.get("tagFQN", "") + + # Extract category from MetricCategory.* or Category.* tags + if tag_fqn.startswith("MetricCategory."): + category = tag_fqn.split(".", 1)[1] + elif tag_fqn.startswith("Category."): + category = tag_fqn.split(".", 1)[1] + + # Extract grain from Grain.* tags + if tag_fqn.startswith("Grain."): + grain = tag_fqn.split(".", 1)[1] + + return { + "name": name, + "display_name": display_name, + "description": description, + "grain": grain, + "category": category, + "path": f"catalog:{fqn}", # Special prefix for JS routing + } + + +def _load_metrics_from_catalog() -> list: + """ + Load business metrics from OpenMetadata catalog. + + Groups metrics by category (from tags or fallback to "general"). + Returns same structure as _load_metrics_data() for UI compatibility. + + Returns: + List of category dicts with metrics: + [ + {'key': 'finance', 'label': '...', 'css': '...', 'metrics': [...]}, + {'key': 'marketing', 'label': '...', 'css': '...', 'metrics': [...]} + ] + Returns empty list if catalog disabled or fails. + """ + global _catalog_enricher + + if not _catalog_enricher or not _catalog_enricher.enabled: + return [] + + try: + # Fetch metrics from catalog + raw_metrics = _catalog_enricher.get_metrics() + if not raw_metrics: + logger.debug("No metrics found in catalog") + return [] + + # Parse each metric and group by category + categories = {} + for raw in raw_metrics: + try: + metric = _parse_om_metric(raw) + cat = metric["category"] + + if cat not in categories: + categories[cat] = [] + + categories[cat].append(metric) + + except Exception as e: + logger.warning(f"Failed to parse metric {raw.get('name', '?')}: {e}") + continue + + # Build result using METRIC_CATEGORY_META for order and labels + result = [] + for cat_key, meta in sorted(METRIC_CATEGORY_META.items(), key=lambda x: x[1]["order"]): + if cat_key in categories: + result.append({ + "key": cat_key, + "label": meta["label"], + "css": meta["css"], + "metrics": categories[cat_key], + }) + + # Add unknown categories at the end + for cat_key, metrics in sorted(categories.items()): + if cat_key not in METRIC_CATEGORY_META: + result.append({ + "key": cat_key, + "label": cat_key.replace("_", " ").title(), + "css": cat_key, + "metrics": metrics, + }) + + logger.info(f"Loaded {sum(len(c['metrics']) for c in result)} metrics from catalog") + return result + + except Exception as e: + logger.warning(f"Failed to load metrics from catalog: {e}") + return [] + + +def _build_om_metric_detail(raw_metric: dict) -> dict: + """ + Convert raw OpenMetadata metric into MetricParser-compatible JSON for modal. + + Maps OpenMetadata fields to MetricParser structure (name, display_name, category, metadata, etc.). + Extracts type, unit, grain from tags with standard prefixes. + + Args: + raw_metric: Raw metric dict from OpenMetadata + + Returns: + Dict matching MetricParser._structure_metric_data() format + """ + fqn = raw_metric.get("fullyQualifiedName", "") + name = raw_metric.get("name", "") + display_name = raw_metric.get("displayName", name) + description = raw_metric.get("description", "") or "" + expression = raw_metric.get("expression", "") or "" + owners = raw_metric.get("owners", []) + + # Extract metadata from tags + tags = raw_metric.get("tags", []) + metric_type = "" + unit = "" + grain = "" + category = "general" + dimensions = [] + + for tag in tags: + tag_fqn = tag.get("tagFQN", "") + + if tag_fqn.startswith("MetricType."): + metric_type = tag_fqn.split(".", 1)[1] + elif tag_fqn.startswith("Unit."): + unit = tag_fqn.split(".", 1)[1] + elif tag_fqn.startswith("Grain."): + grain = tag_fqn.split(".", 1)[1] + elif tag_fqn.startswith("MetricCategory."): + category = tag_fqn.split(".", 1)[1] + elif tag_fqn.startswith("Dimension."): + dimensions.append(tag_fqn.split(".", 1)[1]) + + # Extract owner names + owner_names = [] + for owner in owners: + name_val = owner.get("name") or owner.get("displayName", "") + if name_val: + owner_names.append(name_val) + + # Build MetricParser-compatible structure + return { + "name": name, + "display_name": display_name, + "category": category, + "category_color": MetricParser.CATEGORY_COLORS.get(category, "#6B7280"), + "metadata": { + "type": metric_type, + "unit": unit, + "grain": grain, + "time_column": "", # Not available in OpenMetadata + }, + "overview": { + "description": description.strip(), + "key_insights": [], # Not available in OpenMetadata + }, + "validation": None, # Not available in OpenMetadata + "dimensions": dimensions, + "notes": { + "all": [], # Not available in OpenMetadata + "key_insights": [], + }, + "sql_examples": { + "expression": { + "title": "Metric Expression", + "query": expression, + "complexity": "simple", + } + } if expression else {}, + "technical": { + "table": "", # Not available in OpenMetadata + "expression": expression, + "synonyms": [], + "data_sources": [], + }, + "special_sections": {}, + } + + def _send_welcome_message(username: str) -> None: """Send a welcome message to the user via bot socket after linking.""" try: @@ -787,6 +1009,36 @@ def register_routes(app: Flask) -> None: logger.error(f"Error parsing metric {metric_path}: {e}") return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500 + @app.route("/api/catalog/metrics/") + @login_required + def api_catalog_metric(metric_fqn): + """ + API endpoint to serve metric from OpenMetadata catalog as structured JSON. + + Args: + metric_fqn: Fully qualified name (e.g., "catalog.metrics.total_revenue") + + Returns: + JSON matching MetricParser format for modal rendering + """ + global _catalog_enricher + + if not _catalog_enricher or not _catalog_enricher.enabled: + return jsonify({"error": "Catalog not available"}), 503 + + try: + # Fetch metric from catalog + raw = _catalog_enricher._client.get_metric_by_fqn(metric_fqn) + + # Convert to MetricParser format + metric_data = _build_om_metric_detail(raw) + + return jsonify(metric_data) + + except Exception as e: + logger.error(f"Error fetching catalog metric {metric_fqn}: {e}") + return jsonify({"error": f"Failed to fetch metric: {str(e)}"}), 500 + @app.route("/docs/metrics/") @login_required def serve_metric(metric_path): diff --git a/webapp/static/js/metric_modal.js b/webapp/static/js/metric_modal.js index 0572f37..7326a9d 100644 --- a/webapp/static/js/metric_modal.js +++ b/webapp/static/js/metric_modal.js @@ -9,7 +9,7 @@ let currentMetricData = null; /** * Open metric modal and load data - * @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') + * @param {string} metricPath - Path to metric YAML (e.g., 'finance/infra_cost.yml') or catalog FQN (e.g., 'catalog:...') */ function openMetricModal(metricPath) { currentMetricPath = metricPath; @@ -23,8 +23,13 @@ function openMetricModal(metricPath) { // Show loading state body.innerHTML = '
Loading metric...
'; + // Route based on prefix: catalog:FQN uses /api/catalog/metrics, YAML paths use /api/metrics + const url = metricPath.startsWith('catalog:') + ? `/api/catalog/metrics/${metricPath.slice(8)}` // Remove 'catalog:' prefix + : `/api/metrics/${metricPath}`; + // Fetch metric data - fetch(`/api/metrics/${metricPath}`) + fetch(url) .then(response => { if (!response.ok) { throw new Error(`HTTP ${response.status}: ${response.statusText}`); diff --git a/webapp/static/style-custom.css b/webapp/static/style-custom.css index 4b99dd5..ce09fab 100644 --- a/webapp/static/style-custom.css +++ b/webapp/static/style-custom.css @@ -33,6 +33,7 @@ --font-medium: 500; --font-semibold: 600; --font-bold: 700; + --font-extrabold: 800; /* Spacing */ --space-1: 4px; @@ -194,7 +195,7 @@ body { .welcome-v2 h2 { font-size: var(--text-xl); - font-weight: var(--font-semibold); + font-weight: var(--font-extrabold); color: var(--text-primary); margin-bottom: var(--space-2); } diff --git a/webapp/templates/catalog.html b/webapp/templates/catalog.html index 7ff4696..13e8a2e 100644 --- a/webapp/templates/catalog.html +++ b/webapp/templates/catalog.html @@ -118,7 +118,7 @@ .page-title h1 { font-size: 24px; - font-weight: 600; + font-weight: 800; color: var(--text-primary); margin-bottom: 4px; } @@ -186,7 +186,7 @@ .source-card-name { font-size: 16px; - font-weight: 600; + font-weight: 700; color: var(--text-primary); margin-bottom: 2px; }