"""
OpenMetadata data transformer.

Shared logic for parsing OpenMetadata API responses into structured dicts
suitable for YAML export and webapp display.

Used by:
- src/catalog_export.py (YAML file generation)
- webapp/app.py (metric list and detail display)

Extracts metadata from OpenMetadata tag conventions:
- MetricCategory.* or Category.* -> category
- Grain.* -> grain/granularity
- Dimension.* -> dimensions list
- MetricType.* -> metric type
- Unit.* -> unit of measurement
"""

import html
import logging
import re
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

logger = logging.getLogger(__name__)


def _iter_tag_fqns(tags: Optional[Iterable[Any]]) -> Iterator[str]:
    """
    Yield every usable "tagFQN" string found in a tag list.

    Tolerates a missing tag list (None) and silently skips entries that are
    not dicts or whose "tagFQN" is missing, null, empty or not a string, so
    callers never crash on partially populated API payloads.

    Args:
        tags: Tag dicts (each normally carrying a "tagFQN" key), or None

    Yields:
        Non-empty tagFQN strings, in list order
    """
    for tag in tags or ():
        if not isinstance(tag, dict):
            continue
        fqn = tag.get("tagFQN")
        if isinstance(fqn, str) and fqn:
            yield fqn


def _iter_tag_values(
    tags: Optional[Iterable[Any]], prefixes: Tuple[str, ...]
) -> Iterator[str]:
    """
    Yield the value part of every tag whose FQN starts with one of the prefixes.

    The value is everything after the first dot, so "Dimension.geo.country"
    yields "geo.country". Tags with nothing after the prefix are skipped.

    Args:
        tags: Tag dicts, or None
        prefixes: Prefixes to match, each ending with "." (e.g., ("Grain.",))

    Yields:
        Non-empty value strings, in list order
    """
    for fqn in _iter_tag_fqns(tags):
        if fqn.startswith(prefixes):
            value = fqn.partition(".")[2]
            if value:
                yield value


def extract_category(tags: Optional[List[Dict[str, Any]]]) -> str:
    """
    Extract metric category from OpenMetadata tags.

    Looks for a tagFQN prefixed with "MetricCategory." or "Category.".
    The first matching tag in list order wins, whichever prefix it uses.

    Args:
        tags: List of tag dicts from OpenMetadata (each with "tagFQN" key);
            None is treated as an empty list

    Returns:
        Category string (e.g., "finance", "marketing"), or "general" when no
        tag carries a non-empty category.
    """
    return next(_iter_tag_values(tags, ("MetricCategory.", "Category.")), "general")


def extract_grain(raw_metric: Dict[str, Any]) -> str:
    """
    Extract metric granularity from OpenMetadata metric data.

    Checks the "granularity" field first, then falls back to Grain.* tags.

    Args:
        raw_metric: Raw metric dict from OpenMetadata API

    Returns:
        Grain string (e.g., "monthly", "daily"), lowercase.
        Empty string if not found.
    """
    grain = raw_metric.get("granularity") or ""
    if grain:
        return grain.lower()
    # .get("tags") may legitimately be None; the helper copes with that.
    return next(_iter_tag_values(raw_metric.get("tags"), ("Grain.",)), "").lower()


def extract_dimensions(tags: Optional[List[Dict[str, Any]]]) -> List[str]:
    """
    Extract dimension names from OpenMetadata tags.

    Looks for tagFQN prefixed with "Dimension.".

    Args:
        tags: List of tag dicts from OpenMetadata; None is treated as empty

    Returns:
        List of dimension names (e.g., ["economic_area", "merchant_country"]),
        in tag order.
    """
    return list(_iter_tag_values(tags, ("Dimension.",)))


def extract_expression(raw_metric: Dict[str, Any]) -> str:
    """
    Extract metric SQL expression from OpenMetadata metric data.

    Handles "metricExpression" both as a dict ({"code": "..."} or
    {"expression": "..."}) and as a plain string, then falls back to a
    top-level "expression" field.

    Args:
        raw_metric: Raw metric dict from OpenMetadata API

    Returns:
        SQL expression string, or empty string if not found.
    """
    metric_expr = raw_metric.get("metricExpression")
    if isinstance(metric_expr, dict):
        # OpenMetadata uses the "code" field for the SQL expression
        nested = metric_expr.get("code") or metric_expr.get("expression")
        if nested:
            return nested
    elif isinstance(metric_expr, str) and metric_expr:
        return metric_expr

    # Fallback: top-level expression field (OpenMetadata format varies)
    return raw_metric.get("expression") or ""


def extract_owners(raw: Dict[str, Any]) -> List[str]:
    """
    Extract owner names from OpenMetadata entity data.

    Prefers each owner's "name" and falls back to "displayName". Owners with
    neither are skipped, as are entries that are not dicts.

    Args:
        raw: Raw entity dict with optional "owners" list (may be null)

    Returns:
        List of owner name strings
    """
    names = []
    # "owners" can be present but null, so .get(..., []) alone is not enough.
    for owner in raw.get("owners") or ():
        if not isinstance(owner, dict):
            continue
        name = owner.get("name") or owner.get("displayName") or ""
        if name:
            names.append(name)
    return names


def extract_metric_type(raw_metric: Dict[str, Any]) -> str:
    """
    Extract metric type from OpenMetadata metric data.

    Checks the "metricType" field first, then MetricType.* tags.

    Args:
        raw_metric: Raw metric dict from OpenMetadata API

    Returns:
        Metric type string (e.g., "sum", "count"), lowercase.
        Empty string if not found.
    """
    metric_type = raw_metric.get("metricType") or ""
    if metric_type:
        return metric_type.lower()
    return next(
        _iter_tag_values(raw_metric.get("tags"), ("MetricType.",)), ""
    ).lower()


def extract_unit(raw_metric: Dict[str, Any]) -> str:
    """
    Extract unit of measurement from OpenMetadata metric data.

    Checks the "unitOfMeasurement" field first, then Unit.* tags.
    Case is preserved.

    Args:
        raw_metric: Raw metric dict from OpenMetadata API

    Returns:
        Unit string (e.g., "USD", "count"). Empty string if not found.
    """
    unit = raw_metric.get("unitOfMeasurement") or ""
    if unit:
        return unit
    return next(_iter_tag_values(raw_metric.get("tags"), ("Unit.",)), "")


def has_tag(tags: Optional[List[Dict[str, Any]]], tag_fqn: str) -> bool:
    """
    Check if a specific tag (by FQN) is present in the tag list.

    Args:
        tags: List of tag dicts from OpenMetadata; None is treated as empty
        tag_fqn: Fully qualified tag name to check (e.g., "AIAgent.FoundryAI")

    Returns:
        True if a tag with exactly this FQN is found. An empty tag_fqn never
        matches.
    """
    return any(fqn == tag_fqn for fqn in _iter_tag_fqns(tags))


def extract_tag_names(tags: Optional[List[Dict[str, Any]]]) -> List[str]:
    """
    Extract simple tag names from OpenMetadata tag list.

    Uses the "name" field if present, otherwise the last dot-separated
    segment of "tagFQN". Tags that yield no name are skipped.

    Args:
        tags: List of tag dicts from OpenMetadata; None is treated as empty

    Returns:
        List of tag name strings
    """
    names = []
    for tag in tags or ():
        if not isinstance(tag, dict):
            continue
        # "tagFQN" may be an explicit null, so coerce before splitting.
        name = tag.get("name") or (tag.get("tagFQN") or "").split(".")[-1]
        if name:
            names.append(name)
    return names

, , ,