diff --git a/connectors/openmetadata/transformer.py b/connectors/openmetadata/transformer.py index b842ec8..76c4feb 100644 --- a/connectors/openmetadata/transformer.py +++ b/connectors/openmetadata/transformer.py @@ -14,6 +14,7 @@ Extracts metadata from OpenMetadata tag conventions: - Unit.* -> unit of measurement """ +import html import logging import re from typing import Any, Dict, List, Optional @@ -193,6 +194,51 @@ def extract_tag_names(tags: List[Dict[str, Any]]) -> List[str]: return result +def strip_html(text: str) -> str: + """ + Strip HTML tags and decode entities from OpenMetadata descriptions. + + OpenMetadata stores descriptions as rich HTML. This converts to clean + plain text suitable for YAML files and agent consumption. + + Handles: + - HTML tags (

, , ,