From ab1a93ed6705a9dc5701577d8936c2f1f9e6998f Mon Sep 17 00:00:00 2001 From: Petr Date: Sun, 15 Mar 2026 01:57:04 +0100 Subject: [PATCH] Strip HTML tags from OpenMetadata descriptions in YAML export OpenMetadata stores descriptions as rich HTML (<p>, <strong>, &nbsp;, etc.). Add strip_html() to transformer that converts to clean plain text for YAML files consumed by Claude Code agent. Applied to metric descriptions, table descriptions, and column descriptions. Webapp display dict keeps raw HTML since the modal renders it correctly. --- connectors/openmetadata/transformer.py | 54 +++++++++++++++++++++-- tests/test_openmetadata_transformer.py | 60 ++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 4 deletions(-) diff --git a/connectors/openmetadata/transformer.py b/connectors/openmetadata/transformer.py index b842ec8..76c4feb 100644 --- a/connectors/openmetadata/transformer.py +++ b/connectors/openmetadata/transformer.py @@ -14,6 +14,7 @@ Extracts metadata from OpenMetadata tag conventions: - Unit.* -> unit of measurement """ +import html import logging import re from typing import Any, Dict, List, Optional @@ -193,6 +194,51 @@ def extract_tag_names(tags: List[Dict[str, Any]]) -> List[str]: return result +def strip_html(text: str) -> str: + """ + Strip HTML tags and decode entities from OpenMetadata descriptions. + + OpenMetadata stores descriptions as rich HTML. This converts to clean + plain text suitable for YAML files and agent consumption. + + Handles: + - HTML tags (

, , ,