From 49cb9407295c96fa0c7ccd847bc521722589bf81 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Thu, 9 Apr 2026 18:42:37 +0200 Subject: [PATCH] fix: strip HTML from table and column descriptions in OpenMetadata enricher Imports strip_html from transformer and applies it to both table-level and column-level descriptions parsed from the OpenMetadata API response. --- connectors/openmetadata/enricher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/connectors/openmetadata/enricher.py b/connectors/openmetadata/enricher.py index c550033..42d8213 100644 --- a/connectors/openmetadata/enricher.py +++ b/connectors/openmetadata/enricher.py @@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Any from dataclasses import dataclass as _dataclass from .client import OpenMetadataClient +from .transformer import strip_html @_dataclass @@ -207,13 +208,13 @@ class CatalogEnricher: CatalogTableData or None if parsing fails """ try: - description = raw.get("description", "") or "" + description = strip_html(raw.get("description", "") or "") # Parse columns columns = {} for col in raw.get("columns", []): col_name = col.get("name", "").lower() - col_description = col.get("description", "") or "" + col_description = strip_html(col.get("description", "") or "") col_type = col.get("dataType", "") columns[col_name] = CatalogColumnData(