fix: strip HTML from table and column descriptions in OpenMetadata enricher
Imports strip_html from transformer and applies it to both table-level and column-level descriptions parsed from the OpenMetadata API response.
This commit is contained in:
parent
30987eef16
commit
49cb940729
1 changed file with 3 additions and 2 deletions
|
|
@@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Any
|
||||||
|
|
||||||
from dataclasses import dataclass as _dataclass
|
from dataclasses import dataclass as _dataclass
|
||||||
from .client import OpenMetadataClient
|
from .client import OpenMetadataClient
|
||||||
|
from .transformer import strip_html
|
||||||
|
|
||||||
|
|
||||||
@_dataclass
|
@_dataclass
|
||||||
|
|
@@ -207,13 +208,13 @@ class CatalogEnricher:
|
||||||
CatalogTableData or None if parsing fails
|
CatalogTableData or None if parsing fails
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
description = raw.get("description", "") or ""
|
description = strip_html(raw.get("description", "") or "")
|
||||||
|
|
||||||
# Parse columns
|
# Parse columns
|
||||||
columns = {}
|
columns = {}
|
||||||
for col in raw.get("columns", []):
|
for col in raw.get("columns", []):
|
||||||
col_name = col.get("name", "").lower()
|
col_name = col.get("name", "").lower()
|
||||||
col_description = col.get("description", "") or ""
|
col_description = strip_html(col.get("description", "") or "")
|
||||||
col_type = col.get("dataType", "")
|
col_type = col.get("dataType", "")
|
||||||
|
|
||||||
columns[col_name] = CatalogColumnData(
|
columns[col_name] = CatalogColumnData(
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue