Filter catalog metrics by configurable tag (e.g., AIAgent.FoundryAI)
Add filter_tag support to catalog_export and the webapp so that only metrics carrying the required tag are exported to YAML and displayed in the UI. Previously, all 19+ metrics were exported regardless of relevance.
- Add has_tag() helper to the transformer module
- catalog_export.py: read the filter_tag parameter from the instance.yaml openmetadata config
- webapp/app.py: filter metrics in _load_metrics_from_catalog()
- 7 new tests (has_tag, filter_tag export, stale cleanup)
This commit is contained in:
parent
440662c8fe
commit
ad525a96aa
5 changed files with 120 additions and 5 deletions
|
|
@ -174,6 +174,20 @@ def extract_unit(raw_metric: Dict[str, Any]) -> str:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def has_tag(tags: List[Dict[str, Any]], tag_fqn: str) -> bool:
    """
    Check if a specific tag (by FQN) is present in the tag list.

    The comparison is an exact, case-sensitive equality test against each
    entry's "tagFQN" value; prefixes or partial matches do not count.

    Args:
        tags: List of tag dicts from OpenMetadata. ``None`` or an empty list
            is treated as "no tags" (callers pass ``m.get("tags", [])``,
            which yields ``None`` when the key is present but null).
        tag_fqn: Fully qualified tag name to check (e.g., "AIAgent.FoundryAI")

    Returns:
        True if the tag is found, False otherwise
    """
    # A missing/null tag list can never contain the tag.
    if not tags:
        return False

    # Skip malformed (non-dict) entries instead of failing on `.get`.
    return any(
        isinstance(tag, dict) and tag.get("tagFQN", "") == tag_fqn
        for tag in tags
    )
|
||||||
|
|
||||||
|
|
||||||
def extract_tag_names(tags: List[Dict[str, Any]]) -> List[str]:
|
def extract_tag_names(tags: List[Dict[str, Any]]) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Extract simple tag names from OpenMetadata tag list.
|
Extract simple tag names from OpenMetadata tag list.
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ from connectors.openmetadata.client import OpenMetadataClient
|
||||||
from connectors.openmetadata.transformer import (
|
from connectors.openmetadata.transformer import (
|
||||||
extract_category,
|
extract_category,
|
||||||
extract_grain,
|
extract_grain,
|
||||||
|
has_tag,
|
||||||
metric_to_yaml_dict,
|
metric_to_yaml_dict,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
table_to_yaml_dict,
|
table_to_yaml_dict,
|
||||||
|
|
@ -121,21 +122,24 @@ def export_metrics(
|
||||||
client: OpenMetadataClient,
|
client: OpenMetadataClient,
|
||||||
docs_dir: Path,
|
docs_dir: Path,
|
||||||
catalog_url: str,
|
catalog_url: str,
|
||||||
|
filter_tag: str = "",
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Export metrics from OpenMetadata to YAML files.
|
Export metrics from OpenMetadata to YAML files.
|
||||||
|
|
||||||
For each metric:
|
For each metric:
|
||||||
1. Fetches all metrics from catalog API
|
1. Fetches all metrics from catalog API
|
||||||
2. Transforms each to YAML-compatible dict
|
2. Filters by required tag (if configured)
|
||||||
3. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml
|
3. Transforms each to YAML-compatible dict
|
||||||
4. Writes index file: {docs_dir}/metrics/metrics.yml
|
4. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml
|
||||||
5. Cleans up stale auto-generated files
|
5. Writes index file: {docs_dir}/metrics/metrics.yml
|
||||||
|
6. Cleans up stale auto-generated files
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client: Initialized OpenMetadata API client
|
client: Initialized OpenMetadata API client
|
||||||
docs_dir: Base docs directory (e.g., /data/docs)
|
docs_dir: Base docs directory (e.g., /data/docs)
|
||||||
catalog_url: Catalog URL for header comments
|
catalog_url: Catalog URL for header comments
|
||||||
|
filter_tag: If set, only export metrics that have this tag (e.g., "AIAgent.FoundryAI")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Number of metrics exported
|
Number of metrics exported
|
||||||
|
|
@ -151,6 +155,14 @@ def export_metrics(
|
||||||
|
|
||||||
logger.info(f"Fetched {len(raw_metrics)} metrics from catalog")
|
logger.info(f"Fetched {len(raw_metrics)} metrics from catalog")
|
||||||
|
|
||||||
|
# Filter by tag if configured
|
||||||
|
if filter_tag:
|
||||||
|
filtered = [m for m in raw_metrics if has_tag(m.get("tags", []), filter_tag)]
|
||||||
|
logger.info(
|
||||||
|
f"Tag filter '{filter_tag}': {len(filtered)}/{len(raw_metrics)} metrics matched"
|
||||||
|
)
|
||||||
|
raw_metrics = filtered
|
||||||
|
|
||||||
# Track which files we write (for cleanup)
|
# Track which files we write (for cleanup)
|
||||||
written_files: set[Path] = set()
|
written_files: set[Path] = set()
|
||||||
index_entries: List[Dict[str, Any]] = []
|
index_entries: List[Dict[str, Any]] = []
|
||||||
|
|
@ -400,9 +412,12 @@ def main() -> None:
|
||||||
logger.warning(f"Failed to initialize OpenMetadata client: {e}")
|
logger.warning(f"Failed to initialize OpenMetadata client: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Optional tag filter (only export metrics with this tag)
|
||||||
|
filter_tag = om_config.get("filter_tag", "").strip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Export metrics
|
# Export metrics
|
||||||
metrics_count = export_metrics(client, docs_dir, catalog_url)
|
metrics_count = export_metrics(client, docs_dir, catalog_url, filter_tag=filter_tag)
|
||||||
|
|
||||||
# Export tables
|
# Export tables
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -297,6 +297,52 @@ class TestExportMetrics:
|
||||||
assert (docs / "metrics" / "finance" / "total_revenue.yml").exists()
|
assert (docs / "metrics" / "finance" / "total_revenue.yml").exists()
|
||||||
assert (docs / "metrics" / "product" / "active_users.yml").exists()
|
assert (docs / "metrics" / "product" / "active_users.yml").exists()
|
||||||
|
|
||||||
|
def test_export_metrics_filter_tag_keeps_matching(self, tmp_path: Path, mock_client):
    """A metric is written only when it carries the requested filter tag."""
    required_tag = "AIAgent.FoundryAI"

    # One metric carrying the tag, one without it.
    matching = _make_raw_metric(name="M1", fqn="M1", category_tag="MetricCategory.finance")
    matching["tags"].append({"tagFQN": required_tag, "name": "FoundryAI"})
    non_matching = _make_raw_metric(
        name="Live Deals", fqn="LiveDeals", category_tag="MetricCategory.supply"
    )
    mock_client.get_metrics.return_value = [matching, non_matching]

    docs_root = tmp_path / "docs"
    exported = export_metrics(mock_client, docs_root, CATALOG_URL, filter_tag=required_tag)

    metrics_root = docs_root / "metrics"
    assert exported == 1
    assert (metrics_root / "finance" / "m1.yml").exists()
    # The untagged metric's category directory must never be created.
    assert not (metrics_root / "supply").exists()
|
||||||
|
|
||||||
|
def test_export_metrics_filter_tag_empty_exports_all(self, tmp_path: Path, mock_client):
    """An empty filter_tag disables filtering, so every metric is exported."""
    catalog_metrics = [_make_raw_metric(name=label, fqn=label) for label in ("A", "B")]
    mock_client.get_metrics.return_value = catalog_metrics

    docs_root = tmp_path / "docs"
    exported = export_metrics(mock_client, docs_root, CATALOG_URL, filter_tag="")

    assert exported == 2
|
||||||
|
|
||||||
|
def test_export_metrics_filter_tag_cleans_stale_untagged(self, tmp_path: Path, mock_client):
    """A leftover auto-generated file for a metric that is no longer exported is removed."""
    required_tag = "AIAgent.FoundryAI"

    kept = _make_raw_metric(name="M1", fqn="M1", category_tag="MetricCategory.finance")
    kept["tags"].append({"tagFQN": required_tag, "name": "FoundryAI"})
    mock_client.get_metrics.return_value = [kept]

    # Seed a file that looks like the output of an earlier, unfiltered export.
    docs_root = tmp_path / "docs"
    leftover = docs_root / "metrics" / "general" / "livedeals.yml"
    leftover.parent.mkdir(parents=True)
    leftover.write_text(AUTO_GENERATED_MARKER + "\nname: livedeals\n")

    export_metrics(mock_client, docs_root, CATALOG_URL, filter_tag=required_tag)

    assert not leftover.exists()
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# 4. export_tables
|
# 4. export_tables
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ from connectors.openmetadata.transformer import (
|
||||||
extract_owners,
|
extract_owners,
|
||||||
extract_tag_names,
|
extract_tag_names,
|
||||||
extract_unit,
|
extract_unit,
|
||||||
|
has_tag,
|
||||||
metric_to_detail_dict,
|
metric_to_detail_dict,
|
||||||
metric_to_display_dict,
|
metric_to_display_dict,
|
||||||
metric_to_yaml_dict,
|
metric_to_yaml_dict,
|
||||||
|
|
@ -331,6 +332,30 @@ class TestExtractUnit:
|
||||||
# extract_tag_names
|
# extract_tag_names
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestHasTag:
    """Exact-FQN membership checks performed by has_tag()."""

    def test_has_tag_present(self):
        """A list containing the exact FQN yields True."""
        foundry = {"tagFQN": "AIAgent.FoundryAI", "name": "FoundryAI"}
        tier = {"tagFQN": "Tier.Tier1"}
        found = has_tag([foundry, tier], "AIAgent.FoundryAI")
        assert found is True

    def test_has_tag_absent(self):
        """A list without the FQN yields False."""
        found = has_tag([{"tagFQN": "Tier.Tier2"}], "AIAgent.FoundryAI")
        assert found is False

    def test_has_tag_empty_list(self):
        """An empty tag list yields False."""
        no_tags = []
        assert has_tag(no_tags, "AIAgent.FoundryAI") is False

    def test_has_tag_partial_match(self):
        """An FQN that merely starts with the requested one is not a match."""
        lookalike = {"tagFQN": "AIAgent.FoundryAI_v2"}
        assert has_tag([lookalike], "AIAgent.FoundryAI") is False
|
||||||
|
|
||||||
|
|
||||||
class TestExtractTagNames:
|
class TestExtractTagNames:
|
||||||
def test_extract_tag_names_with_name_field(self):
|
def test_extract_tag_names_with_name_field(self):
|
||||||
"""Tags with 'name' field use that value."""
|
"""Tags with 'name' field use that value."""
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,7 @@ except ImportError:
|
||||||
# Shared OpenMetadata transformer (catalog -> dict)
|
# Shared OpenMetadata transformer (catalog -> dict)
|
||||||
try:
|
try:
|
||||||
from connectors.openmetadata.transformer import (
|
from connectors.openmetadata.transformer import (
|
||||||
|
has_tag as _transformer_has_tag,
|
||||||
metric_to_detail_dict as _transformer_metric_detail,
|
metric_to_detail_dict as _transformer_metric_detail,
|
||||||
metric_to_display_dict as _transformer_metric_display,
|
metric_to_display_dict as _transformer_metric_display,
|
||||||
)
|
)
|
||||||
|
|
@ -81,6 +82,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Global catalog enricher (initialized in create_app)
|
# Global catalog enricher (initialized in create_app)
|
||||||
_catalog_enricher = None
|
_catalog_enricher = None
|
||||||
|
_catalog_filter_tag = ""
|
||||||
|
|
||||||
|
|
||||||
def get_git_commit_hash() -> str:
|
def get_git_commit_hash() -> str:
|
||||||
|
|
@ -122,6 +124,12 @@ def create_app() -> Flask:
|
||||||
_catalog_enricher = CatalogEnricher(instance_config)
|
_catalog_enricher = CatalogEnricher(instance_config)
|
||||||
if _catalog_enricher.enabled:
|
if _catalog_enricher.enabled:
|
||||||
logger.info("OpenMetadata catalog enricher initialized")
|
logger.info("OpenMetadata catalog enricher initialized")
|
||||||
|
# Store filter tag for metric filtering
|
||||||
|
global _catalog_filter_tag
|
||||||
|
om_config = instance_config.get("openmetadata", {})
|
||||||
|
_catalog_filter_tag = om_config.get("filter_tag", "").strip()
|
||||||
|
if _catalog_filter_tag:
|
||||||
|
logger.info(f"Catalog metric filter tag: {_catalog_filter_tag}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to initialize catalog enricher: {e}")
|
logger.warning(f"Failed to initialize catalog enricher: {e}")
|
||||||
_catalog_enricher = None
|
_catalog_enricher = None
|
||||||
|
|
@ -720,6 +728,13 @@ def _load_metrics_from_catalog() -> list:
|
||||||
logger.debug("No metrics found in catalog")
|
logger.debug("No metrics found in catalog")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Filter by tag if configured
|
||||||
|
if _catalog_filter_tag and _TRANSFORMER_AVAILABLE:
|
||||||
|
raw_metrics = [
|
||||||
|
m for m in raw_metrics
|
||||||
|
if _transformer_has_tag(m.get("tags", []), _catalog_filter_tag)
|
||||||
|
]
|
||||||
|
|
||||||
# Parse each metric and group by category
|
# Parse each metric and group by category
|
||||||
categories = {}
|
categories = {}
|
||||||
for raw in raw_metrics:
|
for raw in raw_metrics:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue