Add data product discovery, fix remove-analyst script
- client.py: add search_by_data_product() for OpenMetadata search API
- catalog_export.py: prefer data product discovery over tag filtering (finds all 16 metrics in FoundryAIDataModel vs 3 with tag filter)
- remove-analyst: fix GROUPS bash variable collision, improve messaging
This commit is contained in:
parent
ab99f0af92
commit
fb63a72a98
3 changed files with 107 additions and 30 deletions
|
|
@ -8,6 +8,7 @@ Low-level HTTP wrapper for OpenMetadata REST API with these functions:
|
||||||
4. Proper error handling and logging
|
4. Proper error handling and logging
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
import warnings
|
import warnings
|
||||||
|
|
@ -138,6 +139,50 @@ class OpenMetadataClient:
|
||||||
|
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
def search_by_data_product(
    self,
    data_product_name: str,
    entity_type: str = "",
    limit: int = 50,
    fields: str = "tags,owners",
) -> List[Dict[str, Any]]:
    """
    Search for entities belonging to a data product.

    Uses OpenMetadata search API with queryFilter to find all assets
    (metrics, tables, etc.) that are part of the specified data product.

    Args:
        data_product_name: Name of the data product (e.g., "FoundryAIDataModel")
        entity_type: Filter by entity type (e.g., "metric", "table"). Empty = all types.
        limit: Maximum number of results
        fields: Comma-separated list of fields to include. Currently accepted
            for signature compatibility only: it is not sent to the search
            endpoint, so each result is whatever ``_source`` the API returns.

    Returns:
        List of entity dictionaries (the ``_source`` of each search hit).
        Returns an empty list when the response carries no hits.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a non-success
        HTTP status.
    """
    # Exact-match on the data product name; optionally narrow by entity type.
    must_clauses: List[Dict[str, Any]] = [
        {"term": {"dataProducts.name.keyword": data_product_name}},
    ]
    if entity_type:
        must_clauses.append({"term": {"entityType": entity_type}})

    # NOTE(review): confirm the parameter name and filter shape expected by
    # the deployed OpenMetadata version for /api/v1/search/query.
    params = {
        "q": "*",
        "index": "all",
        "size": limit,
        "queryFilter": json.dumps({"bool": {"must": must_clauses}}),
    }

    response = self._client.get("/api/v1/search/query", params=params)
    response.raise_for_status()

    # Parse defensively: "hits" (outer or inner) or "_source" may be absent
    # or null, and that must yield empty results instead of AttributeError.
    payload = response.json()
    outer = payload.get("hits") if isinstance(payload, dict) else None
    hits = outer.get("hits") if isinstance(outer, dict) else None
    return [hit.get("_source") or {} for hit in (hits or []) if isinstance(hit, dict)]
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""Close HTTP client session."""
|
"""Close HTTP client session."""
|
||||||
self._client.close()
|
self._client.close()
|
||||||
|
|
|
||||||
|
|
@ -47,12 +47,15 @@ if [[ "$USERNAME" == "$CURRENT_USER" ]]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Get user groups for info (safe extraction, no pipefail issues)
|
# Get user info (avoid using GROUPS - it's a bash special variable for current user's GIDs)
|
||||||
GROUPS=$(id -nG "$USERNAME" 2>/dev/null) || GROUPS="(unknown)"
|
USER_GROUPS=$(id -nG "$USERNAME" 2>/dev/null) || USER_GROUPS="(unknown)"
|
||||||
|
HOME_DIR="/home/$USERNAME"
|
||||||
|
HOME_EXISTS=false
|
||||||
|
[[ -d "$HOME_DIR" ]] && HOME_EXISTS=true
|
||||||
|
|
||||||
echo "Removing user: $USERNAME"
|
echo "Removing user: $USERNAME"
|
||||||
echo " Groups: $GROUPS"
|
echo " Groups: $USER_GROUPS"
|
||||||
echo " Home: /home/$USERNAME"
|
echo " Home: $HOME_DIR ($([ "$HOME_EXISTS" = true ] && echo "exists" || echo "already missing"))"
|
||||||
|
|
||||||
if [[ "$FORCE" != true ]]; then
|
if [[ "$FORCE" != true ]]; then
|
||||||
read -p "Are you sure? [y/N] " -n 1 -r
|
read -p "Are you sure? [y/N] " -n 1 -r
|
||||||
|
|
@ -65,16 +68,23 @@ fi
|
||||||
|
|
||||||
# Remove user and home directory
|
# Remove user and home directory
|
||||||
echo " Deleting OS user..."
|
echo " Deleting OS user..."
|
||||||
if userdel -r "$USERNAME" 2>/dev/null; then
|
USERDEL_ERR=$(userdel -r "$USERNAME" 2>&1)
|
||||||
echo " User and home directory removed"
|
USERDEL_EXIT=$?
|
||||||
|
if [[ $USERDEL_EXIT -eq 0 ]]; then
|
||||||
|
if [[ "$HOME_EXISTS" = true ]]; then
|
||||||
|
echo " User and home directory removed"
|
||||||
|
else
|
||||||
|
echo " User removed (home directory was already missing)"
|
||||||
|
fi
|
||||||
elif userdel "$USERNAME" 2>/dev/null; then
|
elif userdel "$USERNAME" 2>/dev/null; then
|
||||||
echo " User removed (userdel -r failed, cleaning up home manually)"
|
echo " User removed (userdel -r failed: $USERDEL_ERR)"
|
||||||
if [[ -d "/home/$USERNAME" ]]; then
|
if [[ -d "$HOME_DIR" ]]; then
|
||||||
rm -rf "/home/$USERNAME"
|
rm -rf "$HOME_DIR"
|
||||||
echo " Home directory /home/$USERNAME removed"
|
echo " Home directory $HOME_DIR removed"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Error: Failed to remove user '$USERNAME'"
|
echo "Error: Failed to remove user '$USERNAME'"
|
||||||
|
echo " userdel error: $USERDEL_ERR"
|
||||||
echo " Check if processes are running as this user: ps -u $USERNAME"
|
echo " Check if processes are running as this user: ps -u $USERNAME"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -123,23 +123,24 @@ def export_metrics(
|
||||||
docs_dir: Path,
|
docs_dir: Path,
|
||||||
catalog_url: str,
|
catalog_url: str,
|
||||||
filter_tag: str = "",
|
filter_tag: str = "",
|
||||||
|
data_product: str = "",
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Export metrics from OpenMetadata to YAML files.
|
Export metrics from OpenMetadata to YAML files.
|
||||||
|
|
||||||
For each metric:
|
For each metric:
|
||||||
1. Fetches all metrics from catalog API
|
1. Discovers metrics via data product (preferred) or fetches all + filters by tag
|
||||||
2. Filters by required tag (if configured)
|
2. Transforms each to YAML-compatible dict
|
||||||
3. Transforms each to YAML-compatible dict
|
3. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml
|
||||||
4. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml
|
4. Writes index file: {docs_dir}/metrics/metrics.yml
|
||||||
5. Writes index file: {docs_dir}/metrics/metrics.yml
|
5. Cleans up stale auto-generated files
|
||||||
6. Cleans up stale auto-generated files
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client: Initialized OpenMetadata API client
|
client: Initialized OpenMetadata API client
|
||||||
docs_dir: Base docs directory (e.g., /data/docs)
|
docs_dir: Base docs directory (e.g., /data/docs)
|
||||||
catalog_url: Catalog URL for header comments
|
catalog_url: Catalog URL for header comments
|
||||||
filter_tag: If set, only export metrics that have this tag (e.g., "AIAgent.FoundryAI")
|
filter_tag: If set, only export metrics that have this tag (e.g., "AIAgent.FoundryAI")
|
||||||
|
data_product: If set, discover metrics via data product assets (preferred over filter_tag)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Number of metrics exported
|
Number of metrics exported
|
||||||
|
|
@ -147,21 +148,38 @@ def export_metrics(
|
||||||
metrics_dir = docs_dir / "metrics"
|
metrics_dir = docs_dir / "metrics"
|
||||||
metrics_dir.mkdir(parents=True, exist_ok=True)
|
metrics_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Fetch all metrics with tags and owners
|
raw_metrics: List[Dict[str, Any]] = []
|
||||||
raw_metrics = client.get_metrics(limit=200, fields="tags,owners")
|
|
||||||
|
# Strategy 1: Discover metrics via data product (preferred)
|
||||||
|
if data_product:
|
||||||
|
try:
|
||||||
|
raw_metrics = client.search_by_data_product(
|
||||||
|
data_product_name=data_product,
|
||||||
|
entity_type="metric",
|
||||||
|
limit=200,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Data product '{data_product}': found {len(raw_metrics)} metrics"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Data product search failed, falling back to tag filter: {e}")
|
||||||
|
raw_metrics = []
|
||||||
|
|
||||||
|
# Strategy 2: Fallback to tag-based filter
|
||||||
if not raw_metrics:
|
if not raw_metrics:
|
||||||
logger.warning("No metrics returned from catalog - preserving existing files")
|
raw_metrics = client.get_metrics(limit=200, fields="tags,owners")
|
||||||
return 0
|
if not raw_metrics:
|
||||||
|
logger.warning("No metrics returned from catalog - preserving existing files")
|
||||||
|
return 0
|
||||||
|
|
||||||
logger.info(f"Fetched {len(raw_metrics)} metrics from catalog")
|
logger.info(f"Fetched {len(raw_metrics)} metrics from catalog")
|
||||||
|
|
||||||
# Filter by tag if configured
|
if filter_tag:
|
||||||
if filter_tag:
|
filtered = [m for m in raw_metrics if has_tag(m.get("tags", []), filter_tag)]
|
||||||
filtered = [m for m in raw_metrics if has_tag(m.get("tags", []), filter_tag)]
|
logger.info(
|
||||||
logger.info(
|
f"Tag filter '{filter_tag}': {len(filtered)}/{len(raw_metrics)} metrics matched"
|
||||||
f"Tag filter '{filter_tag}': {len(filtered)}/{len(raw_metrics)} metrics matched"
|
)
|
||||||
)
|
raw_metrics = filtered
|
||||||
raw_metrics = filtered
|
|
||||||
|
|
||||||
# Track which files we write (for cleanup)
|
# Track which files we write (for cleanup)
|
||||||
written_files: set[Path] = set()
|
written_files: set[Path] = set()
|
||||||
|
|
@ -412,12 +430,16 @@ def main() -> None:
|
||||||
logger.warning(f"Failed to initialize OpenMetadata client: {e}")
|
logger.warning(f"Failed to initialize OpenMetadata client: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Optional tag filter (only export metrics with this tag)
|
# Discovery config: data product (preferred) or tag filter (fallback)
|
||||||
filter_tag = om_config.get("filter_tag", "").strip()
|
filter_tag = om_config.get("filter_tag", "").strip()
|
||||||
|
data_product = om_config.get("data_product", "").strip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Export metrics
|
# Export metrics
|
||||||
metrics_count = export_metrics(client, docs_dir, catalog_url, filter_tag=filter_tag)
|
metrics_count = export_metrics(
|
||||||
|
client, docs_dir, catalog_url,
|
||||||
|
filter_tag=filter_tag, data_product=data_product,
|
||||||
|
)
|
||||||
|
|
||||||
# Export tables
|
# Export tables
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue