diff --git a/connectors/openmetadata/client.py b/connectors/openmetadata/client.py index f9623cc..5ccc102 100644 --- a/connectors/openmetadata/client.py +++ b/connectors/openmetadata/client.py @@ -8,6 +8,7 @@ Low-level HTTP wrapper for OpenMetadata REST API with these functions: 4. Proper error handling and logging """ +import json import logging from typing import Dict, List, Optional, Any import warnings @@ -138,6 +139,50 @@ class OpenMetadataClient: return response.json() + def search_by_data_product( + self, + data_product_name: str, + entity_type: str = "", + limit: int = 50, + fields: str = "tags,owners", + ) -> List[Dict[str, Any]]: + """ + Search for entities belonging to a data product. + + Uses OpenMetadata search API with queryFilter to find all assets + (metrics, tables, etc.) that are part of the specified data product. + + Args: + data_product_name: Name of the data product (e.g., "FoundryAIDataModel") + entity_type: Filter by entity type (e.g., "metric", "table"). Empty = all types. + limit: Maximum number of results + fields: Comma-separated list of fields to include + + Returns: + List of entity dictionaries + """ + must_clauses = [ + {"term": {"dataProducts.name.keyword": data_product_name}}, + ] + if entity_type: + must_clauses.append({"term": {"entityType": entity_type}}) + + query_filter = json.dumps({"bool": {"must": must_clauses}}) + + params = { + "q": "*", + "index": "all", + "size": limit, + "queryFilter": query_filter, + } + + response = self._client.get("/api/v1/search/query", params=params) + response.raise_for_status() + + data = response.json() + hits = data.get("hits", {}).get("hits", []) + return [hit.get("_source", {}) for hit in hits] + def close(self): """Close HTTP client session.""" self._client.close() diff --git a/server/bin/remove-analyst b/server/bin/remove-analyst index 4faaaa4..0047f0e 100755 --- a/server/bin/remove-analyst +++ b/server/bin/remove-analyst @@ -47,12 +47,15 @@ if [[ "$USERNAME" == "$CURRENT_USER" ]]; then exit 1 fi -# Get user groups for info (safe extraction, no pipefail issues) -GROUPS=$(id -nG "$USERNAME" 2>/dev/null) || GROUPS="(unknown)" +# Get user info (avoid using GROUPS - it's a bash special variable for current user's GIDs) +USER_GROUPS=$(id -nG "$USERNAME" 2>/dev/null) || USER_GROUPS="(unknown)" +HOME_DIR="/home/$USERNAME" +HOME_EXISTS=false +[[ -d "$HOME_DIR" ]] && HOME_EXISTS=true echo "Removing user: $USERNAME" -echo " Groups: $GROUPS" -echo " Home: /home/$USERNAME" +echo " Groups: $USER_GROUPS" +echo " Home: $HOME_DIR ($([ "$HOME_EXISTS" = true ] && echo "exists" || echo "already missing"))" if [[ "$FORCE" != true ]]; then read -p "Are you sure? [y/N] " -n 1 -r @@ -65,16 +68,23 @@ fi # Remove user and home directory echo " Deleting OS user..." -if userdel -r "$USERNAME" 2>/dev/null; then - echo " User and home directory removed" +USERDEL_ERR=$(userdel -r "$USERNAME" 2>&1) +USERDEL_EXIT=$? +if [[ $USERDEL_EXIT -eq 0 ]]; then + if [[ "$HOME_EXISTS" = true ]]; then + echo " User and home directory removed" + else + echo " User removed (home directory was already missing)" + fi elif userdel "$USERNAME" 2>/dev/null; then - echo " User removed (userdel -r failed, cleaning up home manually)" - if [[ -d "/home/$USERNAME" ]]; then - rm -rf "/home/$USERNAME" - echo " Home directory /home/$USERNAME removed" + echo " User removed (userdel -r failed: $USERDEL_ERR)" + if [[ -d "$HOME_DIR" ]]; then + rm -rf "$HOME_DIR" + echo " Home directory $HOME_DIR removed" fi else echo "Error: Failed to remove user '$USERNAME'" + echo " userdel error: $USERDEL_ERR" echo " Check if processes are running as this user: ps -u $USERNAME" exit 1 fi diff --git a/src/catalog_export.py b/src/catalog_export.py index 83c17c2..e306c38 100644 --- a/src/catalog_export.py +++ b/src/catalog_export.py @@ -123,23 +123,24 @@ def export_metrics( docs_dir: Path, catalog_url: str, filter_tag: str = "", + data_product: str = "", ) -> int: """ Export metrics from OpenMetadata to YAML files. For each metric: - 1. Fetches all metrics from catalog API - 2. Filters by required tag (if configured) - 3. Transforms each to YAML-compatible dict - 4. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml - 5. Writes index file: {docs_dir}/metrics/metrics.yml - 6. Cleans up stale auto-generated files + 1. Discovers metrics via data product (preferred) or fetches all + filters by tag + 2. Transforms each to YAML-compatible dict + 3. Writes individual YAML files: {docs_dir}/metrics/{category}/{name}.yml + 4. Writes index file: {docs_dir}/metrics/metrics.yml + 5. Cleans up stale auto-generated files Args: client: Initialized OpenMetadata API client docs_dir: Base docs directory (e.g., /data/docs) catalog_url: Catalog URL for header comments filter_tag: If set, only export metrics that have this tag (e.g., "AIAgent.FoundryAI") + data_product: If set, discover metrics via data product assets (preferred over filter_tag) Returns: Number of metrics exported @@ -147,21 +148,38 @@ def export_metrics( metrics_dir = docs_dir / "metrics" metrics_dir.mkdir(parents=True, exist_ok=True) - # Fetch all metrics with tags and owners - raw_metrics = client.get_metrics(limit=200, fields="tags,owners") + raw_metrics: List[Dict[str, Any]] = [] + + # Strategy 1: Discover metrics via data product (preferred) + if data_product: + try: + raw_metrics = client.search_by_data_product( + data_product_name=data_product, + entity_type="metric", + limit=200, + ) + logger.info( + f"Data product '{data_product}': found {len(raw_metrics)} metrics" + ) + except Exception as e: + logger.warning(f"Data product search failed, falling back to tag filter: {e}") + raw_metrics = [] + + # Strategy 2: Fallback to tag-based filter if not raw_metrics: - logger.warning("No metrics returned from catalog - preserving existing files") - return 0 + raw_metrics = client.get_metrics(limit=200, fields="tags,owners") + if not raw_metrics: + logger.warning("No metrics returned from catalog - preserving existing files") + return 0 - logger.info(f"Fetched {len(raw_metrics)} metrics from catalog") + logger.info(f"Fetched {len(raw_metrics)} metrics from catalog") - # Filter by tag if configured - if filter_tag: - filtered = [m for m in raw_metrics if has_tag(m.get("tags", []), filter_tag)] - logger.info( - f"Tag filter '{filter_tag}': {len(filtered)}/{len(raw_metrics)} metrics matched" - ) - raw_metrics = filtered + if filter_tag: + filtered = [m for m in raw_metrics if has_tag(m.get("tags", []), filter_tag)] + logger.info( + f"Tag filter '{filter_tag}': {len(filtered)}/{len(raw_metrics)} metrics matched" + ) + raw_metrics = filtered # Track which files we write (for cleanup) written_files: set[Path] = set() @@ -412,12 +430,16 @@ def main() -> None: logger.warning(f"Failed to initialize OpenMetadata client: {e}") return - # Optional tag filter (only export metrics with this tag) + # Discovery config: data product (preferred) or tag filter (fallback) filter_tag = om_config.get("filter_tag", "").strip() + data_product = om_config.get("data_product", "").strip() try: # Export metrics - metrics_count = export_metrics(client, docs_dir, catalog_url, filter_tag=filter_tag) + metrics_count = export_metrics( + client, docs_dir, catalog_url, + filter_tag=filter_tag, data_product=data_product, + ) # Export tables try: