Fix: correct OpenMetadata catalog URL path and add debug logging

- Change catalog URL from /explore/{fqn} to /table/{fqn}
- Add debug logging to see parsed tags, owners, tier from API response
This commit is contained in:
Petr 2026-03-12 14:34:12 +01:00
parent de66f6dd55
commit 14d75d6229
7 changed files with 151 additions and 26 deletions

View file

@ -44,6 +44,34 @@ auth:
google_client_id: "${GOOGLE_CLIENT_ID}" google_client_id: "${GOOGLE_CLIENT_ID}"
google_client_secret: "${GOOGLE_CLIENT_SECRET}" google_client_secret: "${GOOGLE_CLIENT_SECRET}"
# --- Webapp username shaping ---
#
# By default, a user's OS account is derived from their full email:
# e.psimecek@acme.com -> e_psimecek_acme_com
#
# Two options let you control this:
#
# username_strip_domain: true
# Use only the local part of the email (before @).
# Safe only when allowed_domain ensures all users share a single domain;
# otherwise the same local part on two different domains maps to one account.
# e.psimecek@acme.com -> e_psimecek
# Keeps usernames short and readable.
#
# username_prefix: "myapp_"
# Prepend a fixed string to every webapp-created account name.
# Necessary when an external identity system (GCP OS Login, LDAP, SAML)
# already creates OS accounts in /home/ using the same naming scheme.
# Without a prefix, the webapp sees those existing OS accounts and refuses
# to register new analyst accounts ("already in use by a system account").
# With prefix "myapp_" and strip_domain true:
# e.psimecek@acme.com -> myapp_e_psimecek
# Linux enforces a 32-character username limit (prefix included), and account
# creation is rejected for longer names. Keep the prefix short.
# Changing or removing either option later will invalidate all existing
# analyst accounts. Use username_mapping (top-level) to bridge legacy accounts.
#
# username_strip_domain: false
# username_prefix: ""
# --- Theme (optional) --- # --- Theme (optional) ---
# Customize colors, fonts, and shape to match your brand. # Customize colors, fonts, and shape to match your brand.
# All values are optional - defaults provide a clean blue theme. # All values are optional - defaults provide a clean blue theme.

View file

@ -221,13 +221,15 @@ class CatalogEnricher:
if extension: if extension:
tier = extension.get("tier") or extension.get("Tier") tier = extension.get("tier") or extension.get("Tier")
# Debug logging
logger.debug(f"Parsed catalog data: tags={tags}, owners={owners}, tier={tier}, extension_keys={list(extension.keys()) if extension else 'empty'}")
# Build catalog URL # Build catalog URL
fqn = raw.get("fullyQualifiedName", "") fqn = raw.get("fullyQualifiedName", "")
catalog_url = None catalog_url = None
if fqn: if fqn:
# Parse base URL from FQN context (would need base_url from config ideally) # Link to table entity page in OpenMetadata
# For now, construct a reasonable path catalog_url = f"{self._client.base_url}/table/{fqn}"
catalog_url = f"{self._client.base_url}/explore/{fqn}"
return CatalogTableData( return CatalogTableData(
description=description, description=description,

View file

@ -2,7 +2,8 @@
import pytest import pytest
from webapp.user_service import get_username_from_email, RESERVED_USERNAMES from webapp.user_service import get_username_from_email, get_webapp_username, RESERVED_USERNAMES
import webapp.user_service as user_service_module
class TestGetUsernameFromEmail: class TestGetUsernameFromEmail:
@ -49,3 +50,52 @@ class TestGetUsernameFromEmail:
def test_subdomain_email(self): def test_subdomain_email(self):
assert get_username_from_email("user@mail.acme.co.uk") == "user_mail_acme_co_uk" assert get_username_from_email("user@mail.acme.co.uk") == "user_mail_acme_co_uk"
class TestGetWebappUsername:
    """Exercise get_webapp_username() across prefix and strip-domain settings."""

    @staticmethod
    def _configure(monkeypatch, prefix, strip_domain):
        """Patch the module-level username settings for the current test only."""
        monkeypatch.setattr(user_service_module, "_USERNAME_PREFIX", prefix)
        monkeypatch.setattr(user_service_module, "_USERNAME_STRIP_DOMAIN", strip_domain)

    def test_prefix_and_strip_domain(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "foundry_e_psimecek"

    def test_prefix_no_strip(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", False)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "foundry_e_psimecek_groupon_com"

    def test_no_prefix_strip_domain(self, monkeypatch):
        self._configure(monkeypatch, "", True)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "e_psimecek"

    def test_legacy_no_options(self, monkeypatch):
        # Neither option set: must match the historical full-email scheme.
        self._configure(monkeypatch, "", False)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "e_psimecek_groupon_com"

    def test_empty_email(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("")
        assert username == ""

    def test_none_email(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username(None)
        assert username == ""

    def test_no_at_sign(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("notanemail")
        assert username == ""

    def test_uppercase_normalized(self, monkeypatch):
        self._configure(monkeypatch, "app_", True)
        username = get_webapp_username("John.Doe@ACME.COM")
        assert username == "app_john_doe"

    def test_strip_domain_multiple_dots(self, monkeypatch):
        self._configure(monkeypatch, "", True)
        username = get_webapp_username("first.middle.last@company.com")
        assert username == "first_middle_last"

View file

@ -43,7 +43,7 @@ from .user_service import (
UserInfo, UserInfo,
check_user_exists, check_user_exists,
create_user, create_user,
get_username_from_email, get_webapp_username,
is_username_available, is_username_available,
validate_ssh_key, validate_ssh_key,
) )
@ -562,7 +562,7 @@ def register_routes(app: Flask) -> None:
"""Show user dashboard with account info or registration form.""" """Show user dashboard with account info or registration form."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
# Check if user exists on the system # Check if user exists on the system
user_info = check_user_exists(username) user_info = check_user_exists(username)
@ -648,7 +648,7 @@ def register_routes(app: Flask) -> None:
"""Data catalog page.""" """Data catalog page."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
data_stats = _load_data_stats() data_stats = _load_data_stats()
catalog_data = _load_catalog_data() catalog_data = _load_catalog_data()
@ -822,7 +822,7 @@ def register_routes(app: Flask) -> None:
"""Create a new analyst account.""" """Create a new analyst account."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
# Check if user already exists # Check if user already exists
user_info = check_user_exists(username) user_info = check_user_exists(username)
@ -858,7 +858,7 @@ def register_routes(app: Flask) -> None:
"""Verify a Telegram verification code and link the account.""" """Verify a Telegram verification code and link the account."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
data = request.get_json(silent=True) or {} data = request.get_json(silent=True) or {}
code = data.get("code", "").strip() code = data.get("code", "").strip()
@ -880,7 +880,7 @@ def register_routes(app: Flask) -> None:
"""Unlink Telegram from the account.""" """Unlink Telegram from the account."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
success, message = unlink_telegram(username) success, message = unlink_telegram(username)
if success: if success:
@ -894,7 +894,7 @@ def register_routes(app: Flask) -> None:
"""Get Telegram link status.""" """Get Telegram link status."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
status = get_telegram_status(username) status = get_telegram_status(username)
return jsonify(status) return jsonify(status)
@ -951,7 +951,7 @@ def register_routes(app: Flask) -> None:
"""Get sync settings for current user.""" """Get sync settings for current user."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
settings = get_sync_settings(username) settings = get_sync_settings(username)
return jsonify(settings) return jsonify(settings)
@ -961,7 +961,7 @@ def register_routes(app: Flask) -> None:
"""Update sync settings for current user.""" """Update sync settings for current user."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
data = request.get_json(silent=True) or {} data = request.get_json(silent=True) or {}
datasets = data.get("datasets", {}) datasets = data.get("datasets", {})
@ -981,7 +981,7 @@ def register_routes(app: Flask) -> None:
"""Get per-table subscriptions for current user.""" """Get per-table subscriptions for current user."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
subs = get_table_subscriptions(username) subs = get_table_subscriptions(username)
return jsonify(subs) return jsonify(subs)
@ -991,7 +991,7 @@ def register_routes(app: Flask) -> None:
"""Update per-table subscriptions for current user.""" """Update per-table subscriptions for current user."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
data = request.get_json(silent=True) or {} data = request.get_json(silent=True) or {}
table_mode = data.get("table_mode", "all") table_mode = data.get("table_mode", "all")
@ -1016,7 +1016,7 @@ def register_routes(app: Flask) -> None:
"""Corporate Memory knowledge browser page.""" """Corporate Memory knowledge browser page."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
# Get stats for header # Get stats for header
stats = get_memory_stats() stats = get_memory_stats()
@ -1061,7 +1061,7 @@ def register_routes(app: Flask) -> None:
# Get username for my_rules filter # Get username for my_rules filter
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
# Limit per_page to reasonable maximum # Limit per_page to reasonable maximum
per_page = min(per_page, 100) per_page = min(per_page, 100)
@ -1083,7 +1083,7 @@ def register_routes(app: Flask) -> None:
"""Get corporate memory statistics for dashboard.""" """Get corporate memory statistics for dashboard."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
stats = get_memory_stats() stats = get_memory_stats()
user_stats = get_memory_user_stats(username) user_stats = get_memory_user_stats(username)
@ -1099,7 +1099,7 @@ def register_routes(app: Flask) -> None:
"""Vote on a knowledge item.""" """Vote on a knowledge item."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
data = request.get_json(silent=True) or {} data = request.get_json(silent=True) or {}
item_id = data.get("item_id") item_id = data.get("item_id")
@ -1124,7 +1124,7 @@ def register_routes(app: Flask) -> None:
"""Get current user's votes.""" """Get current user's votes."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
votes = get_user_votes(username) votes = get_user_votes(username)
return jsonify({"votes": votes}) return jsonify({"votes": votes})

View file

@ -48,10 +48,10 @@ def admin_required(f):
return jsonify({"error": "Authentication required"}), 401 return jsonify({"error": "Authentication required"}), 401
return redirect(url_for("auth.login")) return redirect(url_for("auth.login"))
from .user_service import check_user_exists, get_username_from_email from .user_service import check_user_exists, get_webapp_username
email = session.get("user", {}).get("email", "") email = session.get("user", {}).get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
user_info = check_user_exists(username) user_info = check_user_exists(username)
if not user_info.is_admin: if not user_info.is_admin:

View file

@ -20,7 +20,7 @@ from flask import Blueprint, abort, jsonify, render_template, request, session
from .auth import login_required from .auth import login_required
from .config import Config from .config import Config
from .user_service import get_username_from_email from .user_service import get_webapp_username
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -132,7 +132,7 @@ def desktop_link():
"""Render the desktop app authorization page.""" """Render the desktop app authorization page."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
return render_template("desktop_link.html", username=username) return render_template("desktop_link.html", username=username)
@ -142,7 +142,7 @@ def desktop_authorize():
"""Generate a JWT token for the desktop app and return a redirect URL.""" """Generate a JWT token for the desktop app and return a redirect URL."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
token = _create_desktop_token(username) token = _create_desktop_token(username)
redirect_url = f"{Config.DESKTOP_URL_SCHEME}://auth?token={token}" redirect_url = f"{Config.DESKTOP_URL_SCHEME}://auth?token={token}"
@ -184,7 +184,7 @@ def desktop_unlink():
"""Unlink desktop app from the account.""" """Unlink desktop app from the account."""
user = session.get("user", {}) user = session.get("user", {})
email = user.get("email", "") email = user.get("email", "")
username = get_username_from_email(email) username = get_webapp_username(email)
success, message = unlink_desktop(username) success, message = unlink_desktop(username)
if success: if success:

View file

@ -14,6 +14,21 @@ from dataclasses import dataclass
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _load_username_config() -> tuple[str, bool]:
"""Load username_prefix and username_strip_domain from instance config."""
try:
from config.loader import load_instance_config, get_instance_value
config = load_instance_config()
prefix = get_instance_value(config, "auth", "username_prefix", default="") or ""
strip = get_instance_value(config, "auth", "username_strip_domain", default=False)
return prefix, bool(strip)
except Exception:
return "", False
_USERNAME_PREFIX, _USERNAME_STRIP_DOMAIN = _load_username_config()
@dataclass @dataclass
class UserInfo: class UserInfo:
"""Information about an existing system user.""" """Information about an existing system user."""
@ -58,6 +73,33 @@ def get_username_from_email(email: str) -> str:
return safe_username return safe_username
def get_webapp_username(email: str) -> str:
    """Derive the webapp account name for *email*.

    The result depends on two ``auth`` options from instance.yaml, exposed
    here as module-level settings:

        username_prefix: "foundry_"   -> prepended to every username
        username_strip_domain: true   -> only the part before "@" is used

    With prefix="foundry_" and strip_domain=true:
        e.psimecek@groupon.com -> foundry_e_psimecek
        john.doe@groupon.com   -> foundry_john_doe

    With no prefix and no stripping (legacy behaviour):
        e.psimecek@groupon.com -> e_psimecek_groupon_com

    Returns:
        The username, or "" when *email* is empty, has no "@", or yields an
        empty base name.
    """
    has_address_shape = bool(email) and "@" in email
    if not has_address_shape:
        return ""

    if _USERNAME_STRIP_DOMAIN:
        # Keep only the local part, lower-cased, with dots turned into underscores.
        local_part = email.partition("@")[0]
        base = local_part.lower().replace(".", "_")
    else:
        base = get_username_from_email(email)

    if not base:
        return ""
    if not _USERNAME_PREFIX:
        return base
    return f"{_USERNAME_PREFIX}{base}"
def is_username_available(username: str) -> tuple[bool, str]: def is_username_available(username: str) -> tuple[bool, str]:
""" """
Check if username is available for registration. Check if username is available for registration.
@ -172,6 +214,9 @@ def create_user(username: str, ssh_key: str) -> tuple[bool, str]:
if not username or not re.match(r"^[a-z][a-z0-9._-]*$", username): if not username or not re.match(r"^[a-z][a-z0-9._-]*$", username):
return False, "Invalid username format" return False, "Invalid username format"
if len(username) > 32:
return False, f"Username too long ({len(username)} chars, max 32). Check username_prefix in config."
is_valid, error = validate_ssh_key(ssh_key) is_valid, error = validate_ssh_key(ssh_key)
if not is_valid: if not is_valid:
return False, error return False, error