Fix: correct OpenMetadata catalog URL path and add debug logging

- Change catalog URL from /explore/{fqn} to /table/{fqn}
- Add debug logging to see parsed tags, owners, tier from API response
This commit is contained in:
Petr 2026-03-12 14:34:12 +01:00
parent de66f6dd55
commit 14d75d6229
7 changed files with 151 additions and 26 deletions

View file

@ -44,6 +44,34 @@ auth:
google_client_id: "${GOOGLE_CLIENT_ID}"
google_client_secret: "${GOOGLE_CLIENT_SECRET}"
# --- Webapp username shaping ---
#
# By default, a user's OS account is derived from their full email:
# e.psimecek@acme.com -> e_psimecek_acme_com
#
# Two options let you control this:
#
# username_strip_domain: true
# Use only the local part of the email (before @).
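# Safe only when allowed_domain restricts all users to a single domain;
# otherwise users with the same local part on different domains would
# map to the same account name.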
# e.psimecek@acme.com -> e_psimecek
# Keeps usernames short and readable.
#
# username_prefix: "myapp_"
# Prepend a fixed string to every webapp-created account name.
# Necessary when an external identity system (GCP OS Login, LDAP, SAML)
# already creates OS accounts in /home/ using the same naming scheme.
# Without a prefix, the webapp sees those existing OS accounts and refuses
# to register new analyst accounts ("already in use by a system account").
# With prefix "myapp_" and strip_domain true:
# e.psimecek@acme.com -> myapp_e_psimecek
# Linux enforces a 32-character username limit. Keep the prefix short.
# Changing or removing either option later will invalidate all existing
# analyst accounts. Use username_mapping (top-level) to bridge legacy accounts.
#
# username_strip_domain: false
# username_prefix: ""
# --- Theme (optional) ---
# Customize colors, fonts, and shape to match your brand.
# All values are optional - defaults provide a clean blue theme.

View file

@ -221,13 +221,15 @@ class CatalogEnricher:
if extension:
tier = extension.get("tier") or extension.get("Tier")
# Debug logging
logger.debug(f"Parsed catalog data: tags={tags}, owners={owners}, tier={tier}, extension_keys={list(extension.keys()) if extension else 'empty'}")
# Build catalog URL
fqn = raw.get("fullyQualifiedName", "")
catalog_url = None
if fqn:
# Parse base URL from FQN context (would need base_url from config ideally)
# For now, construct a reasonable path
catalog_url = f"{self._client.base_url}/explore/{fqn}"
# Link to table entity page in OpenMetadata
catalog_url = f"{self._client.base_url}/table/{fqn}"
return CatalogTableData(
description=description,

View file

@ -2,7 +2,8 @@
import pytest
from webapp.user_service import get_username_from_email, RESERVED_USERNAMES
from webapp.user_service import get_username_from_email, get_webapp_username, RESERVED_USERNAMES
import webapp.user_service as user_service_module
class TestGetUsernameFromEmail:
@ -49,3 +50,52 @@ class TestGetUsernameFromEmail:
def test_subdomain_email(self):
assert get_username_from_email("user@mail.acme.co.uk") == "user_mail_acme_co_uk"
class TestGetWebappUsername:
    """Exercise get_webapp_username() across prefix / strip-domain settings."""

    @staticmethod
    def _configure(monkeypatch, prefix, strip_domain):
        """Patch the module-level username settings for the current test."""
        monkeypatch.setattr(user_service_module, "_USERNAME_PREFIX", prefix)
        monkeypatch.setattr(user_service_module, "_USERNAME_STRIP_DOMAIN", strip_domain)

    def test_prefix_and_strip_domain(self, monkeypatch):
        """Prefix is prepended to the sanitised local part only."""
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "foundry_e_psimecek"

    def test_prefix_no_strip(self, monkeypatch):
        """Prefix is prepended to the full-email-derived name."""
        self._configure(monkeypatch, "foundry_", False)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "foundry_e_psimecek_groupon_com"

    def test_no_prefix_strip_domain(self, monkeypatch):
        """Without a prefix, stripping the domain yields the bare local part."""
        self._configure(monkeypatch, "", True)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "e_psimecek"

    def test_legacy_no_options(self, monkeypatch):
        """With both options off the legacy full-email name is returned."""
        self._configure(monkeypatch, "", False)
        username = get_webapp_username("e.psimecek@groupon.com")
        assert username == "e_psimecek_groupon_com"

    def test_empty_email(self, monkeypatch):
        """An empty email produces an empty username, not a bare prefix."""
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("")
        assert username == ""

    def test_none_email(self, monkeypatch):
        """None is tolerated and produces an empty username."""
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username(None)
        assert username == ""

    def test_no_at_sign(self, monkeypatch):
        """Input without '@' is rejected with an empty username."""
        self._configure(monkeypatch, "foundry_", True)
        username = get_webapp_username("notanemail")
        assert username == ""

    def test_uppercase_normalized(self, monkeypatch):
        """Mixed-case emails are lower-cased before the prefix is applied."""
        self._configure(monkeypatch, "app_", True)
        username = get_webapp_username("John.Doe@ACME.COM")
        assert username == "app_john_doe"

    def test_strip_domain_multiple_dots(self, monkeypatch):
        """Every dot in the local part becomes an underscore."""
        self._configure(monkeypatch, "", True)
        username = get_webapp_username("first.middle.last@company.com")
        assert username == "first_middle_last"

View file

@ -43,7 +43,7 @@ from .user_service import (
UserInfo,
check_user_exists,
create_user,
get_username_from_email,
get_webapp_username,
is_username_available,
validate_ssh_key,
)
@ -562,7 +562,7 @@ def register_routes(app: Flask) -> None:
"""Show user dashboard with account info or registration form."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
# Check if user exists on the system
user_info = check_user_exists(username)
@ -648,7 +648,7 @@ def register_routes(app: Flask) -> None:
"""Data catalog page."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
data_stats = _load_data_stats()
catalog_data = _load_catalog_data()
@ -822,7 +822,7 @@ def register_routes(app: Flask) -> None:
"""Create a new analyst account."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
# Check if user already exists
user_info = check_user_exists(username)
@ -858,7 +858,7 @@ def register_routes(app: Flask) -> None:
"""Verify a Telegram verification code and link the account."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
data = request.get_json(silent=True) or {}
code = data.get("code", "").strip()
@ -880,7 +880,7 @@ def register_routes(app: Flask) -> None:
"""Unlink Telegram from the account."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
success, message = unlink_telegram(username)
if success:
@ -894,7 +894,7 @@ def register_routes(app: Flask) -> None:
"""Get Telegram link status."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
status = get_telegram_status(username)
return jsonify(status)
@ -951,7 +951,7 @@ def register_routes(app: Flask) -> None:
"""Get sync settings for current user."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
settings = get_sync_settings(username)
return jsonify(settings)
@ -961,7 +961,7 @@ def register_routes(app: Flask) -> None:
"""Update sync settings for current user."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
data = request.get_json(silent=True) or {}
datasets = data.get("datasets", {})
@ -981,7 +981,7 @@ def register_routes(app: Flask) -> None:
"""Get per-table subscriptions for current user."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
subs = get_table_subscriptions(username)
return jsonify(subs)
@ -991,7 +991,7 @@ def register_routes(app: Flask) -> None:
"""Update per-table subscriptions for current user."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
data = request.get_json(silent=True) or {}
table_mode = data.get("table_mode", "all")
@ -1016,7 +1016,7 @@ def register_routes(app: Flask) -> None:
"""Corporate Memory knowledge browser page."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
# Get stats for header
stats = get_memory_stats()
@ -1061,7 +1061,7 @@ def register_routes(app: Flask) -> None:
# Get username for my_rules filter
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
# Limit per_page to reasonable maximum
per_page = min(per_page, 100)
@ -1083,7 +1083,7 @@ def register_routes(app: Flask) -> None:
"""Get corporate memory statistics for dashboard."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
stats = get_memory_stats()
user_stats = get_memory_user_stats(username)
@ -1099,7 +1099,7 @@ def register_routes(app: Flask) -> None:
"""Vote on a knowledge item."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
data = request.get_json(silent=True) or {}
item_id = data.get("item_id")
@ -1124,7 +1124,7 @@ def register_routes(app: Flask) -> None:
"""Get current user's votes."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
votes = get_user_votes(username)
return jsonify({"votes": votes})

View file

@ -48,10 +48,10 @@ def admin_required(f):
return jsonify({"error": "Authentication required"}), 401
return redirect(url_for("auth.login"))
from .user_service import check_user_exists, get_username_from_email
from .user_service import check_user_exists, get_webapp_username
email = session.get("user", {}).get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
user_info = check_user_exists(username)
if not user_info.is_admin:

View file

@ -20,7 +20,7 @@ from flask import Blueprint, abort, jsonify, render_template, request, session
from .auth import login_required
from .config import Config
from .user_service import get_username_from_email
from .user_service import get_webapp_username
logger = logging.getLogger(__name__)
@ -132,7 +132,7 @@ def desktop_link():
"""Render the desktop app authorization page."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
return render_template("desktop_link.html", username=username)
@ -142,7 +142,7 @@ def desktop_authorize():
"""Generate a JWT token for the desktop app and return a redirect URL."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
token = _create_desktop_token(username)
redirect_url = f"{Config.DESKTOP_URL_SCHEME}://auth?token={token}"
@ -184,7 +184,7 @@ def desktop_unlink():
"""Unlink desktop app from the account."""
user = session.get("user", {})
email = user.get("email", "")
username = get_username_from_email(email)
username = get_webapp_username(email)
success, message = unlink_desktop(username)
if success:

View file

@ -14,6 +14,21 @@ from dataclasses import dataclass
logger = logging.getLogger(__name__)
def _load_username_config() -> tuple[str, bool]:
"""Load username_prefix and username_strip_domain from instance config."""
try:
from config.loader import load_instance_config, get_instance_value
config = load_instance_config()
prefix = get_instance_value(config, "auth", "username_prefix", default="") or ""
strip = get_instance_value(config, "auth", "username_strip_domain", default=False)
return prefix, bool(strip)
except Exception:
return "", False
# Module-level username settings, resolved once when this module is imported.
# get_webapp_username() reads these names on every call.
_USERNAME_PREFIX, _USERNAME_STRIP_DOMAIN = _load_username_config()
@dataclass
class UserInfo:
"""Information about an existing system user."""
@ -58,6 +73,33 @@ def get_username_from_email(email: str) -> str:
return safe_username
def get_webapp_username(email: str) -> str:
    """Derive the webapp account name for *email*.

    The result is shaped by two module-level settings:

    * ``_USERNAME_STRIP_DOMAIN`` - when true, only the part before ``@`` is
      used (lower-cased, dots turned into underscores); otherwise the name
      comes from ``get_username_from_email``.
    * ``_USERNAME_PREFIX`` - when non-empty, prepended to the derived name.

    Examples (prefix="foundry_", strip_domain=true):
        e.psimecek@groupon.com -> foundry_e_psimecek
        john.doe@groupon.com -> foundry_john_doe

    Examples (no prefix, no strip):
        e.psimecek@groupon.com -> e_psimecek_groupon_com (legacy behaviour)

    Returns:
        The username, or ``""`` when *email* is empty, has no ``@``, or
        yields an empty base name.
    """
    if not (email and "@" in email):
        return ""

    if _USERNAME_STRIP_DOMAIN:
        # NOTE(review): only "." is rewritten here; any other character in
        # the local part passes through unchanged - confirm this matches the
        # sanitisation done by get_username_from_email.
        local_part = email.partition("@")[0]
        stem = local_part.lower().replace(".", "_")
    else:
        stem = get_username_from_email(email)

    if not stem:
        # Never hand back a bare prefix as a username.
        return ""

    if _USERNAME_PREFIX:
        return f"{_USERNAME_PREFIX}{stem}"
    return stem
def is_username_available(username: str) -> tuple[bool, str]:
"""
Check if username is available for registration.
@ -172,6 +214,9 @@ def create_user(username: str, ssh_key: str) -> tuple[bool, str]:
if not username or not re.match(r"^[a-z][a-z0-9._-]*$", username):
return False, "Invalid username format"
if len(username) > 32:
return False, f"Username too long ({len(username)} chars, max 32). Check username_prefix in config."
is_valid, error = validate_ssh_key(ssh_key)
if not is_valid:
return False, error