diff --git a/config/instance.yaml.example b/config/instance.yaml.example index 4409de4..3995f8b 100644 --- a/config/instance.yaml.example +++ b/config/instance.yaml.example @@ -44,6 +44,34 @@ auth: google_client_id: "${GOOGLE_CLIENT_ID}" google_client_secret: "${GOOGLE_CLIENT_SECRET}" + # --- Webapp username shaping --- + # + # By default, a user's OS account is derived from their full email: + # e.psimecek@acme.com -> e_psimecek_acme_com + # + # Two options let you control this: + # + # username_strip_domain: true + # Use only the local part of the email (before @). + # Safe when allowed_domain ensures all users share a single domain. + # e.psimecek@acme.com -> e_psimecek + # Keeps usernames short and readable. + # + # username_prefix: "myapp_" + # Prepend a fixed string to every webapp-created account name. + # Necessary when an external identity system (GCP OS Login, LDAP, SAML) + # already creates OS accounts in /home/ using the same naming scheme. + # Without a prefix, the webapp sees those existing OS accounts and refuses + # to register new analyst accounts ("already in use by a system account"). + # With prefix "myapp_" and strip_domain true: + # e.psimecek@acme.com -> myapp_e_psimecek + # Linux enforces a 32-character username limit. Keep the prefix short. + # Changing or removing either option later will invalidate all existing + # analyst accounts. Use username_mapping (top-level) to bridge legacy accounts. + # + # username_strip_domain: false + # username_prefix: "" + # --- Theme (optional) --- # Customize colors, fonts, and shape to match your brand. # All values are optional - defaults provide a clean blue theme. 
class TestGetWebappUsername:
    """Exercise get_webapp_username() across prefix / strip-domain settings."""

    @staticmethod
    def _configure(monkeypatch, prefix, strip_domain):
        """Patch the two module-level username options for a single test."""
        monkeypatch.setattr(user_service_module, "_USERNAME_PREFIX", prefix)
        monkeypatch.setattr(user_service_module, "_USERNAME_STRIP_DOMAIN", strip_domain)

    def test_prefix_and_strip_domain(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        result = get_webapp_username("e.psimecek@groupon.com")
        assert result == "foundry_e_psimecek"

    def test_prefix_no_strip(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", False)
        result = get_webapp_username("e.psimecek@groupon.com")
        assert result == "foundry_e_psimecek_groupon_com"

    def test_no_prefix_strip_domain(self, monkeypatch):
        self._configure(monkeypatch, "", True)
        result = get_webapp_username("e.psimecek@groupon.com")
        assert result == "e_psimecek"

    def test_legacy_no_options(self, monkeypatch):
        # Both options off must reproduce the full-email naming.
        self._configure(monkeypatch, "", False)
        result = get_webapp_username("e.psimecek@groupon.com")
        assert result == "e_psimecek_groupon_com"

    def test_empty_email(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        result = get_webapp_username("")
        assert result == ""

    def test_none_email(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        result = get_webapp_username(None)
        assert result == ""

    def test_no_at_sign(self, monkeypatch):
        self._configure(monkeypatch, "foundry_", True)
        result = get_webapp_username("notanemail")
        assert result == ""

    def test_uppercase_normalized(self, monkeypatch):
        self._configure(monkeypatch, "app_", True)
        result = get_webapp_username("John.Doe@ACME.COM")
        assert result == "app_john_doe"

    def test_strip_domain_multiple_dots(self, monkeypatch):
        self._configure(monkeypatch, "", True)
        result = get_webapp_username("first.middle.last@company.com")
        assert result == "first_middle_last"
@@ def register_routes(app: Flask) -> None: """Get Telegram link status.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) status = get_telegram_status(username) return jsonify(status) @@ -951,7 +951,7 @@ def register_routes(app: Flask) -> None: """Get sync settings for current user.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) settings = get_sync_settings(username) return jsonify(settings) @@ -961,7 +961,7 @@ def register_routes(app: Flask) -> None: """Update sync settings for current user.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) data = request.get_json(silent=True) or {} datasets = data.get("datasets", {}) @@ -981,7 +981,7 @@ def register_routes(app: Flask) -> None: """Get per-table subscriptions for current user.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) subs = get_table_subscriptions(username) return jsonify(subs) @@ -991,7 +991,7 @@ def register_routes(app: Flask) -> None: """Update per-table subscriptions for current user.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) data = request.get_json(silent=True) or {} table_mode = data.get("table_mode", "all") @@ -1016,7 +1016,7 @@ def register_routes(app: Flask) -> None: """Corporate Memory knowledge browser page.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) # Get stats for header stats = get_memory_stats() @@ -1061,7 +1061,7 @@ def register_routes(app: Flask) -> None: # Get username for my_rules filter user 
= session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) # Limit per_page to reasonable maximum per_page = min(per_page, 100) @@ -1083,7 +1083,7 @@ def register_routes(app: Flask) -> None: """Get corporate memory statistics for dashboard.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) stats = get_memory_stats() user_stats = get_memory_user_stats(username) @@ -1099,7 +1099,7 @@ def register_routes(app: Flask) -> None: """Vote on a knowledge item.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) data = request.get_json(silent=True) or {} item_id = data.get("item_id") @@ -1124,7 +1124,7 @@ def register_routes(app: Flask) -> None: """Get current user's votes.""" user = session.get("user", {}) email = user.get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) votes = get_user_votes(username) return jsonify({"votes": votes}) diff --git a/webapp/auth.py b/webapp/auth.py index b901987..77583dc 100644 --- a/webapp/auth.py +++ b/webapp/auth.py @@ -48,10 +48,10 @@ def admin_required(f): return jsonify({"error": "Authentication required"}), 401 return redirect(url_for("auth.login")) - from .user_service import check_user_exists, get_username_from_email + from .user_service import check_user_exists, get_webapp_username email = session.get("user", {}).get("email", "") - username = get_username_from_email(email) + username = get_webapp_username(email) user_info = check_user_exists(username) if not user_info.is_admin: diff --git a/webapp/desktop_auth.py b/webapp/desktop_auth.py index efe61e4..4c5529b 100644 --- a/webapp/desktop_auth.py +++ b/webapp/desktop_auth.py @@ -20,7 +20,7 @@ from flask import Blueprint, abort, jsonify, render_template, request, 
def _as_bool(value: object) -> bool:
    """Interpret a config value as a boolean.

    Real booleans (and other non-string values) go through ``bool()`` as
    before. Strings are compared case-insensitively against the usual "true"
    spellings, so a quoted value such as ``"false"`` is not treated as truthy
    merely because it is a non-empty string.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"1", "true", "yes", "on"}
    return bool(value)


def _load_username_config() -> tuple[str, bool]:
    """Load username_prefix and username_strip_domain from instance config.

    Returns:
        A ``(prefix, strip_domain)`` tuple. Falls back to ``("", False)`` when
        the configuration cannot be imported or read; the failure is logged
        instead of being dropped silently, because the fallback changes how
        usernames are derived.
    """
    try:
        # Imported inside the try so that a missing config package results in
        # the defaults rather than breaking import of this module.
        from config.loader import load_instance_config, get_instance_value

        config = load_instance_config()
        prefix = get_instance_value(config, "auth", "username_prefix", default="") or ""
        strip = get_instance_value(config, "auth", "username_strip_domain", default=False)
    except Exception:
        # Deliberately best-effort: keep the application importable, but leave
        # a trace so a broken config does not go unnoticed.
        logging.getLogger(__name__).warning(
            "Could not load username settings from instance config; "
            "using defaults (no prefix, full-email usernames)",
            exc_info=True,
        )
        return "", False
    return str(prefix), _as_bool(strip)


# Resolved once at import time; get_webapp_username() reads these globals.
_USERNAME_PREFIX, _USERNAME_STRIP_DOMAIN = _load_username_config()
def get_webapp_username(email: str) -> str:
    """Convert email to webapp username, applying configured prefix and domain stripping.

    Controlled by two instance.yaml options under auth:
        username_prefix: "foundry_"   -> prepend to every username
        username_strip_domain: true   -> use only local part of email (safe on single-domain)

    Examples (prefix="foundry_", strip_domain=true):
        e.psimecek@groupon.com  -> foundry_e_psimecek
        john.doe@groupon.com    -> foundry_john_doe
        user+tag@groupon.com    -> foundry_user_tag

    Examples (no prefix, no strip):
        e.psimecek@groupon.com  -> e_psimecek_groupon_com  (legacy behaviour)

    Args:
        email: Address of the signed-in user. Falsy values (including None)
            and strings without "@" are treated as "no user".

    Returns:
        The derived username, or "" when no usable username can be derived.
    """
    if not email or "@" not in email:
        return ""

    if _USERNAME_STRIP_DOMAIN:
        # Everything before the first "@", lower-cased.
        local_part = email.partition("@")[0].strip().lower()
        # Dots become underscores as before; hyphens are kept. Any other
        # character outside [a-z0-9_-] (e.g. "+", quotes, non-ASCII letters)
        # is also mapped to "_" so it cannot leak into the OS account name,
        # where create_user's format check would reject it.
        base = re.sub(r"[^a-z0-9_-]", "_", local_part)
    else:
        base = get_username_from_email(email)

    if not base:
        return ""
    return f"{_USERNAME_PREFIX}{base}" if _USERNAME_PREFIX else base