agnes-the-ai-analyst/webapp/user_service.py
Petr 14d75d6229 Fix: correct OpenMetadata catalog URL path and add debug logging
- Change catalog URL from /explore/{fqn} to /table/{fqn}
- Add debug logging to see parsed tags, owners, tier from API response
2026-03-12 14:34:12 +01:00

252 lines
8.1 KiB
Python

"""
User management service.
Handles checking if users exist and creating new analyst accounts.
"""
import grp
import logging
import pwd
import re
import subprocess
from dataclasses import dataclass
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _load_username_config() -> tuple[str, bool]:
    """Load username_prefix and username_strip_domain from instance config.

    Reads ``auth.username_prefix`` and ``auth.username_strip_domain`` through
    ``config.loader``.

    Returns:
        ``(prefix, strip_domain)``. A missing or falsy prefix becomes ``""``;
        the strip flag is coerced with ``bool()``. If the config cannot be
        imported or read, ``("", False)`` is returned and the failure is
        logged, so a broken config no longer changes usernames silently.
    """
    try:
        # The import lives inside the try so that an absent or broken config
        # package falls back to the defaults instead of failing at import time.
        from config.loader import load_instance_config, get_instance_value

        config = load_instance_config()
        prefix = get_instance_value(config, "auth", "username_prefix", default="") or ""
        strip = get_instance_value(config, "auth", "username_strip_domain", default=False)
    except Exception:
        # Deliberate best-effort: keep the fallback, but leave a trace.
        logger.warning(
            "Could not load auth username settings from instance config; "
            "using no prefix and full-email usernames",
            exc_info=True,
        )
        return "", False
    return prefix, bool(strip)
# Resolved once, when this module is imported; get_webapp_username() reads
# these module-level values, so later config changes are not picked up until
# the module is imported again.
_USERNAME_PREFIX, _USERNAME_STRIP_DOMAIN = _load_username_config()
@dataclass
class UserInfo:
"""Information about an existing system user."""
username: str
exists: bool
groups: list[str]
home_dir: str | None = None
is_analyst: bool = False
is_privileged: bool = False
is_admin: bool = False
# Usernames that may never be registered through the webapp.
# Kept in alphabetical order so additions are easy to review.
RESERVED_USERNAMES = frozenset({
    "admin",
    "administrator",
    "apache",
    "backup",
    "bin",
    "centos",
    "daemon",
    "data",
    "debian",
    "deploy",
    "docker",
    "elasticsearch",
    "ftp",
    "games",
    "git",
    "gnats",
    "guest",
    "http",
    "irc",
    "list",
    "lp",
    "mail",
    "man",
    "messagebus",
    "mongodb",
    "mysql",
    "news",
    "nginx",
    "nobody",
    "postgres",
    "proxy",
    "redis",
    "root",
    "sshd",
    "sync",
    "sys",
    "syslog",
    "systemd",
    "test",
    "ubuntu",
    "user",
    "uucp",
    "www-data",
})
def get_username_from_email(email: str) -> str:
    """
    Derive a system username from a complete email address.

    The address is lower-cased and every '@' and '.' is turned into '_':
        admin@test.com     -> admin_test_com
        pavel@groupon.com  -> pavel_groupon_com
        john.doe@acme.com  -> john_doe_acme_com

    Keeping the domain in the name separates users of different domains and
    keeps results away from bare reserved names such as 'admin' or 'test'.
    NOTE: '.' and '_' map to the same character, so addresses that differ
    only in those characters still produce the same username.

    Returns an empty string when the input is empty or has no '@'.
    """
    if not (email and "@" in email):
        return ""
    # One translation pass replaces both separators with underscores.
    separators_to_underscore = str.maketrans({"@": "_", ".": "_"})
    return email.lower().translate(separators_to_underscore)
def get_webapp_username(email: str) -> str:
    """Map an email address to the webapp username for this instance.

    Two instance.yaml options under auth shape the result:
        username_prefix: "foundry_"   -> prepended to every username
        username_strip_domain: true   -> keep only the local part of the email
                                         (safe on single-domain instances)

    With prefix="foundry_" and strip_domain=true:
        e.psimecek@groupon.com -> foundry_e_psimecek
        john.doe@groupon.com   -> foundry_john_doe
    With no prefix and no stripping (legacy behaviour):
        e.psimecek@groupon.com -> e_psimecek_groupon_com

    Returns an empty string when the email is empty, has no '@', or yields
    an empty base name.
    """
    if not (email and "@" in email):
        return ""

    if _USERNAME_STRIP_DOMAIN:
        # Everything before the first '@', lower-cased, dots to underscores.
        local_part, _, _ = email.partition("@")
        stem = local_part.lower().replace(".", "_")
    else:
        stem = get_username_from_email(email)

    if not stem:
        return ""
    if _USERNAME_PREFIX:
        return f"{_USERNAME_PREFIX}{stem}"
    return stem
def is_username_available(username: str) -> tuple[bool, str]:
    """
    Decide whether a username can be used for a new registration.

    Returns (is_available, reason); reason is "" when the name is free.

    The name is rejected when:
    - it is empty,
    - it is listed in RESERVED_USERNAMES, or
    - a system account with that name already exists. The reason text
      distinguishes an existing analyst account (member of 'dataread')
      from any other system account.
    """
    if not username:
        return False, "Username cannot be empty"
    if username in RESERVED_USERNAMES:
        return False, f"Username '{username}' is reserved for system use"

    existing = check_user_exists(username)
    if not existing.exists:
        return True, ""
    if existing.is_analyst:
        # Already registered through this system: may log in, not re-register.
        return False, "Account already exists"
    # Present on the host but not an analyst account.
    return False, f"Username '{username}' is already in use by a system account"
def check_user_exists(username: str) -> UserInfo:
    """
    Look up a system account and summarise its group membership.

    Returns a UserInfo with exists=False (and no groups) when the passwd
    database has no entry for ``username``. Otherwise the result carries the
    home directory, the sorted group names, and role flags derived from
    those groups.
    """
    try:
        passwd_entry = pwd.getpwnam(username)
    except KeyError:
        # No such account.
        return UserInfo(username=username, exists=False, groups=[])

    # Groups that list the user as a member.
    memberships = [
        entry.gr_name for entry in grp.getgrall() if username in entry.gr_mem
    ]

    # The primary group comes from the passwd entry's gid; a gid without a
    # matching group entry is skipped.
    try:
        primary = grp.getgrgid(passwd_entry.pw_gid).gr_name
    except KeyError:
        primary = None
    if primary is not None and primary not in memberships:
        memberships.append(primary)

    return UserInfo(
        username=username,
        exists=True,
        groups=sorted(memberships),
        home_dir=passwd_entry.pw_dir,
        is_analyst="dataread" in memberships,
        is_privileged="data-private" in memberships,
        is_admin=any(name in memberships for name in ("sudo", "data-ops")),
    )
def validate_ssh_key(ssh_key: str) -> tuple[bool, str]:
    """
    Validate SSH public key format.

    The key is whitespace-normalised (newlines, tabs and runs of spaces
    collapse to single spaces) before any check, so a key pasted across
    several lines is judged as one line.

    Checks, in order:
    1. the key is not empty (or whitespace only),
    2. it does not look like a private key,
    3. it matches ``<type> <base64> [comment]`` for ssh-rsa, ssh-ed25519,
       ecdsa-sha2-nistp* or ssh-dss,
    4. it is at least 80 characters long.

    Returns (is_valid, error_message); error_message is "" when valid.
    """
    # Normalise first so whitespace-only input counts as a missing key.
    ssh_key = " ".join(ssh_key.split()) if ssh_key else ""
    if not ssh_key:
        return False, "SSH key is required"

    # Must run BEFORE the format check: a pasted private key never matches
    # the public-key pattern, so checking afterwards would always report the
    # generic format error instead of this more useful message.
    if "PRIVATE KEY" in ssh_key:
        return False, "This appears to be a private key. Please provide your PUBLIC key instead."

    # Supports: ssh-rsa, ssh-ed25519, ecdsa-sha2-nistp256, etc.
    key_pattern = r"^(ssh-rsa|ssh-ed25519|ecdsa-sha2-nistp\d+|ssh-dss)\s+[A-Za-z0-9+/=]+(\s+.+)?$"
    if not re.match(key_pattern, ssh_key):
        return False, "Invalid SSH key format. Key should start with 'ssh-rsa', 'ssh-ed25519', etc."

    # Minimum length (RSA keys are typically 372+ chars for 2048 bit).
    if len(ssh_key) < 80:
        return False, "SSH key appears too short"

    return True, ""
def create_user(username: str, ssh_key: str) -> tuple[bool, str]:
    """
    Create a new standard analyst user.

    Validates the inputs, then runs
    ``/usr/bin/sudo /usr/local/bin/add-analyst <username> <ssh_key>``
    with a 30 second timeout.

    Args:
        username: Lowercase name starting with a letter, followed by letters,
            digits, '.', '_' or '-'; at most 32 characters.
        ssh_key: SSH public key; must pass validate_ssh_key(). It is
            collapsed to a single line before being passed to the script.

    Returns:
        (success, message). On failure the message is the validation error,
        the script's stderr/stdout, or a description of the exception.
        This function does not raise for those failures.
    """
    # fullmatch rather than match + "$": "$" also matches just before a
    # trailing newline, which would let "name\n" through to the sudo call.
    if not username or not re.fullmatch(r"[a-z][a-z0-9._-]*", username):
        return False, "Invalid username format"
    if len(username) > 32:
        return False, f"Username too long ({len(username)} chars, max 32). Check username_prefix in config."

    is_valid, error = validate_ssh_key(ssh_key)
    if not is_valid:
        return False, error

    # Normalize whitespace: ensure key is a single line
    ssh_key = " ".join(ssh_key.split())

    try:
        # Argument list (no shell), so neither value is shell-interpreted.
        result = subprocess.run(
            ["/usr/bin/sudo", "/usr/local/bin/add-analyst", username, ssh_key],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        logger.error("Timeout creating user: %s", username)
        return False, "User creation timed out"
    except FileNotFoundError:
        # NOTE(review): subprocess raises this when the program it launches
        # (here /usr/bin/sudo) is missing; a missing add-analyst script may
        # instead surface as a non-zero exit from sudo. Confirm on the host.
        logger.error("add-analyst script not found")
        return False, "User creation script not found on server"
    except Exception as e:
        logger.exception("Error creating user %s: %s", username, e)
        return False, f"Error creating user: {str(e)}"

    if result.returncode == 0:
        logger.info("Successfully created user: %s", username)
        return True, f"User '{username}' created successfully"

    error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
    logger.error("Failed to create user %s: %s", username, error_msg)
    return False, f"Failed to create user: {error_msg}"