# AI Data Analyst - Instance Configuration
# ==========================================
# This is the main configuration file for your instance.
# Copy to instance.yaml and fill in your values.
#
# SECRET VALUES use ${ENV_VAR} syntax - actual values go in .env file.
# Non-secret values are set directly here.

# --- Instance branding ---
instance:
  name: "AI Data Analyst"
  subtitle: "Your Organization"
  copyright: "Your Organization"
  # logo_svg: Full <svg> element for header logo (optional, default: Keboola logo)
  # Example: '<svg viewBox="0 0 100 24"><text y="18">Logo</text></svg>'

# --- Server ---
server:
  hostname: ""                  # DNS name (e.g., "data.acme.com")
  host: ""                      # IP address
  app_dir: "/opt/data-analyst"  # Installation directory

# --- Admin users ---
# Manage the server, own data files, get unlimited resource limits.
# SSH keys are used by server/setup.sh during provisioning.
admins:
  - username: "admin"
    ssh_public_key: "ssh-ed25519 AAAA..."

# --- Deployment ---
deployment:
  method: "manual"  # manual | github_actions
  repo_url: ""      # e.g., "git@github.com:acme/ai-data-analyst.git"
  branch: "main"

# --- Authentication ---
# At minimum, set allowed_domain and webapp_secret_key.
# Email magic link auth works out of the box (no external service needed).
# Google OAuth is optional - add credentials to enable it.
auth:
  # Email domain(s) for login, comma-separated
  # (e.g., "acme.com" or "acme.com, partner.org")
  allowed_domain: ""
  webapp_secret_key: "${WEBAPP_SECRET_KEY}"
  # Optional: Google OAuth (if not set, only email magic link is available)
  google_client_id: "${GOOGLE_CLIENT_ID}"
  google_client_secret: "${GOOGLE_CLIENT_SECRET}"

  # --- Webapp username shaping ---
  #
  # By default, a user's OS account is derived from their full email:
  #   e.psimecek@acme.com -> e_psimecek_acme_com
  #
  # Two options let you control this:
  #
  # username_strip_domain: true
  #   Use only the local part of the email (before @).
  #   Safe when allowed_domain ensures all users share a single domain.
  #   e.psimecek@acme.com -> e_psimecek
  #   Keeps usernames short and readable.
  #
  # username_prefix: "myapp_"
  #   Prepend a fixed string to every webapp-created account name.
  #   Necessary when an external identity system (GCP OS Login, LDAP, SAML)
  #   already creates OS accounts in /home/ using the same naming scheme.
  #   Without a prefix, the webapp sees those existing OS accounts and refuses
  #   to register new analyst accounts ("already in use by a system account").
  #   With prefix "myapp_" and strip_domain true:
  #   e.psimecek@acme.com -> myapp_e_psimecek
  #   Linux enforces a 32-character username limit. Keep the prefix short.
  #   Changing or removing either option later will invalidate all existing
  #   analyst accounts. Use username_mapping (top-level) to bridge legacy accounts.
  #
  # username_strip_domain: false
  # username_prefix: ""

# --- Theme (optional) ---
# Customize colors, fonts, and shape to match your brand.
# All values are optional - defaults provide a clean blue theme.
# See docs/theme-reference.html for a visual guide.
#
# `theme` is an explicit empty mapping so it does not parse as null while
# every key below is commented out. To customize: delete the `{}` (leaving
# `theme:`) and uncomment the keys you need.
theme: {}
  # primary: "#0073D1"                       # Main brand color (buttons, links, accents)
  # primary_dark: "#005BA3"                  # Hover/active state of primary
  # primary_light: "rgba(0, 115, 209, 0.1)"  # Light tint backgrounds
  # text_primary: "#1A253C"                  # Main text color
  # text_secondary: "#6B7280"                # Muted/secondary text
  # background: "#F5F7FA"                    # Page background
  # surface: "#FFFFFF"                       # Card/panel background
  # border: "#E5E7EB"                        # Borders and dividers
  # font_primary: "'Inter', system-ui, sans-serif"
  # font_url: "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap"
  # radius: "6px"                            # Border radius (cards, buttons, inputs)
  # success: "#10B77F"
  # warning: "#F59F0A"
  # error: "#EA580C"

# --- Data source ---
data_source:
  type: "keboola"  # keboola | bigquery | local
  keboola:
    storage_token: "${KEBOOLA_STORAGE_TOKEN}"
    stack_url: ""  # e.g., "https://connection.keboola.com"
    project_id: ""
  bigquery:
    project: "${BIGQUERY_PROJECT}"    # GCP project for job execution/billing
    location: "${BIGQUERY_LOCATION}"  # BigQuery location (e.g., "us-central1", "US")
    # Uses ADC (Application Default Credentials) - VM service account on GCP
    # Data can live in a different project -- use fully-qualified table IDs
    # in data_description.md

# --- OpenMetadata catalog (optional - Groupon-specific) ---
# Enriches table and column metadata from OpenMetadata REST API.
# If not configured, app works normally without catalog enrichment.
# openmetadata:
#   url: "https://your-catalog.example.com"
#   token: "${OPENMETADATA_TOKEN}"  # JWT bearer token
#   cache_ttl_seconds: 3600         # Cache TTL in seconds

# --- Email delivery (optional, for magic link auth) ---
# Without SMTP, magic links are shown directly in browser (development mode).
# For production, configure any SMTP relay (Gmail, Mailgun, SendGrid SMTP, etc.)
email:
  from_address: "noreply@example.com"
  from_name: "AI Data Analyst"
  smtp_host: "${SMTP_HOST}"  # e.g., "smtp.gmail.com"
  smtp_port: 587             # 587 for STARTTLS, 465 for SSL
  smtp_user: "${SMTP_USER}"
  smtp_password: "${SMTP_PASSWORD}"

# --- Desktop app (optional) ---
desktop:
  jwt_issuer: "data-analyst"
  jwt_secret: "${DESKTOP_JWT_SECRET}"
  url_scheme: "data-analyst"

# --- Telegram notifications (optional) ---
telegram:
  bot_token: "${TELEGRAM_BOT_TOKEN}"
  bot_username: ""
  domain_suffix: ""

# --- Jira integration (optional) ---
jira:
  domain: ""
  email: ""
  api_token: "${JIRA_API_TOKEN}"
  webhook_secret: "${JIRA_WEBHOOK_SECRET}"
  sla_email: ""
  sla_api_token: "${JIRA_SLA_API_TOKEN}"
  cloud_id: ""

# --- Corporate Memory AI (optional) ---
ai:
  anthropic_api_key: "${ANTHROPIC_API_KEY}"

# --- User display (for Corporate Memory avatars) ---
users: {}

# --- Username mapping (webapp email -> server username, only if different) ---
username_mapping: {}

# --- Optional datasets (sync settings UI) ---
datasets: {}

# --- Data catalog ---
catalog:
  categories: {}
  order: []

# --- Data profiler (optional) ---
# profiler:
#   sample_size: 500000           # If table > this, sample this many rows; otherwise use all
#   max_categorical_distinct: 50  # Treat as categorical if unique <= this
#   top_values_limit: 10          # Top values per categorical column
#   histogram_bins: 15            # Bins in histogram visualizations
#   sample_rows_limit: 5          # Sample rows to show in UI "Sample" tab
#   alert_high_missing_pct: 30.0  # Alert threshold for high missing %
#   alert_missing_pct: 5.0        # Alert threshold for missing %
#   alert_imbalance_pct: 60.0     # Alert threshold for imbalance %
#   alert_high_cardinality: 50    # Alert threshold for high cardinality columns