Generate setup instructions from bootstrap.yaml (single source of truth)

- Rewrite bootstrap.yaml as clean, structured YAML with steps, commands, descriptions, conditions, and notes.
- Add _generate_setup_instructions() in app.py, which reads the YAML, substitutes placeholders, and produces clipboard-ready plain text.
- Replace the 50-line hardcoded JS string builder with a single `tojson` variable.
- All setup instructions are now editable in one YAML file.
2026-03-15 00:37:19 +01:00 · 2026-03-15 00:37:19 +01:00 · 508d92771f
commit 508d92771f
parent 85c07732b2
3 changed files with 160 additions and 334 deletions
--- a/docs/setup/bootstrap.yaml
+++ b/docs/setup/bootstrap.yaml
@ -1,280 +1,80 @@
-version: "1.0"
+# AI Data Analyst - Setup Instructions
-project_name: "ai_data_analyst"
+#
-project_dir: "."
+# Single source of truth for local environment setup.
-
+# Webapp reads this, substitutes placeholders, and generates clipboard text.
-# Placeholders filled by webapp per-user:
+#
 # Placeholders (filled from instance.yaml by webapp):
 #   {server_host}  - server IP or hostname
-#   {ssh_alias}    - SSH config alias (default: "data-analyst", configurable to avoid conflicts)
+#   {ssh_alias}    - SSH config alias (instance.yaml: server.ssh_alias)
-#   {ssh_key}      - SSH private key path (default: ~/.ssh/data_analyst_server)
+#   {ssh_key}      - SSH private key path (instance.yaml: server.ssh_key)
 #   {username}     - analyst username on server
-#   {webapp_url}   - webapp URL for registration
+#   {webapp_url}   - webapp URL
 #   {project_dir}  - local project folder name (instance.yaml: server.project_dir)
-server:
+header: |
-  host: "{server_host}"
+  Set up my AI Data Analyst local environment.
-  hostname: "{ssh_alias}"
+
 connection:
  server_host: "{server_host}"
  webapp_url: "{webapp_url}"
  username: "{username}"
  ssh_key: "{ssh_key}"
-setup:
+steps:
-  steps:
+  - name: "SSH config"
-    - name: "detect_existing_project"
+    description: |
-      description: "Check if project already exists"
+      Check ~/.ssh/config - if a Host entry named "{ssh_alias}" already exists
-      check: "test -f ./CLAUDE.md"
+      with a DIFFERENT server, ask me what name to use instead.
-      on_success: "verify_project_identity"
+      Otherwise add:
      message: |
        Existing CLAUDE.md detected in current directory.
        Verifying this is an AI Data Analyst project...
    - name: "verify_project_identity"
      description: "Verify this is the correct project type"
      check: "grep -q 'AI Data Analyst' ./CLAUDE.md"
      on_success: "existing_project_confirmed"
      on_failure: |
        Wrong project type detected.
        The CLAUDE.md file exists but doesn't match AI Data Analyst.
        Options:
        - Choose a different directory for setup
        - Remove existing CLAUDE.md if this was a mistake
      message: |
        AI Data Analyst project confirmed.
        This directory is already set up. You can:
        - Sync latest data: bash server/scripts/sync_data.sh
        - View project context: cat CLAUDE.md
        To recreate CLAUDE.md: rm -f ./CLAUDE.md and re-run bootstrap
    - name: "check_directory_empty"
      description: "Warn if directory is not empty"
      check: "[ $(ls -A . 2>/dev/null | wc -l) -eq 0 ]"
      on_failure: |
        Current directory is not empty.
        This setup will create:
        - .claude/ (project metadata)
        - server/ (read-only data from server: parquet files, docs, scripts)
        - user/ (your workspace: DuckDB database, artifacts)
        - .venv/ (Python virtual environment)
        Make sure you're in the correct directory before continuing.
      warn_only: true
      message: |
        Starting setup in current directory...
    - name: "generate_ssh_key"
      description: "Generate SSH key for server authentication"
      check: "test -f {ssh_key}.pub"
      action: |
        ssh-keygen -t ed25519 -f {ssh_key} -C "{username}@{ssh_alias}" -N ''
      on_success: "show_public_key"
      message: |
        SSH key generated successfully.
    - name: "show_public_key"
      description: "Display SSH public key to user"
      action: "cat {ssh_key}.pub"
      message: |
        Your SSH public key has been generated!
        Next steps:
        1. Copy the public key shown above
        2. Go to: {webapp_url}
        3. Sign in and paste the key into the form
        4. Wait a few seconds for account creation
        5. Come back here to continue
      wait_for_user: true
    - name: "add_ssh_config"
      description: "Add SSH config entry"
      requires: ["show_public_key"]
      check: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo ok 2>/dev/null"
      action: |
        mkdir -p ~/.ssh
        # Check if alias already exists with a different host
        if grep -q 'Host {ssh_alias}' ~/.ssh/config 2>/dev/null; then
          EXISTING_HOST=$(awk '/Host {ssh_alias}/,/Host / {if (/HostName/) print $2}' ~/.ssh/config | head -1)
          if [[ "$EXISTING_HOST" != "{server_host}" ]]; then
            echo "WARNING: SSH alias '{ssh_alias}' already exists pointing to $EXISTING_HOST"
            echo "Skipping SSH config - please resolve manually or use a different alias."
            exit 1
          fi
        else
          cat >> ~/.ssh/config << 'EOF'
        Host {ssh_alias}
-            HostName {server_host}
+          HostName {server_host}
-            User {username}
+          User {username}
-            IdentityFile {ssh_key}
+          IdentityFile {ssh_key}
-            StrictHostKeyChecking accept-new
+          StrictHostKeyChecking accept-new
-        EOF
+      Then test: ssh {ssh_alias} echo ok
          chmod 600 ~/.ssh/config
        fi
      message: |
        SSH configuration added for {ssh_alias} server.
-    - name: "test_ssh_connection"
+  - name: "Create project folders"
-      description: "Test SSH connection to server"
+    commands:
-      requires: ["add_ssh_config"]
+      - "mkdir -p server/docs server/scripts server/parquet server/metadata server/examples"
-      action: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo 'ok' 2>/dev/null"
+      - "mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions"
-      message: |
+      - 'printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > .sync_connection'
        Testing connection to data server...
        Connection successful!
      on_failure: |
        SSH connection failed!
-        Please verify:
+  - name: "Download from server"
-        1. You completed registration at {webapp_url}
+    description: |
-        2. Your account was created successfully
+      Use rsync with --no-perms --no-group to avoid macOS permission errors.
-        3. Your username matches: {username}
+      Skip directories that don't exist on the server (rsync exit code 23 = missing source).
-      retry: true
+    commands:
-      max_retries: 3
+      - "rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/"
      - "rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/"
      - "rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/"
      - "rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/"
      - "rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/"
    note: "Some folders may be empty if data sync hasn't run on the server yet. That's OK."
-    - name: "create_folders"
+  - name: "Set up Python venv"
-      description: "Create local project structure and save connection details"
+    commands:
-      action: |
+      - "python3 -m venv .venv"
-        mkdir -p ./server/docs ./server/scripts ./server/examples ./server/parquet ./server/metadata
+      - "source .venv/bin/activate"
-        mkdir -p ./user/duckdb ./user/notifications ./user/artifacts ./user/scripts ./user/parquet ./user/sessions
+      - "pip install pandas pyarrow duckdb pyyaml python-dotenv"
-        # Save connection details for sync_data.sh to use when generating CLAUDE.md
+  - name: "Initialize DuckDB"
-        printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > ./.sync_connection
+    condition: "only if server/scripts/setup_views.sh exists"
-      message: |
+    commands:
-        Project structure created (server/, user/).
+      - "bash server/scripts/setup_views.sh"
-    - name: "check_rsync"
+  - name: "Create CLAUDE.md"
-      description: "Verify rsync is available"
+    condition: "if server/docs/setup/claude_md_template.txt exists"
-      check: "command -v rsync >/dev/null 2>&1"
+    description: |
-      warn_only: true
+      Copy the template and replace these placeholders:
-      on_failure: |
+        {username} -> {username}
-        rsync is not installed. Install it for better sync performance:
+        {ssh_alias} -> {ssh_alias}
        {server_host} -> {server_host}
        {webapp_url} -> {webapp_url}
      Also create CLAUDE.local.md for personal notes (never overwritten by sync).
      Also copy server/docs/setup/claude_settings.json to .claude/settings.json.
-        macOS:   brew install rsync
+existing_project:
-        Ubuntu:  sudo apt-get install -y rsync
+  check: "If CLAUDE.md already exists and contains 'AI Data Analyst'"
-        RHEL:    sudo yum install -y rsync
+  message: |
-
+    This directory is already set up. Just sync latest data:
-        Without rsync, scp will be used as fallback (slower).
+      bash server/scripts/sync_data.sh
    - name: "download_server_data"
      description: "Download all server data (scripts, docs, metadata, parquet)"
      action: |
        echo "Syncing scripts..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/ 2>/dev/null || \
          scp -r {ssh_alias}:server/scripts/* ./server/scripts/ 2>/dev/null || true
        echo "Syncing documentation..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/ 2>/dev/null || \
          scp -r {ssh_alias}:server/docs/* ./server/docs/ 2>/dev/null || true
        echo "Syncing examples..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/ 2>/dev/null || true
        echo "Syncing metadata..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/ 2>/dev/null || \
          scp -r {ssh_alias}:server/metadata/* ./server/metadata/ 2>/dev/null || true
        echo "Syncing parquet data (this may take a few minutes)..."
        rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/ 2>/dev/null || \
          scp -r {ssh_alias}:server/parquet/* ./server/parquet/ 2>/dev/null || true
      requires: ["test_ssh_connection", "create_folders"]
      message: |
        Downloading data from server...
        Data downloaded successfully!
    - name: "setup_venv"
      description: "Create Python virtual environment and install dependencies"
      check: "test -f ./.venv/bin/python || test -f ./.venv/Scripts/python.exe"
      action: |
        if command -v python3 >/dev/null 2>&1; then
          PYTHON_CMD=python3
        else
          PYTHON_CMD=python
        fi
        $PYTHON_CMD -m venv ./.venv
        if [ -f ./.venv/bin/activate ]; then
          source ./.venv/bin/activate
        else
          source ./.venv/Scripts/activate
        fi
        pip install --upgrade pip --quiet
        pip install pandas pyarrow duckdb pyyaml python-dotenv --quiet
      requires: ["create_folders"]
      message: |
        Setting up Python environment...
        Python environment ready!
    - name: "initialize_duckdb"
      description: "Initialize DuckDB views on Parquet files"
      action: |
        if [[ -f server/scripts/setup_views.sh ]]; then
          bash server/scripts/setup_views.sh
        else
          echo "setup_views.sh not found, skipping DuckDB initialization"
        fi
      requires: ["download_server_data", "setup_venv"]
      message: |
        Initializing DuckDB analytical database...
        DuckDB initialized! All tables ready for queries.
    - name: "setup_claude_project_context"
      description: "Create Claude Code project context files"
      action: |
        # Generate CLAUDE.md from template
        if [[ -f "./server/docs/setup/claude_md_template.txt" ]]; then
          sed -e "s/{username}/{username}/g" \
              ./server/docs/setup/claude_md_template.txt > ./CLAUDE.md
          chmod 644 ./CLAUDE.md
        fi
        # Create CLAUDE.local.md for personal customizations
        if [[ ! -f "./CLAUDE.local.md" ]]; then
          cat > ./CLAUDE.local.md << 'LOCALEOF'
        # CLAUDE.local.md
        Your personal instructions for Claude Code in this project.
        This file is NOT overwritten by data sync - it is yours to customize.
        ## Your Custom Instructions
        Add your preferences, shortcuts, or project-specific notes below:
        LOCALEOF
          chmod 644 ./CLAUDE.local.md
        fi
        # Copy project permissions
        mkdir -p ./.claude
        if [[ -f "./server/docs/setup/claude_settings.json" ]]; then
          cp ./server/docs/setup/claude_settings.json ./.claude/settings.json
        fi
      requires: ["download_server_data"]
      message: |
        CLAUDE.md created (auto-updated on sync).
        CLAUDE.local.md created (your personal customizations, never overwritten).
    - name: "check_setup"
      description: "Verify setup completed successfully"
      requires: ["initialize_duckdb", "setup_claude_project_context"]
      message: |
        Setup complete! Your AI Data Analyst environment is ready.
        What's been set up:
        - Data tables synced as local Parquet files
        - DuckDB analytical database with views configured
        - Python environment with pandas, pyarrow, duckdb
        - Helper scripts for data sync
        You can now start asking questions about your data.
        See server/docs/data_description.md for table schemas.
        To sync latest data: bash server/scripts/sync_data.sh
 # Python dependencies
 dependencies:
  - pandas>=2.0.0
  - pyarrow>=12.0.0
  - duckdb>=0.9.0
  - pyyaml>=6.0
  - python-dotenv>=1.0.0
--- a/webapp/app.py
+++ b/webapp/app.py
@ -225,6 +225,92 @@ FALLBACK_DATA_STATS = {
 }
 def _generate_setup_instructions(username: str) -> str:
    """Generate clipboard-ready setup instructions from bootstrap.yaml.

    Reads the structured YAML, substitutes placeholders from instance config,
    and produces plain text that users paste into Claude Code.

    Args:
        username: Analyst username substituted for the ``{username}`` placeholder.

    Returns:
        The rendered instructions as a single newline-joined string.

    Raises:
        OSError: If bootstrap.yaml cannot be opened.
        yaml.YAMLError: If bootstrap.yaml is not valid YAML.
        ValueError: If the top level of bootstrap.yaml is not a mapping.
    """
    bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml")
    # Explicit encoding so the result does not depend on the platform default.
    with open(bootstrap_path, "r", encoding="utf-8") as f:
        bootstrap = yaml.safe_load(f)
    # An empty file loads as None and a bare list/scalar has no .get(); fail with
    # a clear message instead of an opaque TypeError/AttributeError further down.
    if not isinstance(bootstrap, dict):
        raise ValueError("bootstrap.yaml must contain a top-level mapping")
    webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else ""
    raw_placeholders = {
        "{username}": username,
        "{server_host}": Config.SERVER_HOST,
        "{server_hostname}": Config.SERVER_HOSTNAME,
        "{ssh_alias}": Config.SSH_ALIAS,
        "{ssh_key}": Config.SSH_KEY,
        "{project_dir}": Config.PROJECT_DIR,
        "{webapp_url}": webapp_url,
    }
    # str.replace() raises TypeError on a None replacement, so unset config
    # values are rendered as empty strings rather than aborting the whole page.
    placeholders = {
        key: "" if val is None else str(val)
        for key, val in raw_placeholders.items()
    }

    def sub(text) -> str:
        """Replace every known placeholder token in ``text`` (None -> "")."""
        # NOTE(review): replacement is global, so a YAML line that wants to show
        # a literal token (e.g. "{username} -> {username}") has BOTH sides
        # substituted. Fixing that needs an escape syntax agreed with the YAML.
        text = "" if text is None else str(text)
        for key, val in placeholders.items():
            text = text.replace(key, val)
        return text

    lines = []
    # Header
    if "header" in bootstrap:
        lines.append(sub(bootstrap["header"]).strip())
        lines.append("")
    # Connection details
    conn = bootstrap.get("connection") or {}
    if conn:
        lines.append("Connection details:")
        for key, val in conn.items():
            # Turn the YAML key into a human label, e.g. "server_host" -> "server IP".
            label = key.replace("_", " ").replace("host", "IP").replace("url", "URL")
            display_val = sub(val)
            if key == "ssh_key":
                display_val += " (already generated)"
            lines.append(f"  {label}: {display_val}")
        lines.append("")
    # Steps (numbered from 1 in YAML order)
    lines.append("Steps:")
    lines.append("")
    for i, step in enumerate(bootstrap.get("steps") or [], 1):
        name = sub(step.get("name", ""))
        condition = step.get("condition", "")
        if condition:
            lines.append(f"{i}. {name} ({sub(condition)}):")
        else:
            lines.append(f"{i}. {name}:")
        # Description (free text instructions for Claude)
        desc = step.get("description", "")
        if desc:
            for line in sub(desc).strip().splitlines():
                lines.append(f"   {line}")
        # Commands (executable shell commands); a null "commands:" key is treated as empty
        for cmd in step.get("commands") or []:
            lines.append(f"   {sub(cmd)}")
        # Note
        note = step.get("note", "")
        if note:
            lines.append(f"   Note: {sub(note)}")
        lines.append("")
    # Existing project hint
    existing = bootstrap.get("existing_project") or {}
    if existing:
        msg = existing.get("message", "")
        if msg:
            lines.append("If this directory already has CLAUDE.md with 'AI Data Analyst':")
            for line in sub(msg).strip().splitlines():
                lines.append(f"  {line}")
    return "\n".join(lines)
 def _load_data_stats() -> dict:
    """Load aggregate data stats from sync_state.json, with hardcoded fallback."""
    try:
@ -812,25 +898,12 @@ def register_routes(app: Flask) -> None:
        # Check if username is available (for new registrations)
        username_available, username_error = is_username_available(username)
-        # Read bootstrap YAML for Claude Code setup instructions
+        # Generate setup instructions from bootstrap.yaml
-        bootstrap_yaml = ""
+        setup_instructions = ""
        try:
-            bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml")
+            setup_instructions = _generate_setup_instructions(username)
            with open(bootstrap_path, "r") as f:
                bootstrap_yaml_template = f.read()
            # Inject username and server info into template
            bootstrap_yaml = bootstrap_yaml_template.replace("{username}", username)
            bootstrap_yaml = bootstrap_yaml.replace("{server_host}", Config.SERVER_HOST)
            bootstrap_yaml = bootstrap_yaml.replace("{server_hostname}", Config.SERVER_HOSTNAME)
            bootstrap_yaml = bootstrap_yaml.replace("{ssh_alias}", Config.SSH_ALIAS)
            bootstrap_yaml = bootstrap_yaml.replace("{ssh_key}", Config.SSH_KEY)
            bootstrap_yaml = bootstrap_yaml.replace("{project_dir}", Config.PROJECT_DIR)
            webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else ""
            bootstrap_yaml = bootstrap_yaml.replace("{webapp_url}", webapp_url)
        except Exception as e:
-            logger.warning(f"Could not read bootstrap.yaml: {e}")
+            logger.warning(f"Could not generate setup instructions: {e}")
        # Get Telegram link status
        telegram_status = get_telegram_status(username)
@ -879,7 +952,7 @@ def register_routes(app: Flask) -> None:
            ssh_alias=Config.SSH_ALIAS,
            ssh_key=Config.SSH_KEY,
            project_dir=Config.PROJECT_DIR,
-            bootstrap_yaml=bootstrap_yaml,
+            setup_instructions=setup_instructions,
            telegram_status=telegram_status,
            desktop_status=desktop_status,
            data_stats=data_stats,
--- a/webapp/templates/dashboard.html
+++ b/webapp/templates/dashboard.html
@ -2423,54 +2423,7 @@
    }
    function copyBootstrapInstructions(btn) {
-        var username = {{ username | tojson }};
+        var instructions = {{ setup_instructions | tojson }};
        var serverHost = {{ server_host | tojson }};
        var serverHostname = {{ server_hostname | tojson }};
        var webappUrl = serverHostname ? 'http://' + serverHostname : '';
        var sshAlias = {{ ssh_alias | tojson }};
        var sshKey = {{ ssh_key | tojson }};
        var instructions = 'Set up my AI Data Analyst local environment.\n\n'
            + 'Connection details:\n'
            + '  Server IP: ' + serverHost + '\n'
            + '  Webapp: ' + webappUrl + '\n'
            + '  My username: ' + username + '\n'
            + '  SSH key: ' + sshKey + ' (already generated)\n\n'
            + 'Steps:\n\n'
            + '1. SSH config\n'
            + '   Check ~/.ssh/config - if a Host entry named "' + sshAlias + '" already exists\n'
            + '   with a DIFFERENT server, ask me what name to use instead.\n'
            + '   Otherwise add:\n'
            + '     Host ' + sshAlias + '\n'
            + '       HostName ' + serverHost + '\n'
            + '       User ' + username + '\n'
            + '       IdentityFile ' + sshKey + '\n'
            + '       StrictHostKeyChecking accept-new\n'
            + '   Then test: ssh ' + sshAlias + ' echo ok\n\n'
            + '2. Create project folders:\n'
            + '   mkdir -p server/docs server/scripts server/parquet server/metadata server/examples\n'
            + '   mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions\n'
            + '   printf "ssh_alias=' + sshAlias + '\\nserver_host=' + serverHost + '\\nwebapp_url=' + webappUrl + '\\n" > .sync_connection\n\n'
            + '3. Download from server via rsync (use --no-perms --no-group to avoid macOS permission errors).\n'
            + '   Skip directories that don\'t exist on the server (rsync exit code 23 = missing source).\n'
            + '   rsync -avz --no-perms --no-group ' + sshAlias + ':server/scripts/ ./server/scripts/\n'
            + '   rsync -avz --no-perms --no-group ' + sshAlias + ':server/docs/ ./server/docs/\n'
            + '   rsync -avz --no-perms --no-group ' + sshAlias + ':server/examples/ ./server/examples/\n'
            + '   rsync -avz --no-perms --no-group ' + sshAlias + ':server/metadata/ ./server/metadata/\n'
            + '   rsync -avz --no-perms --no-group --progress ' + sshAlias + ':server/parquet/ ./server/parquet/\n'
            + '   Note: some folders may be empty if data sync hasn\'t run on the server yet. That\'s OK.\n\n'
            + '4. Set up Python venv:\n'
            + '   python3 -m venv .venv\n'
            + '   source .venv/bin/activate\n'
            + '   pip install pandas pyarrow duckdb pyyaml python-dotenv\n\n'
            + '5. Initialize DuckDB (only if server/scripts/setup_views.sh exists):\n'
            + '   bash server/scripts/setup_views.sh\n\n'
            + '6. Create CLAUDE.md (if server/docs/setup/claude_md_template.txt exists):\n'
            + '   Copy the template, replace placeholders:\n'
            + '     {username} -> ' + username + '\n'
            + '     {ssh_alias} -> ' + sshAlias + '\n'
            + '     {server_host} -> ' + serverHost + '\n'
            + '     {webapp_url} -> ' + webappUrl + '\n';
        var button = btn || document.getElementById('bootstrapCopyBtn');
        var origText = button.textContent;