From 508d92771fed86c0a23cb6e76e79c32300a277f0 Mon Sep 17 00:00:00 2001 From: Petr Date: Sun, 15 Mar 2026 00:37:19 +0100 Subject: [PATCH] Generate setup instructions from bootstrap.yaml (single source of truth) - Rewrite bootstrap.yaml as clean structured YAML with steps, commands, descriptions, conditions, and notes - Add _generate_setup_instructions() in app.py that reads YAML, substitutes placeholders, and produces clipboard-ready plain text - Replace 50-line hardcoded JS string builder with single tojson variable - All setup instructions now editable in one YAML file --- docs/setup/bootstrap.yaml | 336 +++++++------------------------- webapp/app.py | 109 +++++++++-- webapp/templates/dashboard.html | 49 +---- 3 files changed, 160 insertions(+), 334 deletions(-) diff --git a/docs/setup/bootstrap.yaml b/docs/setup/bootstrap.yaml index 8824b7c..d98d3ab 100644 --- a/docs/setup/bootstrap.yaml +++ b/docs/setup/bootstrap.yaml @@ -1,280 +1,80 @@ -version: "1.0" -project_name: "ai_data_analyst" -project_dir: "." - -# Placeholders filled by webapp per-user: +# AI Data Analyst - Setup Instructions +# +# Single source of truth for local environment setup. +# Webapp reads this, substitutes placeholders, and generates clipboard text. +# +# Placeholders (filled from instance.yaml by webapp): # {server_host} - server IP or hostname -# {ssh_alias} - SSH config alias (default: "data-analyst", configurable to avoid conflicts) -# {ssh_key} - SSH private key path (default: ~/.ssh/data_analyst_server) +# {ssh_alias} - SSH config alias (instance.yaml: server.ssh_alias) +# {ssh_key} - SSH private key path (instance.yaml: server.ssh_key) # {username} - analyst username on server -# {webapp_url} - webapp URL for registration +# {webapp_url} - webapp URL +# {project_dir} - local project folder name (instance.yaml: server.project_dir) -server: - host: "{server_host}" - hostname: "{ssh_alias}" +header: | + Set up my AI Data Analyst local environment. + +connection: + server_host: "{server_host}" webapp_url: "{webapp_url}" + username: "{username}" + ssh_key: "{ssh_key}" -setup: - steps: - - name: "detect_existing_project" - description: "Check if project already exists" - check: "test -f ./CLAUDE.md" - on_success: "verify_project_identity" - message: | - Existing CLAUDE.md detected in current directory. - Verifying this is an AI Data Analyst project... - - - name: "verify_project_identity" - description: "Verify this is the correct project type" - check: "grep -q 'AI Data Analyst' ./CLAUDE.md" - on_success: "existing_project_confirmed" - on_failure: | - Wrong project type detected. - The CLAUDE.md file exists but doesn't match AI Data Analyst. - - Options: - - Choose a different directory for setup - - Remove existing CLAUDE.md if this was a mistake - message: | - AI Data Analyst project confirmed. - - This directory is already set up. You can: - - Sync latest data: bash server/scripts/sync_data.sh - - View project context: cat CLAUDE.md - - To recreate CLAUDE.md: rm -f ./CLAUDE.md and re-run bootstrap - - - name: "check_directory_empty" - description: "Warn if directory is not empty" - check: "[ $(ls -A . 2>/dev/null | wc -l) -eq 0 ]" - on_failure: | - Current directory is not empty. - - This setup will create: - - .claude/ (project metadata) - - server/ (read-only data from server: parquet files, docs, scripts) - - user/ (your workspace: DuckDB database, artifacts) - - .venv/ (Python virtual environment) - - Make sure you're in the correct directory before continuing. - warn_only: true - message: | - Starting setup in current directory... - - - name: "generate_ssh_key" - description: "Generate SSH key for server authentication" - check: "test -f {ssh_key}.pub" - action: | - ssh-keygen -t ed25519 -f {ssh_key} -C "{username}@{ssh_alias}" -N '' - on_success: "show_public_key" - message: | - SSH key generated successfully. - - - name: "show_public_key" - description: "Display SSH public key to user" - action: "cat {ssh_key}.pub" - message: | - Your SSH public key has been generated! - - Next steps: - 1. Copy the public key shown above - 2. Go to: {webapp_url} - 3. Sign in and paste the key into the form - 4. Wait a few seconds for account creation - 5. Come back here to continue - wait_for_user: true - - - name: "add_ssh_config" - description: "Add SSH config entry" - requires: ["show_public_key"] - check: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo ok 2>/dev/null" - action: | - mkdir -p ~/.ssh - - # Check if alias already exists with a different host - if grep -q 'Host {ssh_alias}' ~/.ssh/config 2>/dev/null; then - EXISTING_HOST=$(awk '/Host {ssh_alias}/,/Host / {if (/HostName/) print $2}' ~/.ssh/config | head -1) - if [[ "$EXISTING_HOST" != "{server_host}" ]]; then - echo "WARNING: SSH alias '{ssh_alias}' already exists pointing to $EXISTING_HOST" - echo "Skipping SSH config - please resolve manually or use a different alias." - exit 1 - fi - else - cat >> ~/.ssh/config << 'EOF' - +steps: + - name: "SSH config" + description: | + Check ~/.ssh/config - if a Host entry named "{ssh_alias}" already exists + with a DIFFERENT server, ask me what name to use instead. + Otherwise add: Host {ssh_alias} - HostName {server_host} - User {username} - IdentityFile {ssh_key} - StrictHostKeyChecking accept-new - EOF - chmod 600 ~/.ssh/config - fi - message: | - SSH configuration added for {ssh_alias} server. + HostName {server_host} + User {username} + IdentityFile {ssh_key} + StrictHostKeyChecking accept-new + Then test: ssh {ssh_alias} echo ok - - name: "test_ssh_connection" - description: "Test SSH connection to server" - requires: ["add_ssh_config"] - action: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo 'ok' 2>/dev/null" - message: | - Testing connection to data server... - Connection successful! - on_failure: | - SSH connection failed! + - name: "Create project folders" + commands: + - "mkdir -p server/docs server/scripts server/parquet server/metadata server/examples" + - "mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions" + - 'printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > .sync_connection' - Please verify: - 1. You completed registration at {webapp_url} - 2. Your account was created successfully - 3. Your username matches: {username} - retry: true - max_retries: 3 + - name: "Download from server" + description: | + Use rsync with --no-perms --no-group to avoid macOS permission errors. + Skip directories that don't exist on the server (rsync exit code 23 = missing source). + commands: + - "rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/" + - "rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/" + - "rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/" + - "rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/" + - "rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/" + note: "Some folders may be empty if data sync hasn't run on the server yet. That's OK." - - name: "create_folders" - description: "Create local project structure and save connection details" - action: | - mkdir -p ./server/docs ./server/scripts ./server/examples ./server/parquet ./server/metadata - mkdir -p ./user/duckdb ./user/notifications ./user/artifacts ./user/scripts ./user/parquet ./user/sessions + - name: "Set up Python venv" + commands: + - "python3 -m venv .venv" + - "source .venv/bin/activate" + - "pip install pandas pyarrow duckdb pyyaml python-dotenv" - # Save connection details for sync_data.sh to use when generating CLAUDE.md - printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > ./.sync_connection - message: | - Project structure created (server/, user/). + - name: "Initialize DuckDB" + condition: "only if server/scripts/setup_views.sh exists" + commands: + - "bash server/scripts/setup_views.sh" - - name: "check_rsync" - description: "Verify rsync is available" - check: "command -v rsync >/dev/null 2>&1" - warn_only: true - on_failure: | - rsync is not installed. Install it for better sync performance: + - name: "Create CLAUDE.md" + condition: "if server/docs/setup/claude_md_template.txt exists" + description: | + Copy the template and replace these placeholders: + {username} -> {username} + {ssh_alias} -> {ssh_alias} + {server_host} -> {server_host} + {webapp_url} -> {webapp_url} + Also create CLAUDE.local.md for personal notes (never overwritten by sync). + Also copy server/docs/setup/claude_settings.json to .claude/settings.json. - macOS: brew install rsync - Ubuntu: sudo apt-get install -y rsync - RHEL: sudo yum install -y rsync - - Without rsync, scp will be used as fallback (slower). - - - name: "download_server_data" - description: "Download all server data (scripts, docs, metadata, parquet)" - action: | - echo "Syncing scripts..." - rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/ 2>/dev/null || \ - scp -r {ssh_alias}:server/scripts/* ./server/scripts/ 2>/dev/null || true - - echo "Syncing documentation..." - rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/ 2>/dev/null || \ - scp -r {ssh_alias}:server/docs/* ./server/docs/ 2>/dev/null || true - - echo "Syncing examples..." - rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/ 2>/dev/null || true - - echo "Syncing metadata..." - rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/ 2>/dev/null || \ - scp -r {ssh_alias}:server/metadata/* ./server/metadata/ 2>/dev/null || true - - echo "Syncing parquet data (this may take a few minutes)..." - rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/ 2>/dev/null || \ - scp -r {ssh_alias}:server/parquet/* ./server/parquet/ 2>/dev/null || true - requires: ["test_ssh_connection", "create_folders"] - message: | - Downloading data from server... - Data downloaded successfully! - - - name: "setup_venv" - description: "Create Python virtual environment and install dependencies" - check: "test -f ./.venv/bin/python || test -f ./.venv/Scripts/python.exe" - action: | - if command -v python3 >/dev/null 2>&1; then - PYTHON_CMD=python3 - else - PYTHON_CMD=python - fi - - $PYTHON_CMD -m venv ./.venv - - if [ -f ./.venv/bin/activate ]; then - source ./.venv/bin/activate - else - source ./.venv/Scripts/activate - fi - - pip install --upgrade pip --quiet - pip install pandas pyarrow duckdb pyyaml python-dotenv --quiet - requires: ["create_folders"] - message: | - Setting up Python environment... - Python environment ready! - - - name: "initialize_duckdb" - description: "Initialize DuckDB views on Parquet files" - action: | - if [[ -f server/scripts/setup_views.sh ]]; then - bash server/scripts/setup_views.sh - else - echo "setup_views.sh not found, skipping DuckDB initialization" - fi - requires: ["download_server_data", "setup_venv"] - message: | - Initializing DuckDB analytical database... - DuckDB initialized! All tables ready for queries. - - - name: "setup_claude_project_context" - description: "Create Claude Code project context files" - action: | - # Generate CLAUDE.md from template - if [[ -f "./server/docs/setup/claude_md_template.txt" ]]; then - sed -e "s/{username}/{username}/g" \ - ./server/docs/setup/claude_md_template.txt > ./CLAUDE.md - chmod 644 ./CLAUDE.md - fi - - # Create CLAUDE.local.md for personal customizations - if [[ ! -f "./CLAUDE.local.md" ]]; then - cat > ./CLAUDE.local.md << 'LOCALEOF' - # CLAUDE.local.md - - Your personal instructions for Claude Code in this project. - This file is NOT overwritten by data sync - it is yours to customize. - - ## Your Custom Instructions - - Add your preferences, shortcuts, or project-specific notes below: - - LOCALEOF - chmod 644 ./CLAUDE.local.md - fi - - # Copy project permissions - mkdir -p ./.claude - if [[ -f "./server/docs/setup/claude_settings.json" ]]; then - cp ./server/docs/setup/claude_settings.json ./.claude/settings.json - fi - requires: ["download_server_data"] - message: | - CLAUDE.md created (auto-updated on sync). - CLAUDE.local.md created (your personal customizations, never overwritten). - - - name: "check_setup" - description: "Verify setup completed successfully" - requires: ["initialize_duckdb", "setup_claude_project_context"] - message: | - Setup complete! Your AI Data Analyst environment is ready. - - What's been set up: - - Data tables synced as local Parquet files - - DuckDB analytical database with views configured - - Python environment with pandas, pyarrow, duckdb - - Helper scripts for data sync - - You can now start asking questions about your data. - See server/docs/data_description.md for table schemas. - - To sync latest data: bash server/scripts/sync_data.sh - -# Python dependencies -dependencies: - - pandas>=2.0.0 - - pyarrow>=12.0.0 - - duckdb>=0.9.0 - - pyyaml>=6.0 - - python-dotenv>=1.0.0 +existing_project: + check: "If CLAUDE.md already exists and contains 'AI Data Analyst'" + message: | + This directory is already set up. Just sync latest data: + bash server/scripts/sync_data.sh diff --git a/webapp/app.py b/webapp/app.py index 3f88757..f119a13 100644 --- a/webapp/app.py +++ b/webapp/app.py @@ -225,6 +225,92 @@ FALLBACK_DATA_STATS = { } +def _generate_setup_instructions(username: str) -> str: + """Generate clipboard-ready setup instructions from bootstrap.yaml. + + Reads the structured YAML, substitutes placeholders from instance config, + and produces plain text that users paste into Claude Code. + """ + bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml") + with open(bootstrap_path, "r") as f: + bootstrap = yaml.safe_load(f) + + webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else "" + placeholders = { + "{username}": username, + "{server_host}": Config.SERVER_HOST, + "{server_hostname}": Config.SERVER_HOSTNAME, + "{ssh_alias}": Config.SSH_ALIAS, + "{ssh_key}": Config.SSH_KEY, + "{project_dir}": Config.PROJECT_DIR, + "{webapp_url}": webapp_url, + } + + def sub(text: str) -> str: + for key, val in placeholders.items(): + text = text.replace(key, val) + return text + + lines = [] + + # Header + if "header" in bootstrap: + lines.append(sub(bootstrap["header"]).strip()) + lines.append("") + + # Connection details + conn = bootstrap.get("connection", {}) + if conn: + lines.append("Connection details:") + for key, val in conn.items(): + label = key.replace("_", " ").replace("host", "IP").replace("url", "URL") + display_val = sub(val) + if key == "ssh_key": + display_val += " (already generated)" + lines.append(f" {label}: {display_val}") + lines.append("") + + # Steps + lines.append("Steps:") + lines.append("") + for i, step in enumerate(bootstrap.get("steps", []), 1): + name = sub(step.get("name", "")) + condition = step.get("condition", "") + if condition: + lines.append(f"{i}. {name} ({sub(condition)}):") + else: + lines.append(f"{i}. {name}:") + + # Description (free text instructions for Claude) + desc = step.get("description", "") + if desc: + for line in sub(desc).strip().splitlines(): + lines.append(f" {line}") + + # Commands (executable shell commands) + commands = step.get("commands", []) + for cmd in commands: + lines.append(f" {sub(cmd)}") + + # Note + note = step.get("note", "") + if note: + lines.append(f" Note: {sub(note)}") + + lines.append("") + + # Existing project hint + existing = bootstrap.get("existing_project", {}) + if existing: + msg = existing.get("message", "") + if msg: + lines.append("If this directory already has CLAUDE.md with 'AI Data Analyst':") + for line in sub(msg).strip().splitlines(): + lines.append(f" {line}") + + return "\n".join(lines) + + def _load_data_stats() -> dict: """Load aggregate data stats from sync_state.json, with hardcoded fallback.""" try: @@ -812,25 +898,12 @@ def register_routes(app: Flask) -> None: # Check if username is available (for new registrations) username_available, username_error = is_username_available(username) - # Read bootstrap YAML for Claude Code setup instructions - bootstrap_yaml = "" + # Generate setup instructions from bootstrap.yaml + setup_instructions = "" try: - bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml") - with open(bootstrap_path, "r") as f: - bootstrap_yaml_template = f.read() - - # Inject username and server info into template - bootstrap_yaml = bootstrap_yaml_template.replace("{username}", username) - bootstrap_yaml = bootstrap_yaml.replace("{server_host}", Config.SERVER_HOST) - bootstrap_yaml = bootstrap_yaml.replace("{server_hostname}", Config.SERVER_HOSTNAME) - bootstrap_yaml = bootstrap_yaml.replace("{ssh_alias}", Config.SSH_ALIAS) - bootstrap_yaml = bootstrap_yaml.replace("{ssh_key}", Config.SSH_KEY) - bootstrap_yaml = bootstrap_yaml.replace("{project_dir}", Config.PROJECT_DIR) - webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else "" - bootstrap_yaml = bootstrap_yaml.replace("{webapp_url}", webapp_url) - + setup_instructions = _generate_setup_instructions(username) except Exception as e: - logger.warning(f"Could not read bootstrap.yaml: {e}") + logger.warning(f"Could not generate setup instructions: {e}") # Get Telegram link status telegram_status = get_telegram_status(username) @@ -879,7 +952,7 @@ def register_routes(app: Flask) -> None: ssh_alias=Config.SSH_ALIAS, ssh_key=Config.SSH_KEY, project_dir=Config.PROJECT_DIR, - bootstrap_yaml=bootstrap_yaml, + setup_instructions=setup_instructions, telegram_status=telegram_status, desktop_status=desktop_status, data_stats=data_stats, diff --git a/webapp/templates/dashboard.html b/webapp/templates/dashboard.html index b5ef4ed..38f5d93 100644 --- a/webapp/templates/dashboard.html +++ b/webapp/templates/dashboard.html @@ -2423,54 +2423,7 @@ } function copyBootstrapInstructions(btn) { - var username = {{ username | tojson }}; - var serverHost = {{ server_host | tojson }}; - var serverHostname = {{ server_hostname | tojson }}; - var webappUrl = serverHostname ? 'http://' + serverHostname : ''; - - var sshAlias = {{ ssh_alias | tojson }}; - var sshKey = {{ ssh_key | tojson }}; - var instructions = 'Set up my AI Data Analyst local environment.\n\n' - + 'Connection details:\n' - + ' Server IP: ' + serverHost + '\n' - + ' Webapp: ' + webappUrl + '\n' - + ' My username: ' + username + '\n' - + ' SSH key: ' + sshKey + ' (already generated)\n\n' - + 'Steps:\n\n' - + '1. SSH config\n' - + ' Check ~/.ssh/config - if a Host entry named "' + sshAlias + '" already exists\n' - + ' with a DIFFERENT server, ask me what name to use instead.\n' - + ' Otherwise add:\n' - + ' Host ' + sshAlias + '\n' - + ' HostName ' + serverHost + '\n' - + ' User ' + username + '\n' - + ' IdentityFile ' + sshKey + '\n' - + ' StrictHostKeyChecking accept-new\n' - + ' Then test: ssh ' + sshAlias + ' echo ok\n\n' - + '2. Create project folders:\n' - + ' mkdir -p server/docs server/scripts server/parquet server/metadata server/examples\n' - + ' mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions\n' - + ' printf "ssh_alias=' + sshAlias + '\\nserver_host=' + serverHost + '\\nwebapp_url=' + webappUrl + '\\n" > .sync_connection\n\n' - + '3. Download from server via rsync (use --no-perms --no-group to avoid macOS permission errors).\n' - + ' Skip directories that don\'t exist on the server (rsync exit code 23 = missing source).\n' - + ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/scripts/ ./server/scripts/\n' - + ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/docs/ ./server/docs/\n' - + ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/examples/ ./server/examples/\n' - + ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/metadata/ ./server/metadata/\n' - + ' rsync -avz --no-perms --no-group --progress ' + sshAlias + ':server/parquet/ ./server/parquet/\n' - + ' Note: some folders may be empty if data sync hasn\'t run on the server yet. That\'s OK.\n\n' - + '4. Set up Python venv:\n' - + ' python3 -m venv .venv\n' - + ' source .venv/bin/activate\n' - + ' pip install pandas pyarrow duckdb pyyaml python-dotenv\n\n' - + '5. Initialize DuckDB (only if server/scripts/setup_views.sh exists):\n' - + ' bash server/scripts/setup_views.sh\n\n' - + '6. Create CLAUDE.md (if server/docs/setup/claude_md_template.txt exists):\n' - + ' Copy the template, replace placeholders:\n' - + ' {username} -> ' + username + '\n' - + ' {ssh_alias} -> ' + sshAlias + '\n' - + ' {server_host} -> ' + serverHost + '\n' - + ' {webapp_url} -> ' + webappUrl + '\n'; + var instructions = {{ setup_instructions | tojson }}; var button = btn || document.getElementById('bootstrapCopyBtn'); var origText = button.textContent;