---
# Bootstrap manifest for an AI Data Analyst local workspace.
# Describes the ordered setup steps (SSH access, folder layout, data download,
# Python environment, DuckDB views, Claude Code context files).
version: "1.0"
project_name: "ai_data_analyst"
project_dir: "."

# Placeholders filled by webapp per-user:
#   {server_host} - server IP or hostname
#   {ssh_alias}   - SSH config alias (default: "data-analyst", configurable to avoid conflicts)
#   {ssh_key}     - SSH private key path (default: ~/.ssh/data_analyst_server)
#   {username}    - analyst username on server
#   {webapp_url}  - webapp URL for registration
#
# NOTE(review): the shell snippets below assume the placeholders are filled by
# plain text replacement before the commands run - confirm against the webapp.

server:
  host: "{server_host}"
  hostname: "{ssh_alias}"
  webapp_url: "{webapp_url}"

setup:
  steps:
    # Looks for a CLAUDE.md in the current directory.
    - name: "detect_existing_project"
      description: "Check if project already exists"
      check: "test -f ./CLAUDE.md"
      on_success: "verify_project_identity"
      message: |
        Existing CLAUDE.md detected in current directory.
        Verifying this is an AI Data Analyst project...

    # Greps CLAUDE.md for the project marker string.
    # NOTE(review): on_success names "existing_project_confirmed", which is not
    # a step defined in this file - confirm the runner treats it as a terminal
    # state rather than a step name.
    - name: "verify_project_identity"
      description: "Verify this is the correct project type"
      check: "grep -q 'AI Data Analyst' ./CLAUDE.md"
      on_success: "existing_project_confirmed"
      on_failure: |
        Wrong project type detected.
        The CLAUDE.md file exists but doesn't match AI Data Analyst.

        Options:
        - Choose a different directory for setup
        - Remove existing CLAUDE.md if this was a mistake
      message: |
        AI Data Analyst project confirmed.
        This directory is already set up.

        You can:
        - Sync latest data: bash server/scripts/sync_data.sh
        - View project context: cat CLAUDE.md

        To recreate CLAUDE.md: rm -f ./CLAUDE.md and re-run bootstrap

    # Counts directory entries (including dotfiles); warn_only is set for this step.
    - name: "check_directory_empty"
      description: "Warn if directory is not empty"
      check: "[ $(ls -A . 2>/dev/null | wc -l) -eq 0 ]"
      on_failure: |
        Current directory is not empty.

        This setup will create:
        - .claude/ (project metadata)
        - server/ (read-only data from server: parquet files, docs, scripts)
        - user/ (your workspace: DuckDB database, artifacts)
        - .venv/ (Python virtual environment)

        Make sure you're in the correct directory before continuing.
      warn_only: true
      message: |
        Starting setup in current directory...

    # Creates a passphrase-less ed25519 key pair at {ssh_key}; the check tests
    # for an existing public key file.
    - name: "generate_ssh_key"
      description: "Generate SSH key for server authentication"
      check: "test -f {ssh_key}.pub"
      action: |
        ssh-keygen -t ed25519 -f {ssh_key} -C "{username}@{ssh_alias}" -N ''
      on_success: "show_public_key"
      message: |
        SSH key generated successfully.

    # Prints the public key so the user can register it in the webapp.
    - name: "show_public_key"
      description: "Display SSH public key to user"
      action: "cat {ssh_key}.pub"
      message: |
        Your SSH public key has been generated!

        Next steps:
        1. Copy the public key shown above
        2. Go to: {webapp_url}
        3. Sign in and paste the key into the form
        4. Wait a few seconds for account creation
        5. Come back here to continue
      wait_for_user: true

    # Appends a Host entry for {ssh_alias} to ~/.ssh/config unless one exists.
    # An existing entry is accepted only when its HostName equals {server_host}.
    - name: "add_ssh_config"
      description: "Add SSH config entry"
      requires: ["show_public_key"]
      check: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo ok 2>/dev/null"
      action: |
        mkdir -p ~/.ssh
        # Check if alias already exists with a different host.
        # The pattern is anchored so that e.g. "{ssh_alias}-old" is not
        # mistaken for "{ssh_alias}".
        if grep -qE '^[[:space:]]*Host[[:space:]]+{ssh_alias}([[:space:]]|$)' ~/.ssh/config 2>/dev/null; then
          # Track whether we are inside the alias's Host block and print the
          # first HostName found there. A "/start/,/end/" awk range cannot be
          # used: the opening "Host" line also matches the end pattern, so the
          # range would close on the same line and never reach HostName.
          EXISTING_HOST=$(awk '
            $1 == "Host" { in_block = ($2 == "{ssh_alias}"); next }
            in_block && tolower($1) == "hostname" { print $2; exit }
          ' ~/.ssh/config)
          if [[ "$EXISTING_HOST" != "{server_host}" ]]; then
            echo "WARNING: SSH alias '{ssh_alias}' already exists pointing to ${EXISTING_HOST:-unknown host}"
            echo "Skipping SSH config - please resolve manually or use a different alias."
            exit 1
          fi
        else
          # Heredoc body and terminator stay at script column 0 because "<<"
          # (unlike "<<-") does not strip leading whitespace. The leading blank
          # line keeps the entry separate from any unterminated last line.
          cat >> ~/.ssh/config << 'EOF'

        Host {ssh_alias}
            HostName {server_host}
            User {username}
            IdentityFile {ssh_key}
            StrictHostKeyChecking accept-new
        EOF
          chmod 600 ~/.ssh/config
        fi
      message: |
        SSH configuration added for {ssh_alias} server.

    # Non-interactive connection probe (BatchMode, 5 s connect timeout).
    - name: "test_ssh_connection"
      description: "Test SSH connection to server"
      requires: ["add_ssh_config"]
      action: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo 'ok' 2>/dev/null"
      message: |
        Testing connection to data server...
        Connection successful!
      on_failure: |
        SSH connection failed!

        Please verify:
        1. You completed registration at {webapp_url}
        2. Your account was created successfully
        3. Your username matches: {username}
      retry: true
      max_retries: 3

    # Creates the server/ and user/ trees and writes ./.sync_connection.
    - name: "create_folders"
      description: "Create local project structure and save connection details"
      action: |
        mkdir -p ./server/docs ./server/scripts ./server/examples ./server/parquet ./server/metadata
        mkdir -p ./user/duckdb ./user/notifications ./user/artifacts ./user/scripts ./user/parquet ./user/sessions
        # Save connection details for sync_data.sh to use when generating CLAUDE.md
        cat > ./.sync_connection << 'CONN'
        ssh_alias={ssh_alias}
        server_host={server_host}
        webapp_url={webapp_url}
        CONN
      message: |
        Project structure created (server/, user/).

    # Checks that rsync is on PATH; warn_only is set for this step.
    - name: "check_rsync"
      description: "Verify rsync is available"
      check: "command -v rsync >/dev/null 2>&1"
      warn_only: true
      on_failure: |
        rsync is not installed. Install it for better sync performance:
          macOS:  brew install rsync
          Ubuntu: sudo apt-get install -y rsync
          RHEL:   sudo yum install -y rsync
        Without rsync, scp will be used as fallback (slower).

    # Copies each server directory with rsync, falling back to scp. A failed
    # sync prints a warning to stderr but does not fail the script, so the
    # download stays best-effort without being silent.
    - name: "download_server_data"
      description: "Download all server data (scripts, docs, metadata, parquet)"
      action: |
        echo "Syncing scripts..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/ 2>/dev/null || \
          scp -r {ssh_alias}:server/scripts/* ./server/scripts/ 2>/dev/null || \
          echo "WARNING: could not sync server/scripts from {ssh_alias}" >&2

        echo "Syncing documentation..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/ 2>/dev/null || \
          scp -r {ssh_alias}:server/docs/* ./server/docs/ 2>/dev/null || \
          echo "WARNING: could not sync server/docs from {ssh_alias}" >&2

        echo "Syncing examples..."
        # NOTE(review): no scp fallback or warning here, unlike the other
        # directories - presumably examples are optional; confirm.
        rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/ 2>/dev/null || true

        echo "Syncing metadata..."
        rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/ 2>/dev/null || \
          scp -r {ssh_alias}:server/metadata/* ./server/metadata/ 2>/dev/null || \
          echo "WARNING: could not sync server/metadata from {ssh_alias}" >&2

        echo "Syncing parquet data (this may take a few minutes)..."
        rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/ 2>/dev/null || \
          scp -r {ssh_alias}:server/parquet/* ./server/parquet/ 2>/dev/null || \
          echo "WARNING: could not sync server/parquet from {ssh_alias}" >&2
      requires: ["test_ssh_connection", "create_folders"]
      message: |
        Downloading data from server...
        Data downloaded successfully!

    # Creates ./.venv and installs the packages listed under `dependencies`.
    # The check accepts either the POSIX (bin/) or Windows (Scripts/) layout.
    - name: "setup_venv"
      description: "Create Python virtual environment and install dependencies"
      check: "test -f ./.venv/bin/python || test -f ./.venv/Scripts/python.exe"
      action: |
        if command -v python3 >/dev/null 2>&1; then
          PYTHON_CMD=python3
        else
          PYTHON_CMD=python
        fi
        $PYTHON_CMD -m venv ./.venv
        if [ -f ./.venv/bin/activate ]; then
          source ./.venv/bin/activate
        else
          source ./.venv/Scripts/activate
        fi
        # After activation "python" is the venv interpreter; "python -m pip"
        # avoids depending on a pip launcher being on PATH.
        python -m pip install --upgrade pip --quiet
        # Version floors mirror the top-level `dependencies` list. The
        # specifiers are quoted so the shell does not treat ">" as a redirect.
        python -m pip install --quiet \
          "pandas>=2.0.0" \
          "pyarrow>=12.0.0" \
          "duckdb>=0.9.0" \
          "pyyaml>=6.0" \
          "python-dotenv>=1.0.0"
      requires: ["create_folders"]
      message: |
        Setting up Python environment...
        Python environment ready!

    # Runs the downloaded setup_views.sh if present; otherwise prints a notice.
    - name: "initialize_duckdb"
      description: "Initialize DuckDB views on Parquet files"
      action: |
        if [[ -f server/scripts/setup_views.sh ]]; then
          bash server/scripts/setup_views.sh
        else
          echo "setup_views.sh not found, skipping DuckDB initialization"
        fi
      requires: ["download_server_data", "setup_venv"]
      message: |
        Initializing DuckDB analytical database...
        DuckDB initialized! All tables ready for queries.

    # Writes CLAUDE.md from the downloaded template, creates CLAUDE.local.md
    # if it is missing, and copies claude_settings.json into ./.claude/.
    - name: "setup_claude_project_context"
      description: "Create Claude Code project context files"
      action: |
        # Generate CLAUDE.md from template.
        # The search side is written as [{]username[}] so it still matches the
        # literal token in the template after this file's own placeholders have
        # been filled; writing the token verbatim on both sides would turn the
        # expression into a no-op.
        if [[ -f "./server/docs/setup/claude_md_template.txt" ]]; then
          sed -e "s/[{]username[}]/{username}/g" \
            ./server/docs/setup/claude_md_template.txt > ./CLAUDE.md
          chmod 644 ./CLAUDE.md
        fi

        # Create CLAUDE.local.md for personal customizations
        if [[ ! -f "./CLAUDE.local.md" ]]; then
          cat > ./CLAUDE.local.md << 'LOCALEOF'
        # CLAUDE.local.md

        Your personal instructions for Claude Code in this project.
        This file is NOT overwritten by data sync - it is yours to customize.

        ## Your Custom Instructions

        Add your preferences, shortcuts, or project-specific notes below:

        LOCALEOF
          chmod 644 ./CLAUDE.local.md
        fi

        # Copy project permissions
        mkdir -p ./.claude
        if [[ -f "./server/docs/setup/claude_settings.json" ]]; then
          cp ./server/docs/setup/claude_settings.json ./.claude/settings.json
        fi
      requires: ["download_server_data"]
      message: |
        CLAUDE.md created (auto-updated on sync).
        CLAUDE.local.md created (your personal customizations, never overwritten).

    # Final step: no check or action of its own, only a summary message.
    - name: "check_setup"
      description: "Verify setup completed successfully"
      requires: ["initialize_duckdb", "setup_claude_project_context"]
      message: |
        Setup complete! Your AI Data Analyst environment is ready.

        What's been set up:
        - Data tables synced as local Parquet files
        - DuckDB analytical database with views configured
        - Python environment with pandas, pyarrow, duckdb
        - Helper scripts for data sync

        You can now start asking questions about your data.
        See server/docs/data_description.md for table schemas.

        To sync latest data: bash server/scripts/sync_data.sh

# Python dependencies (the same version floors are used by the setup_venv step)
dependencies:
  - pandas>=2.0.0
  - pyarrow>=12.0.0
  - duckdb>=0.9.0
  - pyyaml>=6.0
  - python-dotenv>=1.0.0