- Remove all Keboola-specific content (metric categories, MRR/ARR refs,
corporate memory, hardcoded server IP)
- Add {ssh_alias}, {server_host}, {webapp_url} placeholders
- Bootstrap saves .sync_connection file with instance details
- sync_data.sh reads .sync_connection to substitute all placeholders
- Text instructions in dashboard include .sync_connection step
284 lines
10 KiB
YAML
284 lines
10 KiB
YAML
# Manifest schema version (kept as a string so it is never parsed as a float).
version: '1.0'
project_name: 'ai_data_analyst'
project_dir: '.'

# Placeholders filled by webapp per-user:
#   {server_host} - server IP or hostname
#   {ssh_alias}   - SSH config alias (default: "data-analyst", configurable to avoid conflicts)
#   {ssh_key}     - SSH private key path (default: ~/.ssh/data_analyst_server)
#   {username}    - analyst username on server
#   {webapp_url}  - webapp URL for registration

# Connection details for the data server. Values stay quoted because an
# unquoted leading "{" would be parsed as a YAML flow mapping.
server:
  host: '{server_host}'
  hostname: '{ssh_alias}'
  webapp_url: '{webapp_url}'
setup:
|
|
steps:
|
|
# Step: look for a CLAUDE.md in the working directory; when one is present,
# hand over to verify_project_identity.
- name: 'detect_existing_project'
  description: 'Check if project already exists'
  check: '[ -f ./CLAUDE.md ]'
  on_success: 'verify_project_identity'
  message: |
    Existing CLAUDE.md detected in current directory.
    Verifying this is an AI Data Analyst project...
# Step: confirm that the CLAUDE.md found here belongs to this project by
# searching it for the literal project title.
- name: 'verify_project_identity'
  description: 'Verify this is the correct project type'
  # -F: the title is a fixed string, not a regular expression.
  check: "grep -qF 'AI Data Analyst' ./CLAUDE.md"
  on_success: 'existing_project_confirmed'
  on_failure: |
    Wrong project type detected.
    The CLAUDE.md file exists but doesn't match AI Data Analyst.

    Options:
    - Choose a different directory for setup
    - Remove existing CLAUDE.md if this was a mistake
  message: |
    AI Data Analyst project confirmed.

    This directory is already set up. You can:
    - Sync latest data: bash server/scripts/sync_data.sh
    - View project context: cat CLAUDE.md

    To recreate CLAUDE.md: rm -f ./CLAUDE.md and re-run bootstrap
# Step: non-blocking warning (warn_only) when the working directory already
# contains entries, including dotfiles.
- name: 'check_directory_empty'
  description: 'Warn if directory is not empty'
  # Counts the entries listed by `ls -A`; the check passes only when there are none.
  check: 'test $(ls -A . 2>/dev/null | wc -l) -eq 0'
  warn_only: true
  on_failure: |
    Current directory is not empty.

    This setup will create:
    - .claude/ (project metadata)
    - server/ (read-only data from server: parquet files, docs, scripts)
    - user/ (your workspace: DuckDB database, artifacts)
    - .venv/ (Python virtual environment)

    Make sure you're in the correct directory before continuing.
  message: |
    Starting setup in current directory...
# Step: create an ed25519 key pair with an empty passphrase unless the public
# key file is already present.
- name: 'generate_ssh_key'
  description: 'Generate SSH key for server authentication'
  # {ssh_key} is deliberately left unquoted so a leading "~" still expands.
  check: '[ -f {ssh_key}.pub ]'
  action: |
    ssh-keygen -t ed25519 -N '' -C "{username}@{ssh_alias}" -f {ssh_key}
  on_success: 'show_public_key'
  message: |
    SSH key generated successfully.
# Step: print the public key and pause (wait_for_user) while the analyst
# registers it in the webapp.
- name: 'show_public_key'
  description: 'Display SSH public key to user'
  action: 'cat {ssh_key}.pub'
  wait_for_user: true
  message: |
    Your SSH public key has been generated!

    Next steps:
    1. Copy the public key shown above
    2. Go to: {webapp_url}
    3. Sign in and paste the key into the form
    4. Wait a few seconds for account creation
    5. Come back here to continue
# Step: make sure ~/.ssh/config has a "Host {ssh_alias}" entry for the data
# server. Skipped entirely when the alias already connects (see `check`).
#
# The action either
#   * appends a new Host block when the alias is not defined yet, or
#   * leaves the file untouched when the alias already points at {server_host}, or
#   * aborts with exit 1 when the alias points somewhere else.
- name: "add_ssh_config"
  description: "Add SSH config entry"
  requires: ["show_public_key"]
  check: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo ok 2>/dev/null"
  action: |
    mkdir -p ~/.ssh

    # Find the HostName configured for this exact alias, if any.
    # A state flag is used instead of an awk range pattern: with
    # "/Host alias/,/Host /" the opening line also matches the closing
    # pattern, so the range is a single line and HostName is never seen.
    # Comparing whole fields also avoids matching longer aliases that merely
    # start with the same text. awk exits non-zero when the alias is absent
    # (or the config file does not exist), which selects the append branch.
    if EXISTING_HOST=$(awk -v alias='{ssh_alias}' '
        tolower($1) == "host" {
          in_block = 0
          for (i = 2; i <= NF; i++) if ($i == alias) { in_block = 1; found = 1 }
          next
        }
        in_block && host == "" && tolower($1) == "hostname" { host = $2 }
        END { print host; exit (found ? 0 : 1) }
      ' ~/.ssh/config 2>/dev/null); then
      # Alias already defined: only acceptable if it targets our server.
      if [[ "$EXISTING_HOST" != "{server_host}" ]]; then
        echo "WARNING: SSH alias '{ssh_alias}' already exists pointing to ${EXISTING_HOST:-<no HostName set>}"
        echo "Skipping SSH config - please resolve manually or use a different alias."
        exit 1
      fi
    else
      # The heredoc body and its terminator must stay at the left margin of
      # this script; an indented terminator would not end the heredoc.
      cat >> ~/.ssh/config << 'EOF'

    Host {ssh_alias}
        HostName {server_host}
        User {username}
        IdentityFile {ssh_key}
        StrictHostKeyChecking accept-new
    EOF
      chmod 600 ~/.ssh/config
    fi
  message: |
    SSH configuration added for {ssh_alias} server.
# Step: non-interactive SSH round trip through the configured alias, retried
# up to max_retries times.
- name: 'test_ssh_connection'
  description: 'Test SSH connection to server'
  requires:
    - 'add_ssh_config'
  # BatchMode=yes makes ssh fail instead of prompting for a password.
  action: >-
    ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo 'ok' 2>/dev/null
  retry: true
  max_retries: 3
  message: |
    Testing connection to data server...
    Connection successful!
  on_failure: |
    SSH connection failed!

    Please verify:
    1. You completed registration at {webapp_url}
    2. Your account was created successfully
    3. Your username matches: {username}
# Step: create the server/ and user/ directory trees and write the
# .sync_connection file holding the per-instance connection values.
- name: 'create_folders'
  description: 'Create local project structure and save connection details'
  action: |
    for sub in docs scripts examples parquet metadata; do
      mkdir -p "./server/$sub"
    done
    for sub in duckdb notifications artifacts scripts parquet sessions; do
      mkdir -p "./user/$sub"
    done

    # Save connection details for sync_data.sh to use when generating CLAUDE.md
    cat > ./.sync_connection << 'CONN'
    ssh_alias={ssh_alias}
    server_host={server_host}
    webapp_url={webapp_url}
    CONN
  message: |
    Project structure created (server/, user/).
# Step: non-blocking check (warn_only) that rsync is on PATH; the failure
# text explains how to install it and that scp is the fallback.
- name: 'check_rsync'
  description: 'Verify rsync is available'
  check: 'command -v rsync > /dev/null 2>&1'
  warn_only: true
  on_failure: |
    rsync is not installed. Install it for better sync performance:

    macOS: brew install rsync
    Ubuntu: sudo apt-get install -y rsync
    RHEL: sudo yum install -y rsync

    Without rsync, scp will be used as fallback (slower).
# Step: copy scripts, docs, examples, metadata and parquet data from the
# server into ./server/.
#
# Each directory is fetched with rsync and falls back to scp when rsync fails
# (for example because it is not installed). The step stays best-effort and
# never aborts, but a directory that could not be fetched by either tool is
# now reported on stderr instead of being silently ignored.
- name: "download_server_data"
  description: "Download all server data (scripts, docs, metadata, parquet)"
  requires: ["test_ssh_connection", "create_folders"]
  action: |
    FAILED=""

    # sync_dir <dir> [extra rsync options...]
    # Fetch server/<dir>/ from the server into ./server/<dir>/.
    sync_dir() {
      local dir="$1"
      shift
      rsync -avz --no-perms --no-group "$@" "{ssh_alias}:server/$dir/" "./server/$dir/" 2>/dev/null && return 0
      # The glob is quoted so it is expanded on the remote side, not locally.
      scp -r "{ssh_alias}:server/$dir/*" "./server/$dir/" 2>/dev/null && return 0
      echo "WARNING: could not sync server/$dir (rsync and scp both failed)" >&2
      FAILED="$FAILED $dir"
      return 0
    }

    echo "Syncing scripts..."
    sync_dir scripts

    echo "Syncing documentation..."
    sync_dir docs

    echo "Syncing examples..."
    sync_dir examples

    echo "Syncing metadata..."
    sync_dir metadata

    echo "Syncing parquet data (this may take a few minutes)..."
    sync_dir parquet --progress

    if [ -n "$FAILED" ]; then
      echo "WARNING: the following directories were not downloaded:$FAILED" >&2
      echo "Check your connection to {ssh_alias} and run the sync again." >&2
    fi
  message: |
    Downloading data from server...
    Data downloaded successfully!
# Step: create ./.venv and install the analysis packages into it. Skipped
# when a venv interpreter already exists (POSIX or Windows layout).
#
# The action stops with exit 1 if the venv cannot be created or activated, so
# pip never installs into a Python outside the project. Package minimums
# mirror the top-level `dependencies` list in this manifest.
- name: "setup_venv"
  description: "Create Python virtual environment and install dependencies"
  requires: ["create_folders"]
  check: "test -f ./.venv/bin/python || test -f ./.venv/Scripts/python.exe"
  action: |
    if command -v python3 >/dev/null 2>&1; then
      PYTHON_CMD=python3
    else
      PYTHON_CMD=python
    fi

    if ! $PYTHON_CMD -m venv ./.venv; then
      echo "ERROR: could not create virtual environment with $PYTHON_CMD" >&2
      exit 1
    fi

    # bin/ is the POSIX layout, Scripts/ the Windows one.
    if [ -f ./.venv/bin/activate ]; then
      source ./.venv/bin/activate
    elif [ -f ./.venv/Scripts/activate ]; then
      source ./.venv/Scripts/activate
    else
      echo "ERROR: virtual environment has no activate script" >&2
      exit 1
    fi

    # "python -m pip" lets pip upgrade itself on every platform. The
    # requirement specifiers are quoted so ">" is not taken as a redirect.
    python -m pip install --upgrade pip --quiet
    python -m pip install "pandas>=2.0.0" "pyarrow>=12.0.0" "duckdb>=0.9.0" "pyyaml>=6.0" "python-dotenv>=1.0.0" --quiet
  message: |
    Setting up Python environment...
    Python environment ready!
# Step: run the downloaded setup_views.sh script when it exists; otherwise
# print a notice and carry on.
- name: 'initialize_duckdb'
  description: 'Initialize DuckDB views on Parquet files'
  requires:
    - 'download_server_data'
    - 'setup_venv'
  action: |
    if [[ ! -f server/scripts/setup_views.sh ]]; then
      echo "setup_views.sh not found, skipping DuckDB initialization"
    else
      bash server/scripts/setup_views.sh
    fi
  message: |
    Initializing DuckDB analytical database...
    DuckDB initialized! All tables ready for queries.
# Step: produce the Claude Code context files in the project root:
#   * CLAUDE.md          - rendered from the downloaded template
#   * CLAUDE.local.md    - personal notes, created only if missing
#   * .claude/settings.json - copied from the downloaded docs when available
- name: "setup_claude_project_context"
  description: "Create Claude Code project context files"
  requires: ["download_server_data"]
  action: |
    # Generate CLAUDE.md from template.
    # The search pattern is spelled [{]username[}] so that it still matches
    # the literal placeholder text in the template: this manifest's own
    # placeholders are filled in before the script runs, which would
    # otherwise rewrite both sides of the substitution into the same
    # username and make it a no-op.
    # NOTE(review): assumes the template on the server carries an unfilled
    # {username} placeholder - confirm.
    if [[ -f "./server/docs/setup/claude_md_template.txt" ]]; then
      sed -e "s/[{]username[}]/{username}/g" \
        ./server/docs/setup/claude_md_template.txt > ./CLAUDE.md
      chmod 644 ./CLAUDE.md
    else
      echo "WARNING: claude_md_template.txt not found; CLAUDE.md was not created" >&2
    fi

    # Create CLAUDE.local.md for personal customizations.
    # The heredoc body and terminator must stay at the left margin of this
    # script so the terminator is recognised.
    if [[ ! -f "./CLAUDE.local.md" ]]; then
      cat > ./CLAUDE.local.md << 'LOCALEOF'
    # CLAUDE.local.md

    Your personal instructions for Claude Code in this project.
    This file is NOT overwritten by data sync - it is yours to customize.

    ## Your Custom Instructions

    Add your preferences, shortcuts, or project-specific notes below:

    LOCALEOF
      chmod 644 ./CLAUDE.local.md
    fi

    # Copy project permissions
    mkdir -p ./.claude
    if [[ -f "./server/docs/setup/claude_settings.json" ]]; then
      cp ./server/docs/setup/claude_settings.json ./.claude/settings.json
    fi
  message: |
    CLAUDE.md created (auto-updated on sync).
    CLAUDE.local.md created (your personal customizations, never overwritten).
# Step: final step; it only depends on the DuckDB and Claude-context steps
# and prints the completion summary.
- name: 'check_setup'
  description: 'Verify setup completed successfully'
  requires:
    - 'initialize_duckdb'
    - 'setup_claude_project_context'
  message: |
    Setup complete! Your AI Data Analyst environment is ready.

    What's been set up:
    - Data tables synced as local Parquet files
    - DuckDB analytical database with views configured
    - Python environment with pandas, pyarrow, duckdb
    - Helper scripts for data sync

    You can now start asking questions about your data.
    See server/docs/data_description.md for table schemas.

    To sync latest data: bash server/scripts/sync_data.sh
# Python dependencies (minimum versions, written as pip requirement specifiers)
dependencies:
  - 'pandas>=2.0.0'
  - 'pyarrow>=12.0.0'
  - 'duckdb>=0.9.0'
  - 'pyyaml>=6.0'
  - 'python-dotenv>=1.0.0'