Generate setup instructions from bootstrap.yaml (single source of truth)

- Rewrite bootstrap.yaml as clean structured YAML with steps, commands,
  descriptions, conditions, and notes
- Add _generate_setup_instructions() in app.py that reads YAML, substitutes
  placeholders, and produces clipboard-ready plain text
- Replace the 50-line hardcoded JS string builder with a single tojson variable
- All setup instructions now editable in one YAML file
This commit is contained in:
Petr 2026-03-15 00:37:19 +01:00
parent 85c07732b2
commit 508d92771f
3 changed files with 160 additions and 334 deletions

View file

@ -1,280 +1,80 @@
version: "1.0" # AI Data Analyst - Setup Instructions
project_name: "ai_data_analyst" #
project_dir: "." # Single source of truth for local environment setup.
# Webapp reads this, substitutes placeholders, and generates clipboard text.
# Placeholders filled by webapp per-user: #
# Placeholders (filled from instance.yaml by webapp):
# {server_host} - server IP or hostname # {server_host} - server IP or hostname
# {ssh_alias} - SSH config alias (default: "data-analyst", configurable to avoid conflicts) # {ssh_alias} - SSH config alias (instance.yaml: server.ssh_alias)
# {ssh_key} - SSH private key path (default: ~/.ssh/data_analyst_server) # {ssh_key} - SSH private key path (instance.yaml: server.ssh_key)
# {username} - analyst username on server # {username} - analyst username on server
# {webapp_url} - webapp URL for registration # {webapp_url} - webapp URL
# {project_dir} - local project folder name (instance.yaml: server.project_dir)
server: header: |
host: "{server_host}" Set up my AI Data Analyst local environment.
hostname: "{ssh_alias}"
connection:
server_host: "{server_host}"
webapp_url: "{webapp_url}" webapp_url: "{webapp_url}"
username: "{username}"
ssh_key: "{ssh_key}"
setup: steps:
steps: - name: "SSH config"
- name: "detect_existing_project" description: |
description: "Check if project already exists" Check ~/.ssh/config - if a Host entry named "{ssh_alias}" already exists
check: "test -f ./CLAUDE.md" with a DIFFERENT server, ask me what name to use instead.
on_success: "verify_project_identity" Otherwise add:
message: |
Existing CLAUDE.md detected in current directory.
Verifying this is an AI Data Analyst project...
- name: "verify_project_identity"
description: "Verify this is the correct project type"
check: "grep -q 'AI Data Analyst' ./CLAUDE.md"
on_success: "existing_project_confirmed"
on_failure: |
Wrong project type detected.
The CLAUDE.md file exists but doesn't match AI Data Analyst.
Options:
- Choose a different directory for setup
- Remove existing CLAUDE.md if this was a mistake
message: |
AI Data Analyst project confirmed.
This directory is already set up. You can:
- Sync latest data: bash server/scripts/sync_data.sh
- View project context: cat CLAUDE.md
To recreate CLAUDE.md: rm -f ./CLAUDE.md and re-run bootstrap
- name: "check_directory_empty"
description: "Warn if directory is not empty"
check: "[ $(ls -A . 2>/dev/null | wc -l) -eq 0 ]"
on_failure: |
Current directory is not empty.
This setup will create:
- .claude/ (project metadata)
- server/ (read-only data from server: parquet files, docs, scripts)
- user/ (your workspace: DuckDB database, artifacts)
- .venv/ (Python virtual environment)
Make sure you're in the correct directory before continuing.
warn_only: true
message: |
Starting setup in current directory...
- name: "generate_ssh_key"
description: "Generate SSH key for server authentication"
check: "test -f {ssh_key}.pub"
action: |
ssh-keygen -t ed25519 -f {ssh_key} -C "{username}@{ssh_alias}" -N ''
on_success: "show_public_key"
message: |
SSH key generated successfully.
- name: "show_public_key"
description: "Display SSH public key to user"
action: "cat {ssh_key}.pub"
message: |
Your SSH public key has been generated!
Next steps:
1. Copy the public key shown above
2. Go to: {webapp_url}
3. Sign in and paste the key into the form
4. Wait a few seconds for account creation
5. Come back here to continue
wait_for_user: true
- name: "add_ssh_config"
description: "Add SSH config entry"
requires: ["show_public_key"]
check: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo ok 2>/dev/null"
action: |
mkdir -p ~/.ssh
# Check if alias already exists with a different host
if grep -q 'Host {ssh_alias}' ~/.ssh/config 2>/dev/null; then
EXISTING_HOST=$(awk '/Host {ssh_alias}/,/Host / {if (/HostName/) print $2}' ~/.ssh/config | head -1)
if [[ "$EXISTING_HOST" != "{server_host}" ]]; then
echo "WARNING: SSH alias '{ssh_alias}' already exists pointing to $EXISTING_HOST"
echo "Skipping SSH config - please resolve manually or use a different alias."
exit 1
fi
else
cat >> ~/.ssh/config << 'EOF'
Host {ssh_alias} Host {ssh_alias}
HostName {server_host} HostName {server_host}
User {username} User {username}
IdentityFile {ssh_key} IdentityFile {ssh_key}
StrictHostKeyChecking accept-new StrictHostKeyChecking accept-new
EOF Then test: ssh {ssh_alias} echo ok
chmod 600 ~/.ssh/config
fi
message: |
SSH configuration added for {ssh_alias} server.
- name: "test_ssh_connection" - name: "Create project folders"
description: "Test SSH connection to server" commands:
requires: ["add_ssh_config"] - "mkdir -p server/docs server/scripts server/parquet server/metadata server/examples"
action: "ssh -o ConnectTimeout=5 -o BatchMode=yes {ssh_alias} echo 'ok' 2>/dev/null" - "mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions"
message: | - 'printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > .sync_connection'
Testing connection to data server...
Connection successful!
on_failure: |
SSH connection failed!
Please verify: - name: "Download from server"
1. You completed registration at {webapp_url} description: |
2. Your account was created successfully Use rsync with --no-perms --no-group to avoid macOS permission errors.
3. Your username matches: {username} Skip directories that don't exist on the server (rsync exit code 23 = missing source).
retry: true commands:
max_retries: 3 - "rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/"
- "rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/"
- "rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/"
- "rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/"
- "rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/"
note: "Some folders may be empty if data sync hasn't run on the server yet. That's OK."
- name: "create_folders" - name: "Set up Python venv"
description: "Create local project structure and save connection details" commands:
action: | - "python3 -m venv .venv"
mkdir -p ./server/docs ./server/scripts ./server/examples ./server/parquet ./server/metadata - "source .venv/bin/activate"
mkdir -p ./user/duckdb ./user/notifications ./user/artifacts ./user/scripts ./user/parquet ./user/sessions - "pip install pandas pyarrow duckdb pyyaml python-dotenv"
# Save connection details for sync_data.sh to use when generating CLAUDE.md - name: "Initialize DuckDB"
printf "ssh_alias={ssh_alias}\nserver_host={server_host}\nwebapp_url={webapp_url}\n" > ./.sync_connection condition: "only if server/scripts/setup_views.sh exists"
message: | commands:
Project structure created (server/, user/). - "bash server/scripts/setup_views.sh"
- name: "check_rsync" - name: "Create CLAUDE.md"
description: "Verify rsync is available" condition: "if server/docs/setup/claude_md_template.txt exists"
check: "command -v rsync >/dev/null 2>&1" description: |
warn_only: true Copy the template and replace these placeholders:
on_failure: | {username} -> {username}
rsync is not installed. Install it for better sync performance: {ssh_alias} -> {ssh_alias}
{server_host} -> {server_host}
{webapp_url} -> {webapp_url}
Also create CLAUDE.local.md for personal notes (never overwritten by sync).
Also copy server/docs/setup/claude_settings.json to .claude/settings.json.
macOS: brew install rsync existing_project:
Ubuntu: sudo apt-get install -y rsync check: "If CLAUDE.md already exists and contains 'AI Data Analyst'"
RHEL: sudo yum install -y rsync message: |
This directory is already set up. Just sync latest data:
Without rsync, scp will be used as fallback (slower). bash server/scripts/sync_data.sh
- name: "download_server_data"
description: "Download all server data (scripts, docs, metadata, parquet)"
action: |
echo "Syncing scripts..."
rsync -avz --no-perms --no-group {ssh_alias}:server/scripts/ ./server/scripts/ 2>/dev/null || \
scp -r {ssh_alias}:server/scripts/* ./server/scripts/ 2>/dev/null || true
echo "Syncing documentation..."
rsync -avz --no-perms --no-group {ssh_alias}:server/docs/ ./server/docs/ 2>/dev/null || \
scp -r {ssh_alias}:server/docs/* ./server/docs/ 2>/dev/null || true
echo "Syncing examples..."
rsync -avz --no-perms --no-group {ssh_alias}:server/examples/ ./server/examples/ 2>/dev/null || true
echo "Syncing metadata..."
rsync -avz --no-perms --no-group {ssh_alias}:server/metadata/ ./server/metadata/ 2>/dev/null || \
scp -r {ssh_alias}:server/metadata/* ./server/metadata/ 2>/dev/null || true
echo "Syncing parquet data (this may take a few minutes)..."
rsync -avz --no-perms --no-group --progress {ssh_alias}:server/parquet/ ./server/parquet/ 2>/dev/null || \
scp -r {ssh_alias}:server/parquet/* ./server/parquet/ 2>/dev/null || true
requires: ["test_ssh_connection", "create_folders"]
message: |
Downloading data from server...
Data downloaded successfully!
- name: "setup_venv"
description: "Create Python virtual environment and install dependencies"
check: "test -f ./.venv/bin/python || test -f ./.venv/Scripts/python.exe"
action: |
if command -v python3 >/dev/null 2>&1; then
PYTHON_CMD=python3
else
PYTHON_CMD=python
fi
$PYTHON_CMD -m venv ./.venv
if [ -f ./.venv/bin/activate ]; then
source ./.venv/bin/activate
else
source ./.venv/Scripts/activate
fi
pip install --upgrade pip --quiet
pip install pandas pyarrow duckdb pyyaml python-dotenv --quiet
requires: ["create_folders"]
message: |
Setting up Python environment...
Python environment ready!
- name: "initialize_duckdb"
description: "Initialize DuckDB views on Parquet files"
action: |
if [[ -f server/scripts/setup_views.sh ]]; then
bash server/scripts/setup_views.sh
else
echo "setup_views.sh not found, skipping DuckDB initialization"
fi
requires: ["download_server_data", "setup_venv"]
message: |
Initializing DuckDB analytical database...
DuckDB initialized! All tables ready for queries.
- name: "setup_claude_project_context"
description: "Create Claude Code project context files"
action: |
# Generate CLAUDE.md from template
if [[ -f "./server/docs/setup/claude_md_template.txt" ]]; then
sed -e "s/{username}/{username}/g" \
./server/docs/setup/claude_md_template.txt > ./CLAUDE.md
chmod 644 ./CLAUDE.md
fi
# Create CLAUDE.local.md for personal customizations
if [[ ! -f "./CLAUDE.local.md" ]]; then
cat > ./CLAUDE.local.md << 'LOCALEOF'
# CLAUDE.local.md
Your personal instructions for Claude Code in this project.
This file is NOT overwritten by data sync - it is yours to customize.
## Your Custom Instructions
Add your preferences, shortcuts, or project-specific notes below:
LOCALEOF
chmod 644 ./CLAUDE.local.md
fi
# Copy project permissions
mkdir -p ./.claude
if [[ -f "./server/docs/setup/claude_settings.json" ]]; then
cp ./server/docs/setup/claude_settings.json ./.claude/settings.json
fi
requires: ["download_server_data"]
message: |
CLAUDE.md created (auto-updated on sync).
CLAUDE.local.md created (your personal customizations, never overwritten).
- name: "check_setup"
description: "Verify setup completed successfully"
requires: ["initialize_duckdb", "setup_claude_project_context"]
message: |
Setup complete! Your AI Data Analyst environment is ready.
What's been set up:
- Data tables synced as local Parquet files
- DuckDB analytical database with views configured
- Python environment with pandas, pyarrow, duckdb
- Helper scripts for data sync
You can now start asking questions about your data.
See server/docs/data_description.md for table schemas.
To sync latest data: bash server/scripts/sync_data.sh
# Python dependencies
dependencies:
- pandas>=2.0.0
- pyarrow>=12.0.0
- duckdb>=0.9.0
- pyyaml>=6.0
- python-dotenv>=1.0.0

View file

@ -225,6 +225,92 @@ FALLBACK_DATA_STATS = {
} }
def _generate_setup_instructions(username: str) -> str:
    """Generate clipboard-ready setup instructions from bootstrap.yaml.

    Reads the structured YAML next to the webapp (``../docs/setup/bootstrap.yaml``),
    substitutes ``{placeholder}`` tokens with values from ``Config`` and the
    given username, and renders plain text that users paste into Claude Code.

    Rendered sections, in order: ``header``, ``connection`` (one line per key),
    ``steps`` (numbered; each with optional ``condition``, ``description``,
    ``commands`` and ``note``) and the ``existing_project`` hint (only its
    ``message`` is rendered).

    Placeholder rules:
      * ``{name}`` is replaced when ``name`` is a known placeholder; unknown
        tokens are left untouched.
      * ``{{name}}`` renders the literal text ``{name}`` for a known
        placeholder, so the YAML can show a placeholder name verbatim.
      * Substitution is a single pass, so text inside a substituted value is
        never expanded again.

    NOTE(review): a YAML line such as ``{username} -> {username}`` is
    substituted on both sides; write the left side as ``{{username}}`` to keep
    the placeholder name literal.

    Args:
        username: Analyst username inserted for ``{username}``.

    Returns:
        The instructions as a single newline-joined string.

    Raises:
        OSError: If bootstrap.yaml cannot be opened.
        yaml.YAMLError: If bootstrap.yaml is not valid YAML.
        ValueError: If bootstrap.yaml is empty or its top level is not a mapping.
    """
    import re  # local import so this block does not depend on the file's import list

    bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml")
    with open(bootstrap_path, "r", encoding="utf-8") as f:
        bootstrap = yaml.safe_load(f)

    # An empty file parses to None; fail with a clear message instead of an
    # opaque TypeError further down.
    if not isinstance(bootstrap, dict):
        raise ValueError("bootstrap.yaml must contain a top-level mapping")

    webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else ""

    raw_values = {
        "username": username,
        "server_host": Config.SERVER_HOST,
        "server_hostname": Config.SERVER_HOSTNAME,
        "ssh_alias": Config.SSH_ALIAS,
        "ssh_key": Config.SSH_KEY,
        "project_dir": Config.PROJECT_DIR,
        "webapp_url": webapp_url,
    }
    # Unset config values become empty strings rather than crashing substitution.
    values = {name: "" if val is None else str(val) for name, val in raw_values.items()}

    # First alternative: escaped "{{name}}"; second: ordinary "{name}".
    placeholder_re = re.compile(r"\{\{(\w+)\}\}|\{(\w+)\}")

    def _replace(match) -> str:
        escaped, name = match.group(1), match.group(2)
        if escaped is not None:
            # Only known names are unescaped; anything else stays as written.
            return "{" + escaped + "}" if escaped in values else match.group(0)
        return values.get(name, match.group(0))

    def sub(text) -> str:
        """Substitute placeholders in ``text``; None renders as an empty string."""
        if text is None:
            return ""
        return placeholder_re.sub(_replace, str(text))

    lines = []

    # Header
    header = bootstrap.get("header")
    if header:
        lines.append(sub(header).strip())
        lines.append("")

    # Connection details
    conn = bootstrap.get("connection") or {}
    if conn:
        lines.append("Connection details:")
        for key, val in conn.items():
            label = key.replace("_", " ").replace("host", "IP").replace("url", "URL")
            display_val = sub(val)
            if key == "ssh_key":
                display_val += " (already generated)"
            lines.append(f"  {label}: {display_val}")
        lines.append("")

    # Steps
    lines.append("Steps:")
    lines.append("")

    for i, step in enumerate(bootstrap.get("steps") or [], 1):
        name = sub(step.get("name"))
        condition = step.get("condition")
        if condition:
            lines.append(f"{i}. {name} ({sub(condition)}):")
        else:
            lines.append(f"{i}. {name}:")

        # Description (free text instructions for Claude)
        desc = step.get("description")
        if desc:
            for line in sub(desc).strip().splitlines():
                lines.append(f"   {line}")

        # Commands (executable shell commands)
        commands = step.get("commands") or []
        if isinstance(commands, str):
            # A single scalar command must not be iterated character by character.
            commands = [commands]
        for cmd in commands:
            lines.append(f"   {sub(cmd)}")

        # Note
        note = step.get("note")
        if note:
            lines.append(f"   Note: {sub(note)}")

        lines.append("")

    # Existing project hint
    existing = bootstrap.get("existing_project") or {}
    msg = existing.get("message") if existing else None
    if msg:
        lines.append("If this directory already has CLAUDE.md with 'AI Data Analyst':")
        for line in sub(msg).strip().splitlines():
            lines.append(f"  {line}")

    return "\n".join(lines)
def _load_data_stats() -> dict: def _load_data_stats() -> dict:
"""Load aggregate data stats from sync_state.json, with hardcoded fallback.""" """Load aggregate data stats from sync_state.json, with hardcoded fallback."""
try: try:
@ -812,25 +898,12 @@ def register_routes(app: Flask) -> None:
# Check if username is available (for new registrations) # Check if username is available (for new registrations)
username_available, username_error = is_username_available(username) username_available, username_error = is_username_available(username)
# Read bootstrap YAML for Claude Code setup instructions # Generate setup instructions from bootstrap.yaml
bootstrap_yaml = "" setup_instructions = ""
try: try:
bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml") setup_instructions = _generate_setup_instructions(username)
with open(bootstrap_path, "r") as f:
bootstrap_yaml_template = f.read()
# Inject username and server info into template
bootstrap_yaml = bootstrap_yaml_template.replace("{username}", username)
bootstrap_yaml = bootstrap_yaml.replace("{server_host}", Config.SERVER_HOST)
bootstrap_yaml = bootstrap_yaml.replace("{server_hostname}", Config.SERVER_HOSTNAME)
bootstrap_yaml = bootstrap_yaml.replace("{ssh_alias}", Config.SSH_ALIAS)
bootstrap_yaml = bootstrap_yaml.replace("{ssh_key}", Config.SSH_KEY)
bootstrap_yaml = bootstrap_yaml.replace("{project_dir}", Config.PROJECT_DIR)
webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else ""
bootstrap_yaml = bootstrap_yaml.replace("{webapp_url}", webapp_url)
except Exception as e: except Exception as e:
logger.warning(f"Could not read bootstrap.yaml: {e}") logger.warning(f"Could not generate setup instructions: {e}")
# Get Telegram link status # Get Telegram link status
telegram_status = get_telegram_status(username) telegram_status = get_telegram_status(username)
@ -879,7 +952,7 @@ def register_routes(app: Flask) -> None:
ssh_alias=Config.SSH_ALIAS, ssh_alias=Config.SSH_ALIAS,
ssh_key=Config.SSH_KEY, ssh_key=Config.SSH_KEY,
project_dir=Config.PROJECT_DIR, project_dir=Config.PROJECT_DIR,
bootstrap_yaml=bootstrap_yaml, setup_instructions=setup_instructions,
telegram_status=telegram_status, telegram_status=telegram_status,
desktop_status=desktop_status, desktop_status=desktop_status,
data_stats=data_stats, data_stats=data_stats,

View file

@ -2423,54 +2423,7 @@
} }
function copyBootstrapInstructions(btn) { function copyBootstrapInstructions(btn) {
var username = {{ username | tojson }}; var instructions = {{ setup_instructions | tojson }};
var serverHost = {{ server_host | tojson }};
var serverHostname = {{ server_hostname | tojson }};
var webappUrl = serverHostname ? 'http://' + serverHostname : '';
var sshAlias = {{ ssh_alias | tojson }};
var sshKey = {{ ssh_key | tojson }};
var instructions = 'Set up my AI Data Analyst local environment.\n\n'
+ 'Connection details:\n'
+ ' Server IP: ' + serverHost + '\n'
+ ' Webapp: ' + webappUrl + '\n'
+ ' My username: ' + username + '\n'
+ ' SSH key: ' + sshKey + ' (already generated)\n\n'
+ 'Steps:\n\n'
+ '1. SSH config\n'
+ ' Check ~/.ssh/config - if a Host entry named "' + sshAlias + '" already exists\n'
+ ' with a DIFFERENT server, ask me what name to use instead.\n'
+ ' Otherwise add:\n'
+ ' Host ' + sshAlias + '\n'
+ ' HostName ' + serverHost + '\n'
+ ' User ' + username + '\n'
+ ' IdentityFile ' + sshKey + '\n'
+ ' StrictHostKeyChecking accept-new\n'
+ ' Then test: ssh ' + sshAlias + ' echo ok\n\n'
+ '2. Create project folders:\n'
+ ' mkdir -p server/docs server/scripts server/parquet server/metadata server/examples\n'
+ ' mkdir -p user/duckdb user/notifications user/artifacts user/scripts user/parquet user/sessions\n'
+ ' printf "ssh_alias=' + sshAlias + '\\nserver_host=' + serverHost + '\\nwebapp_url=' + webappUrl + '\\n" > .sync_connection\n\n'
+ '3. Download from server via rsync (use --no-perms --no-group to avoid macOS permission errors).\n'
+ ' Skip directories that don\'t exist on the server (rsync exit code 23 = missing source).\n'
+ ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/scripts/ ./server/scripts/\n'
+ ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/docs/ ./server/docs/\n'
+ ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/examples/ ./server/examples/\n'
+ ' rsync -avz --no-perms --no-group ' + sshAlias + ':server/metadata/ ./server/metadata/\n'
+ ' rsync -avz --no-perms --no-group --progress ' + sshAlias + ':server/parquet/ ./server/parquet/\n'
+ ' Note: some folders may be empty if data sync hasn\'t run on the server yet. That\'s OK.\n\n'
+ '4. Set up Python venv:\n'
+ ' python3 -m venv .venv\n'
+ ' source .venv/bin/activate\n'
+ ' pip install pandas pyarrow duckdb pyyaml python-dotenv\n\n'
+ '5. Initialize DuckDB (only if server/scripts/setup_views.sh exists):\n'
+ ' bash server/scripts/setup_views.sh\n\n'
+ '6. Create CLAUDE.md (if server/docs/setup/claude_md_template.txt exists):\n'
+ ' Copy the template, replace placeholders:\n'
+ ' {username} -> ' + username + '\n'
+ ' {ssh_alias} -> ' + sshAlias + '\n'
+ ' {server_host} -> ' + serverHost + '\n'
+ ' {webapp_url} -> ' + webappUrl + '\n';
var button = btn || document.getElementById('bootstrapCopyBtn'); var button = btn || document.getElementById('bootstrapCopyBtn');
var origText = button.textContent; var origText = button.textContent;