Phase 1 - Internal reference cleanup:
- Delete dev_docs/meetings/ (internal meeting notes/transcripts)
- Replace hardcoded usernames (padak/matejkys/dasa) with deploy/generic
- Replace "Internal AI Data Analyst" with "AI Data Analyst"
- Replace keboola/internal_ai_data_analyst URLs with your-org/ai-data-analyst
- Replace /tmp/keboola_load/ with /tmp/data_analyst_staging/ in dev_docs
Phase 2 - Deployment hardening:
- Tighten sudoers wildcards to explicit paths (visudo, sudoers cp)
- setup.sh creates all groups (data-ops, dataread, data-private) and deploy user
- webapp-setup.sh copies sudoers-webapp from repo instead of inline definition
- deploy.sh conditional copy for data_description.md (not in git for OSS)
- deploy.sh ownership changed to deploy:data-ops for /data/{scripts,docs,examples}
Phase 3 - Config and misc:
- Add ${ENV_VAR} interpolation to config/loader.py
- Expand config/instance.yaml.example with all sections (admins, deployment, auth, etc.)
- Create config/.env.template for secret values
- Add MIT LICENSE
- Fix .gitignore: add .venv/, docs/data_description.md
- Fix README.md: CSV status Planned, remove metrics/, update license text
- Translate Czech comments in requirements.txt to English
- Fix test_account_service.py: mock username mapping instead of relying on instance config
All 118 tests pass.
172 lines
4.9 KiB
Bash
Executable file
172 lines
4.9 KiB
Bash
Executable file
#!/bin/bash
# Setup script for Data Analyst Web App
# Run this ONCE on the server to set up the web application
# Must be run as root or with sudo
#
# Environment:
#   SERVER_HOSTNAME  Public hostname of the server; asked for interactively
#                    when unset or empty.
#   CERTBOT_EMAIL    Optional contact address passed to certbot.

set -euo pipefail

# Server hostname - required for SSL, Nginx, and OAuth configuration
DOMAIN="${SERVER_HOSTNAME:-}"
if [[ -z "$DOMAIN" ]]; then
    # -r keeps backslashes literal. "|| true" lets an EOF on stdin (e.g. a
    # non-interactive run) reach the explicit error below instead of having
    # set -e terminate the script without any message.
    read -r -p "Enter server hostname (e.g., data.example.com): " DOMAIN || true
    if [[ -z "$DOMAIN" ]]; then
        echo "ERROR: SERVER_HOSTNAME is required" >&2
        exit 1
    fi
fi
# Installation layout. Every path is derived from APP_DIR and is marked
# readonly so later sections cannot redirect file operations by accident.
readonly APP_DIR="/opt/data-analyst"
readonly REPO_DIR="${APP_DIR}/repo"    # checkout that holds the server/ config files
readonly VENV_DIR="${APP_DIR}/.venv"   # Python virtualenv used for the webapp
readonly LOG_DIR="${APP_DIR}/logs"     # directory for the webapp log files
readonly ENV_FILE="${APP_DIR}/.env"    # secrets / OAuth settings for the webapp

printf '%s\n' "=== Data Analyst Web App Setup ===" ""

# Everything below edits system configuration, so refuse to run unprivileged.
if (( EUID != 0 )); then
    printf '%s\n' "This script must be run as root (use sudo)"
    exit 1
fi

# The repository checkout must already exist; the message below points at
# server/setup.sh as the step that provides it.
[[ -d "$REPO_DIR" ]] || {
    echo "ERROR: Repository not found at $REPO_DIR"
    echo "Please run server/setup.sh first."
    exit 1
}

# System packages: the Nginx web server, Certbot, and Certbot's Nginx plugin.
echo "Installing system dependencies..."
system_packages=(nginx certbot python3-certbot-nginx)
apt-get update
apt-get install -y "${system_packages[@]}"

# Install Python dependencies for webapp into the existing virtualenv.
# The venv's own pip is invoked directly rather than sourcing bin/activate:
# activate scripts may reference unset variables (fatal under `set -u`), and
# activation would needlessly alter this script's own environment.
echo "Installing Python dependencies..."
venv_pip="${VENV_DIR}/bin/pip"
if [[ ! -x "$venv_pip" ]]; then
    echo "ERROR: pip not found at ${venv_pip}" >&2
    exit 1
fi
"$venv_pip" install flask authlib gunicorn

# Pre-create the webapp access/error logs and hand ownership to www-data.
echo "Creating log files..."
webapp_logs=(
    "${LOG_DIR}/webapp-access.log"
    "${LOG_DIR}/webapp-error.log"
)
touch "${webapp_logs[@]}"
chown www-data:www-data "${webapp_logs[@]}"

# Check/create .env file
# An existing file is never overwritten, so re-running the script keeps any
# values that were already filled in.
if [[ ! -f "$ENV_FILE" ]]; then
    echo "Creating .env file template..."
    # Write under a restrictive umask (inside a subshell so the umask does not
    # leak into the rest of the script): the file is then never readable by
    # other users, not even in the window before the chmod below.
    (
        umask 077
        cat > "$ENV_FILE" << 'EOF'
# Web App Configuration
# Generate secret key with: python -c "import secrets; print(secrets.token_hex(32))"
WEBAPP_SECRET_KEY=CHANGE_ME_GENERATE_A_SECURE_KEY

# Google OAuth credentials (from Google Cloud Console)
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=

# Server info (set these to your server's IP and hostname)
SERVER_HOST=CHANGE_ME_SET_SERVER_IP
SERVER_HOSTNAME=CHANGE_ME_SET_SERVER_HOSTNAME
EOF
    )
    chmod 600 "$ENV_FILE"
    # NOTE(review): with mode 600 the data-ops group gets no access, so this
    # group ownership has no effect on its own - confirm whether 640 was meant.
    chown root:data-ops "$ENV_FILE"
    echo ""
    echo "IMPORTANT: Edit ${ENV_FILE} and add your Google OAuth credentials!"
    echo ""
else
    echo ".env file already exists at ${ENV_FILE}"
fi

# Give www-data supplementary membership in data-ops (existing groups are
# kept) for static file access.
echo "Adding www-data to data-ops group..."
usermod --append --groups data-ops www-data

# Install sudoers rules for www-data (from repo, includes all required rules)
echo "Configuring sudoers..."
SUDOERS_FILE="/etc/sudoers.d/webapp"
SUDOERS_SOURCE="${REPO_DIR}/server/sudoers-webapp"

# Validate sudoers syntax BEFORE anything is placed in /etc/sudoers.d: a
# broken drop-in there can stop sudo from working, and validating the repo
# copy first also leaves a previously installed, valid file untouched.
if ! visudo -cf "$SUDOERS_SOURCE"; then
    echo "ERROR: Invalid sudoers syntax" >&2
    exit 1
fi

# install copies the file and sets owner, group and mode in a single step.
install -o root -g root -m 0440 "$SUDOERS_SOURCE" "$SUDOERS_FILE"

# Copy the webapp unit file from the repository and make systemd re-read its
# unit definitions.
echo "Installing systemd service..."
unit_source="${REPO_DIR}/server/webapp.service"
unit_target="/etc/systemd/system/webapp.service"
cp "$unit_source" "$unit_target"
systemctl daemon-reload

# Put the webapp site definition in place.
echo "Installing Nginx configuration..."
nginx_site="/etc/nginx/sites-available/webapp"
nginx_enabled_dir="/etc/nginx/sites-enabled"
cp "${REPO_DIR}/server/webapp-nginx.conf" "$nginx_site"

# Drop the default site if present, then (re)link the webapp site as enabled.
rm -f "${nginx_enabled_dir}/default"
ln -sf "$nginx_site" "${nginx_enabled_dir}/webapp"

# Stop here if Nginx rejects the resulting configuration.
nginx -t || {
    echo "ERROR: Nginx configuration test failed"
    exit 1
}

# Obtain a Let's Encrypt certificate unless one already exists for DOMAIN.
# CERTBOT_EMAIL, when set, is used as the contact address; otherwise
# admin@<DOMAIN> is used.
if [[ ! -f "/etc/letsencrypt/live/${DOMAIN}/fullchain.pem" ]]; then
    echo ""
    echo "SSL certificate not found. Obtaining certificate..."
    echo ""
    echo "IMPORTANT: Make sure DNS A record for ${DOMAIN} points to this server!"
    echo ""
    read -r -p "Press Enter to continue or Ctrl+C to abort..."

    nginx_site_conf="/etc/nginx/sites-available/webapp"

    # Temporarily disable HTTPS server block
    # (presumably because the certificate it refers to does not exist yet).
    sed -i 's/listen 443/# listen 443/g' "$nginx_site_conf"

    # Reload Nginx and request the certificate. The status is captured rather
    # than left to set -e, so a failure cannot leave the site configuration
    # with its HTTPS listener still commented out.
    # NOTE(review): --redirect is an installer option and likely has no effect
    # together with "certonly" - confirm and drop it if so.
    cert_status=0
    {
        systemctl reload nginx &&
            certbot certonly --nginx -d "$DOMAIN" --non-interactive --agree-tos \
                --email "${CERTBOT_EMAIL:-admin@${DOMAIN}}" --redirect
    } || cert_status=$?

    # Re-enable HTTPS (done on success and on failure alike).
    sed -i 's/# listen 443/listen 443/g' "$nginx_site_conf"

    if (( cert_status != 0 )); then
        echo "ERROR: Failed to obtain SSL certificate for ${DOMAIN}" >&2
        exit "$cert_status"
    fi
fi

# Start services
echo "Starting services..."
systemctl enable webapp
# Starting is deliberately best-effort: the unit may fail if .env is not
# configured yet and setup must still finish - but report it instead of
# swallowing the failure silently.
if ! systemctl start webapp; then
    echo "WARNING: webapp did not start (may fail if .env not configured); see: systemctl status webapp" >&2
fi

systemctl reload nginx

# Closing summary: manual follow-up steps (OAuth client, .env, restart, checks).
# Unquoted delimiter on purpose so ${DOMAIN} and ${ENV_FILE} are expanded.
cat << EOF

=== Setup Complete ===

Next steps:

1. Configure Google OAuth:
 - Go to Google Cloud Console -> APIs & Services -> Credentials
 - Create OAuth 2.0 Client ID (Web application)
 - Set Authorized JavaScript origins: https://${DOMAIN}
 - Set Authorized redirect URIs: https://${DOMAIN}/authorize

2. Edit ${ENV_FILE}:
 - Add GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET
 - Generate and set WEBAPP_SECRET_KEY

3. Restart the webapp:
 systemctl restart webapp

4. Check status:
 systemctl status webapp
 systemctl status nginx

5. Test the site:
 curl -I https://${DOMAIN}

EOF