agnes-the-ai-analyst/scripts/init.sh
Petr c56905d34f Initial commit: OSS data distribution platform
Open-source AI data analyst platform extracted from internal repo.
Includes data sync engine, Keboola adapter, Flask web portal,
server deployment scripts, and configuration templates.
2026-03-08 23:31:28 +01:00

109 lines
3 KiB
Bash
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# init.sh - first-time setup for the AI Data Analyst Tool
#
# Steps performed, in order:
#   1. Check that python3 is available
#   2. Create the .venv virtual environment (skipped if it already exists)
#   3. Activate the virtual environment
#   4. Upgrade pip
#   5. Install dependencies from requirements.txt
#   6. Create the data folders under DATA_DIR (default: ./data)
#   7. Copy config/.env.template to .env (skipped if .env already exists)
#   8. Try to load the configuration
#
# Every path used is relative to the current directory (.venv,
# requirements.txt, config/.env.template), so this is presumably meant to be
# run from the repository root.

# Abort as soon as any command fails.
set -o errexit

printf '%s\n' "Initializing AI Data Analyst..."
printf '\n'
# Check Python version
echo "1⃣ Checking Python..."

# Return 0 when the dotted version string $1 (e.g. "3.11.4") is at least 3.8,
# the minimum named in the error messages below; return 1 otherwise.
# Only the leading "<major>.<minor>" is inspected; a string that does not
# start that way is rejected.
python_version_supported() {
  local major minor
  [[ $1 =~ ^([0-9]+)\.([0-9]+) ]] || return 1
  # Force base 10 so a component with a leading zero is not read as octal.
  major=$((10#${BASH_REMATCH[1]}))
  minor=$((10#${BASH_REMATCH[2]}))
  ((major > 3 || (major == 3 && minor >= 8)))
}

if ! command -v python3 &> /dev/null; then
  echo "❌ Python 3 is not installed. Install Python 3.8 or newer."
  exit 1
fi

# "python3 --version" prints "Python X.Y.Z"; keep only the version field.
PYTHON_VERSION=$(python3 --version 2>&1 | awk '{print $2}')

# Fail early with a clear message instead of letting an unsupported
# interpreter break later during venv creation or dependency installation.
if ! python_version_supported "$PYTHON_VERSION"; then
  echo "❌ Python $PYTHON_VERSION is too old. Install Python 3.8 or newer."
  exit 1
fi
echo " ✅ Python version: $PYTHON_VERSION"
# Step 2: make sure a .venv directory exists; an existing one is left untouched
printf '\n'
printf '%s\n' "2⃣ Creating virtual environment..."
if [[ ! -d .venv ]]; then
  python3 -m venv .venv
  printf '%s\n' " ✅ Virtual environment created"
else
  printf '%s\n' " ⚠️ .venv already exists, skipping..."
fi
# Step 3: source the venv's activate script into this shell, before the
# pip/python3 commands that follow
printf '\n%s\n' "3⃣ Activating virtual environment..."
. .venv/bin/activate
printf '%s\n' " ✅ Virtual environment activated"
# Step 4: bring pip up to date before installing the project requirements
printf '\n%s\n' "4⃣ Upgrading pip..."
pip install --quiet --upgrade pip
printf '%s\n' " ✅ pip upgraded"
# Step 5: install everything listed in requirements.txt (quietly)
printf '\n%s\n' "5⃣ Installing dependencies from requirements.txt..."
pip install --quiet -r requirements.txt
printf '%s\n' " ✅ Dependencies installed"
# Create folders
echo ""
echo "6⃣ Creating data folders..."

# Print the DATA_DIR value configured in the env file $1.
#   - Only lines that start with "DATA_DIR=" are considered; if there are
#     several, the last one wins (assumed to match how the application's
#     env loader resolves duplicates - TODO confirm).
#   - Everything after the first "=" is the value, so a path that itself
#     contains "=" is kept intact.
#   - Trailing whitespace, including the "\r" of CRLF files, is dropped.
#   - All single and double quote characters are removed.
# Prints nothing when the file has no DATA_DIR line.
read_env_data_dir() {
  local env_file=$1 line value=""
  local dq='"' sq="'"
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ $line == DATA_DIR=* ]] || continue
    value=${line#DATA_DIR=}
  done < "$env_file"
  # Strip trailing whitespace: the inner expansion isolates it, the outer
  # one removes it from the end of the value.
  value=${value%"${value##*[![:space:]]}"}
  value=${value//"$dq"/}
  value=${value//"$sq"/}
  printf '%s' "$value"
}

# Load DATA_DIR from .env (default: ./data).
# When .env exists, its value (possibly empty) replaces any DATA_DIR inherited
# from the environment; an empty result falls back to ./data.
# NOTE: on a first run .env is only created by the following step, so the
# inherited value or the default is what gets used here.
if [ -f ".env" ]; then
  DATA_DIR=$(read_env_data_dir .env)
fi
DATA_DIR="${DATA_DIR:-./data}"

# "--" keeps a DATA_DIR that begins with "-" from being parsed as an option.
for subdir in parquet metadata staging duckdb; do
  mkdir -p -- "${DATA_DIR}/${subdir}"
done

echo " ✅ Folders created in ${DATA_DIR}:"
echo " - parquet/ (Parquet files - subfolders created automatically)"
echo " - metadata/ (sync state, cache)"
echo " - staging/ (temporary files during sync)"
echo " - duckdb/ (DuckDB database)"
# Step 7: seed .env from config/.env.template unless a .env is already present;
# the setup hints are only printed when a new .env was created
printf '\n%s\n' "7⃣ Setting up .env file..."
if [[ ! -f .env ]]; then
  cp config/.env.template .env
  printf '%s\n' \
    " ✅ .env created from template" \
    "" \
    " How to set up:" \
    " 1. Copy config/instance.yaml.example to config/instance.yaml" \
    " 2. Fill in your data source credentials in .env" \
    " 3. Configure your instance settings in config/instance.yaml" \
    ""
else
  printf '%s\n' " ⚠️ .env already exists, keeping it..."
fi
# Step 8: try to load the configuration through src.config.get_config().
# A failure is reported as a warning only (it does not abort the script),
# and Python's stderr output is discarded.
printf '\n%s\n' "8⃣ Testing configuration..."
if ! python3 -c "from src.config import get_config; get_config()" 2>/dev/null; then
  printf '%s\n' " ⚠️ Configuration not complete yet (expected - fill in .env)"
else
  printf '%s\n' " ✅ Configuration is valid"
fi
# Done: closing summary with pointers for what to do next
printf '%s\n' \
  "" \
  "✅ Initialization complete!" \
  "" \
  "Next steps:" \
  " 1. Configure config/instance.yaml and .env" \
  " 2. Run './scripts/update.sh' to sync data" \
  " 3. Use Claude Code to analyze the data!" \
  "" \
  "💡 Tip: Activate virtual environment with 'source .venv/bin/activate'" \
  ""