agnes-the-ai-analyst/scripts/update.sh

#!/bin/bash

# update.sh - Data synchronization script
#
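# This script performs:
# 1. Virtual environment activation (local ./.venv or /opt/data-analyst/.venv)
# 2. Data synchronization from configured data source (python3 -m src.data_sync)
# 3. Data profile generation (python3 -m src.profiler; failure is non-fatal)
#
# DuckDB views are NOT (re)initialized here; run ./scripts/setup_views.sh afterwards.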
#
# Note: Git pull and dependency updates are handled by deploy.sh (GitHub Actions)

set -o errexit  # Abort the script as soon as any command fails

printf '%s\n' "🔄 AI Data Analyst - Data Update" ""

# Guard: the script must be launched from the project root, recognised by
# the presence of docs/data_description.md (same check as config.py uses).
[ -f "docs/data_description.md" ] || {
    echo "❌ Run script from project root (folder with docs/data_description.md)"
    exit 1
}

# Note: Git pull and dependency updates are handled by deploy.sh (GitHub Actions)
# This script focuses only on data synchronization

# Activate virtual environment
# Supports both local (./.venv) and server (/opt/data-analyst/.venv) setups.
#
# Each candidate is accepted only if its bin/activate script exists. Testing
# just the directory is not enough: an incomplete .venv would make `source`
# fail, and because the script exits on any error it would abort with a raw
# shell message instead of trying the next candidate or printing the hint.
echo ""
echo "1️⃣  Activating virtual environment..."
if [ -f ".venv/bin/activate" ]; then
    source .venv/bin/activate
    echo "   ✅ Virtual environment activated (local)"
elif [ -f "/opt/data-analyst/.venv/bin/activate" ]; then
    source /opt/data-analyst/.venv/bin/activate
    echo "   ✅ Virtual environment activated (server)"
else
    # Neither location provides an activate script.
    echo "   ❌ Virtual environment not found. Run init.sh first."
    exit 1
fi

# Data synchronization
printf '%s\n' "" "2️⃣  Synchronizing data..." ""

# Launch the sync module; a non-zero exit status aborts the whole update.
if ! python3 -m src.data_sync; then
    echo ""
    echo "   ❌ Data synchronization failed. Check logs above."
    exit 1
fi

# Reached only when the sync module exited successfully.
echo ""
echo "   ✅ Data synchronization complete"

# Generate data profiles (for catalog profiler)
printf '%s\n' "" "3️⃣  Generating data profiles..."

# Profiling is best-effort: a failure only prints a warning and the script
# carries on, so a profiler problem never breaks the pipeline.
if ! python3 -m src.profiler; then
    echo "   ⚠️  Data profiling failed (non-fatal). Check logs above."
else
    echo "   ✅ Data profiles generated"
fi

# Done: print the closing summary and point to the follow-up step.
printf '%s\n' \
    "" \
    "✅ Data sync complete!" \
    "" \
    "💡 Parquet files are ready in data/parquet/" \
    "   To setup DuckDB views, run: ./scripts/setup_views.sh" \
    ""