Open-source AI data analyst platform extracted from internal repo. Includes data sync engine, Keboola adapter, Flask web portal, server deployment scripts, and configuration templates.
71 lines
1.9 KiB
Bash
Executable file
#!/bin/bash

# update.sh - Data synchronization script
#
# This script performs:
# 1. Virtual environment activation (local ./.venv or /opt/data-analyst/.venv)
# 2. Data synchronization from configured data source (python3 -m src.data_sync)
# 3. Data profile generation (python3 -m src.profiler; failure is non-fatal)
#
# DuckDB views are NOT set up here; run ./scripts/setup_views.sh for that.
#
# Usage: run from the project root (the folder containing
# docs/data_description.md).
#
# Note: Git pull and dependency updates are handled by deploy.sh (GitHub Actions)

set -e # Exit on error

echo "🔄 AI Data Analyst - Data Update"
echo ""

# Check that we're in the correct folder (same check as config.py uses).
# Everything below uses paths relative to the current directory (.venv, the
# src package), so abort early when the marker file is not present.
if [[ ! -f "docs/data_description.md" ]]; then
  # Diagnostics go to stderr so they are not mixed into regular output.
  echo "❌ Run script from project root (folder with docs/data_description.md)" >&2
  exit 1
fi

# Note: Git pull and dependency updates are handled by deploy.sh (GitHub Actions)
# This script focuses only on data synchronization and profiling

# Activate virtual environment
# Supports both local (./.venv) and server (/opt/data-analyst/.venv) setups.
# The local environment wins when both exist. The activate script itself is
# tested (not just the directory) so that an incomplete environment produces
# the "Run init.sh first" hint instead of an opaque `source` failure.
echo ""
echo "1️⃣ Activating virtual environment..."
if [[ -f ".venv/bin/activate" ]]; then
  # shellcheck disable=SC1091 # activate script only exists at runtime
  source .venv/bin/activate
  echo " ✅ Virtual environment activated (local)"
elif [[ -f "/opt/data-analyst/.venv/bin/activate" ]]; then
  # shellcheck disable=SC1091 # activate script only exists at runtime
  source /opt/data-analyst/.venv/bin/activate
  echo " ✅ Virtual environment activated (server)"
else
  echo " ❌ Virtual environment not found. Run init.sh first." >&2
  exit 1
fi

# Data synchronization
echo ""
echo "2️⃣ Synchronizing data..."
echo ""

# Run data sync as a module so the src package is resolved from the current
# directory. The command is the `if` condition, so `set -e` does not abort on
# its failure; the else branch reports the error and exits with status 1.
if python3 -m src.data_sync; then
  echo ""
  echo " ✅ Data synchronization complete"
else
  {
    echo ""
    echo " ❌ Data synchronization failed. Check logs above."
  } >&2
  exit 1
fi

# Generate data profiles (for catalog profiler)
echo ""
echo "3️⃣ Generating data profiles..."
if python3 -m src.profiler; then
  echo " ✅ Data profiles generated"
else
  # Non-fatal: profiling failure should not break the pipeline, so only a
  # warning is printed (to stderr) and the script carries on.
  echo " ⚠️ Data profiling failed (non-fatal). Check logs above." >&2
fi

# Done: print the closing summary and point to the follow-up step, since this
# script does not set up the DuckDB views itself.
echo ""
echo "✅ Data sync complete!"
echo ""
echo "💡 Parquet files are ready in data/parquet/"
echo " To setup DuckDB views, run: ./scripts/setup_views.sh"
echo ""