- New sync_schedule and profile_after_sync fields in TableConfig (formats: "every 15m", "every 1h", "daily 05:00") - New src/scheduler.py with schedule evaluation logic (is_table_due) - New --scheduled mode in data_sync.py: only syncs tables that are due, respects profile_after_sync flag, auto-restarts webapp after profiling - Systemd timer+service for data-refresh (every 15 min) - Systemd timer+service for catalog-refresh (every 15 min) - deploy.sh enables new timers automatically - Complete table config reference in data_description.md.example - 58 new scheduler tests
33 lines
947 B
Desktop File
33 lines
947 B
Desktop File
[Unit]
Description=Data Refresh - scheduled sync from BigQuery to local Parquet
# Wants= pulls network-online.target into the transaction; After= only
# orders this unit behind it. Both are needed to wait for the network.
Wants=network-online.target
After=network-online.target

[Service]
# One-shot job: the unit stays in "activating" until the sync process exits.
Type=oneshot
User=root
Group=data-ops
WorkingDirectory=/opt/data-analyst/repo
# --scheduled selects the scheduled mode of src.data_sync.
ExecStart=/opt/data-analyst/.venv/bin/python3 -m src.data_sync --scheduled

# Environment
EnvironmentFile=/opt/data-analyst/.env
Environment=PYTHONPATH=/opt/data-analyst/repo
Environment=CONFIG_DIR=/opt/data-analyst/instance/config

# Filesystem sandbox: ProtectSystem=strict mounts the whole hierarchy
# read-only; only the paths listed in ReadWritePaths= stay writable.
# Every listed path must already exist when the unit starts, otherwise
# namespace setup fails before ExecStart= runs.
ProtectSystem=strict
ReadWritePaths=/data /opt/data-analyst/logs /tmp/data_analyst_staging
# The staging directory lives in the host's /tmp, so a private /tmp must
# stay disabled.
PrivateTmp=false

# Sync can take a while for large tables
TimeoutStartSec=3600

# Overlapping runs: no lock file is needed. While a oneshot service is
# still activating, systemd will not start a second instance of the unit,
# so a trigger that arrives during a long sync does not launch another run.
# The previous /tmp/data-refresh.lock scheme was removed because:
#   - /tmp is read-only under ProtectSystem=strict (only the staging
#     directory is writable), so the ExecStartPre= "touch" failed and the
#     sync never ran;
#   - ExecStopPost= also runs after a failed ExecCondition=, so a skipped
#     run would have deleted the lock of the run in progress.
# NOTE(review): if anything outside this unit reads /tmp/data-refresh.lock,
# it needs a different signal - confirm before deploying.

[Install]
# Enabling this unit attaches it to multi-user.target.
# NOTE(review): this service is presumably triggered by its timer; confirm
# that also starting it via multi-user.target at boot is intended.
WantedBy=multi-user.target