- New sync_schedule and profile_after_sync fields in TableConfig (formats: "every 15m", "every 1h", "daily 05:00")
- New src/scheduler.py with schedule evaluation logic (is_table_due)
- New --scheduled mode in data_sync.py: only syncs tables that are due, respects profile_after_sync flag, auto-restarts webapp after profiling
- Systemd timer+service for data-refresh (every 15 min)
- Systemd timer+service for catalog-refresh (every 15 min)
- deploy.sh enables new timers automatically
- Complete table config reference in data_description.md.example
- 58 new scheduler tests
27 lines
680 B
Desktop File
27 lines
680 B
Desktop File
# Oneshot service that runs the catalog export module (src.catalog_export)
# from the project virtualenv.
# NOTE(review): presumably triggered by a companion .timer unit; no timer is
# visible from this file, so confirm before relying on that.
[Unit]
Description=Catalog Refresh - export metrics and tables from OpenMetadata to YAML
# Order after, and pull in, network-online.target so the export does not
# start before the network is reported as up.
After=network-online.target
Wants=network-online.target

[Service]
# oneshot: the unit is considered started only once ExecStart has exited.
Type=oneshot
# NOTE(review): runs as root; confirm whether a dedicated unprivileged
# service user would be sufficient.
User=root
Group=data-ops
WorkingDirectory=/opt/data-analyst/repo
ExecStart=/opt/data-analyst/.venv/bin/python3 -m src.catalog_export

# Environment
# The path has no "-" prefix, so a missing .env file makes the unit fail.
EnvironmentFile=/opt/data-analyst/.env
Environment=PYTHONPATH=/opt/data-analyst/repo
Environment=CONFIG_DIR=/opt/data-analyst/instance/config

# Write access to docs output directory
# ProtectSystem=strict makes nearly the whole file system read-only for this
# service; ReadWritePaths lists the directories that stay writable.
ProtectSystem=strict
ReadWritePaths=/data/docs /opt/data-analyst/logs
PrivateTmp=true

# Catalog export is fast (seconds)
# The run is aborted if ExecStart has not finished within 120 seconds.
TimeoutStartSec=120

[Install]
# NOTE(review): enabling this unit directly also starts it once at boot via
# multi-user.target; confirm that is wanted if a timer already drives it.
WantedBy=multi-user.target