From 5e0e4ceb9ef74d1bcdcb34b9c7126343354655d4 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Thu, 9 Apr 2026 17:16:04 +0200 Subject: [PATCH] fix: rewrite Makefile and scripts/README.md Makefile simplified to four targets (test, dev, docker, lint) aligned with the current FastAPI/Docker architecture. scripts/README.md rewritten to list only the active and migration scripts that still exist. --- Makefile | 77 +++++++----------------------------------- scripts/README.md | 85 ++++++++--------------------------------------- 2 files changed, 25 insertions(+), 137 deletions(-) diff --git a/Makefile b/Makefile index 86e391c..332dfd6 100644 --- a/Makefile +++ b/Makefile @@ -1,75 +1,22 @@ -# AI Data Analyst - Development Makefile -# -# Usage: -# make - show help -# make test - run all tests -# make test-config - run config-related tests only -# make validate-config CONFIG_DIR=path/to/config - validate data_description.md -# make lint - placeholder for future linting +# Agnes AI Data Analyst — Development Makefile -SHELL := /bin/bash -PYTHON := .venv/bin/python -PYTEST := .venv/bin/pytest +.PHONY: help test lint dev docker -# Optional: path to config directory containing data_description.md -# Default: config/ (relative to project root) -CONFIG_DIR ?= config - -.PHONY: help test test-config validate-config lint - -# Default target help: @echo "Available targets:" - @echo " make test Run all pytest tests" - @echo " make test-config Run config and scheduler tests only" - @echo " make validate-config Validate data_description.md parsing" - @echo " Optional: CONFIG_DIR=path/to/config (default: config/)" - @echo " make lint Placeholder for future linting" - @echo "" - @echo "Prerequisites: Python virtualenv at .venv/ with dependencies installed" + @echo " make test Run test suite" + @echo " make dev Start FastAPI dev server" + @echo " make docker Build and start Docker Compose" + @echo " make lint Run ruff linter (if installed)" test: - $(PYTEST) + pytest tests/ -v --tb=short 
-test-config: - $(PYTEST) tests/test_config_query_mode.py tests/test_config_sync_schedule.py tests/test_scheduler.py -v +dev: + uvicorn app.main:app --reload -define VALIDATE_SCRIPT -import os, sys, re, tempfile, shutil -from pathlib import Path - -config_dir = Path(os.environ.get("CONFIG_DIR", "config")) -config_file = config_dir / "data_description.md" -if not config_file.exists(): - print("FAIL: %s not found" % config_file, file=sys.stderr) - sys.exit(1) - -# Ensure docs/data_description.md exists so Config._find_project_root() works. -# If CONFIG_DIR points elsewhere, create a temporary symlink. -docs_path = Path("docs/data_description.md") -created_symlink = False -if not docs_path.exists(): - docs_path.parent.mkdir(parents=True, exist_ok=True) - docs_path.symlink_to(config_file.resolve()) - created_symlink = True - -try: - from src.config import Config - c = Config() - names = ", ".join(t.name for t in c.tables) - print("OK: parsed %d table(s): %s" % (len(c.tables), names)) -except Exception as e: - print("FAIL: %s" % e, file=sys.stderr) - sys.exit(1) -finally: - if created_symlink: - docs_path.unlink(missing_ok=True) -endef -export VALIDATE_SCRIPT - -validate-config: - @echo "Validating data_description.md in CONFIG_DIR=$(CONFIG_DIR) ..." - @CONFIG_DIR=$(CONFIG_DIR) $(PYTHON) -c "$$VALIDATE_SCRIPT" +docker: + docker compose up --build lint: - @echo "Linting not configured yet. Add ruff, flake8, or similar here." + @if command -v ruff >/dev/null 2>&1; then ruff check .; else echo "ruff not installed: pip install ruff"; fi diff --git a/scripts/README.md b/scripts/README.md index 9079834..4666da8 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,78 +1,19 @@ # Scripts -Helper scripts for working with AI Data Analyst project. +Utility and migration scripts for Agnes AI Data Analyst. -These scripts are synced from the server into `server/scripts/` on the analyst's machine. 
+## Active Scripts -## Available Scripts +| Script | Purpose | +|--------|---------| +| `generate_sample_data.py` | Generate sample data for development/demo | +| `duckdb_manager.py` | DuckDB database management utilities | +| `init.sh` | Initial server setup (install deps, create dirs) | -### `setup_views.sh` +## Migration Scripts (one-time use) -Initialize or refresh DuckDB views on Parquet files. - -```bash -bash server/scripts/setup_views.sh -``` - -### `sync_data.sh` - -Synchronize data from server, upload user files, and refresh DuckDB. - -```bash -# Recommended: update scripts first, then sync -rsync -avz data-analyst:server/scripts/ ./server/scripts/ # Linux/macOS -scp -r data-analyst:server/scripts/* ./server/scripts/ # Windows fallback -bash server/scripts/sync_data.sh - -# Other options: -bash server/scripts/sync_data.sh --dry-run # Preview what would be synced (no changes) -bash server/scripts/sync_data.sh --push # Only upload user/ to server -``` - -**What sync does:** -1. **Self-update check** - detects if sync_data.sh changed, asks to re-run if so -2. Downloads `server/docs/`, `server/scripts/`, `server/metadata/` from server -3. Updates `CLAUDE.md` from latest template -4. Downloads `server/parquet/` data files (with `--delete` to remove old files) -5. Uploads `user/` directory to server (backup, no `--delete`) -6. Syncs Python environment to server -7. **Validates DuckDB** - if corrupted, deletes and recreates from parquets -8. Reinitializes DuckDB views (`CREATE OR REPLACE VIEW` for all tables) - -**Self-update mechanism:** -The script checks its own checksum before and after syncing scripts. If it detects it was updated, it exits with a message asking you to run sync again. This ensures you always run the latest sync logic. - -**DuckDB corruption recovery:** -If DuckDB file is corrupted (e.g., interrupted sync), it's automatically detected and recreated. All data is safe in parquet files - DuckDB only contains VIEW definitions. 
- -## Development Scripts - -### `dev_run.py` - -Flask development server with authentication bypass for local testing. - -```bash -python3 scripts/dev_run.py -``` - -Starts a local Flask server at http://127.0.0.1:5000 with: -- Auth bypass routes (`/dev-login`, `/dev-catalog`) - no OAuth required -- Debug mode with hot reload - -### `test_sync.sh` - -Test rsync reliability with the data server. - -```bash -bash scripts/test_sync.sh # Full test sync -bash scripts/test_sync.sh --dry-run # Preview only -``` - -## Typical Workflow - -1. **First time setup**: Follow bootstrap.yaml instructions -2. **Before analysis**: Sync latest data - ```bash - bash server/scripts/sync_data.sh - ``` -4. **Analyze**: Use DuckDB database at `user/duckdb/analytics.duckdb` +| Script | Purpose | +|--------|---------| +| `migrate_json_to_duckdb.py` | Migrate v1 JSON state files to DuckDB | +| `migrate_parquets_to_extracts.py` | Migrate v1 parquet layout to extract.duckdb | +| `migrate_registry_to_duckdb.py` | Migrate v1 table registry to DuckDB |