From ab99f0af9227f5e5f59801d9554bfc4df7315451 Mon Sep 17 00:00:00 2001 From: Petr Date: Tue, 17 Mar 2026 13:21:14 +0100 Subject: [PATCH] Fix sync_schedule validation to accept multi-time daily format The scheduler.py already supported "daily HH:MM,HH:MM,HH:MM" format (commit 5f27d05), but config.py validation regex only accepted single time "daily HH:MM", causing data-refresh to crash on startup. Also adds: - tests/test_config_sync_schedule.py (16 test cases) - Makefile with validate-config target for CI/CD integration --- Makefile | 75 +++++++++++++++++++++ src/config.py | 5 +- tests/test_config_sync_schedule.py | 103 +++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 Makefile create mode 100644 tests/test_config_sync_schedule.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..86e391c --- /dev/null +++ b/Makefile @@ -0,0 +1,75 @@ +# AI Data Analyst - Development Makefile +# +# Usage: +# make - show help +# make test - run all tests +# make test-config - run config-related tests only +# make validate-config CONFIG_DIR=path/to/config - validate data_description.md +# make lint - placeholder for future linting + +SHELL := /bin/bash +PYTHON := .venv/bin/python +PYTEST := .venv/bin/pytest + +# Optional: path to config directory containing data_description.md +# Default: config/ (relative to project root) +CONFIG_DIR ?= config + +.PHONY: help test test-config validate-config lint + +# Default target +help: + @echo "Available targets:" + @echo " make test Run all pytest tests" + @echo " make test-config Run config and scheduler tests only" + @echo " make validate-config Validate data_description.md parsing" + @echo " Optional: CONFIG_DIR=path/to/config (default: config/)" + @echo " make lint Placeholder for future linting" + @echo "" + @echo "Prerequisites: Python virtualenv at .venv/ with dependencies installed" + +test: + $(PYTEST) + +test-config: + $(PYTEST) tests/test_config_query_mode.py tests/test_config_sync_schedule.py tests/test_scheduler.py -v + +define VALIDATE_SCRIPT +import os, sys, re, tempfile, shutil +from pathlib import Path + +config_dir = Path(os.environ.get("CONFIG_DIR", "config")) +config_file = config_dir / "data_description.md" +if not config_file.exists(): + print("FAIL: %s not found" % config_file, file=sys.stderr) + sys.exit(1) + +# Ensure docs/data_description.md exists so Config._find_project_root() works. +# If CONFIG_DIR points elsewhere, create a temporary symlink. +docs_path = Path("docs/data_description.md") +created_symlink = False +if not docs_path.exists(): + docs_path.parent.mkdir(parents=True, exist_ok=True) + docs_path.symlink_to(config_file.resolve()) + created_symlink = True + +try: + from src.config import Config + c = Config() + names = ", ".join(t.name for t in c.tables) + print("OK: parsed %d table(s): %s" % (len(c.tables), names)) +except Exception as e: + print("FAIL: %s" % e, file=sys.stderr) + sys.exit(1) +finally: + if created_symlink: + docs_path.unlink(missing_ok=True) +endef +export VALIDATE_SCRIPT + +validate-config: + @echo "Validating data_description.md in CONFIG_DIR=$(CONFIG_DIR) ..." + @CONFIG_DIR=$(CONFIG_DIR) $(PYTHON) -c "$$VALIDATE_SCRIPT" + +lint: + @echo "Linting not configured yet. Add ruff, flake8, or similar here." diff --git a/src/config.py b/src/config.py index 3ff5374..cb0c627 100644 --- a/src/config.py +++ b/src/config.py @@ -167,12 +167,13 @@ class TableConfig: import re as _re valid_schedule = ( _re.match(r"^every \d+[mh]$", self.sync_schedule) - or _re.match(r"^daily \d{2}:\d{2}$", self.sync_schedule) + or _re.match(r"^daily \d{2}:\d{2}(,\d{2}:\d{2})*$", self.sync_schedule) ) if not valid_schedule: raise ValueError( f"Invalid sync_schedule '{self.sync_schedule}' for table {self.id}. " - f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00'" + f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00', " + f"'daily 07:00,13:00,18:00'" ) # For partitioned, partition_by must be defined diff --git a/tests/test_config_sync_schedule.py b/tests/test_config_sync_schedule.py new file mode 100644 index 0000000..0a07c4a --- /dev/null +++ b/tests/test_config_sync_schedule.py @@ -0,0 +1,103 @@ +"""Tests for TableConfig.sync_schedule field validation.""" + +import pytest + +from src.config import TableConfig + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- +def _make_table(**overrides) -> TableConfig: + """Create a TableConfig with sensible defaults, applying overrides.""" + defaults = dict( + id="test.dataset.table", + name="test_table", + description="Test", + primary_key="id", + sync_strategy="full_refresh", + ) + defaults.update(overrides) + return TableConfig(**defaults) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- +class TestSyncScheduleDefault: + def test_default_is_none(self): + table = _make_table() + assert table.sync_schedule is None + + +class TestSyncScheduleValidValues: + @pytest.mark.parametrize( + "schedule", + [ + "every 15m", + "every 1h", + "daily 05:00", + "daily 07:00,13:00,18:00", + "daily 00:00,12:00", + ], + ids=[ + "every-15m", + "every-1h", + "daily-single", + "daily-three-times", + "daily-two-times", + ], + ) + def test_valid_schedule_accepted(self, schedule: str): + table = _make_table(sync_schedule=schedule) + assert table.sync_schedule == schedule + + +class TestSyncScheduleEdgeCases: + def test_every_zero_minutes(self): + """every 0m matches the regex -- validation is syntactic, not semantic.""" + table = _make_table(sync_schedule="every 0m") + assert table.sync_schedule == "every 0m" + + def test_daily_2359(self): + table = _make_table(sync_schedule="daily 23:59") + assert table.sync_schedule == "daily 23:59" + + +class TestSyncScheduleInvalid: + @pytest.mark.parametrize( + "bad_schedule", + [ + "daily 07:00,13:00,18:00,", # trailing comma + "daily 7:00", # single-digit hour + "daily", # missing time + "hourly", # unsupported keyword + "weekly", # unsupported keyword + ], + ids=[ + "trailing-comma", + "single-digit-hour", + "daily-no-time", + "hourly-keyword", + "weekly-keyword", + ], + ) + def test_invalid_schedule_raises(self, bad_schedule: str): + with pytest.raises(ValueError, match="Invalid sync_schedule"): + _make_table(sync_schedule=bad_schedule) + + def test_empty_string_treated_as_none(self): + """Empty string is falsy, so validation is skipped (same as None).""" + table = _make_table(sync_schedule="") + assert table.sync_schedule == "" + + def test_daily_25_accepted_by_regex(self): + """25:00 passes regex validation (two digits). Document this behavior.""" + table = _make_table(sync_schedule="daily 25:00") + assert table.sync_schedule == "daily 25:00" + + +class TestSyncScheduleNoneExplicit: + def test_explicit_none_accepted(self): + table = _make_table(sync_schedule=None) + assert table.sync_schedule is None