Fix sync_schedule validation to accept multi-time daily format

The scheduler.py already supported "daily HH:MM,HH:MM,HH:MM" format
(commit 5f27d05), but config.py validation regex only accepted single
time "daily HH:MM", causing data-refresh to crash on startup.

Also adds:
- tests/test_config_sync_schedule.py (16 test cases)
- Makefile with validate-config target for CI/CD integration
This commit is contained in:
Petr 2026-03-17 13:21:14 +01:00
parent 5f27d05894
commit ab99f0af92
3 changed files with 181 additions and 2 deletions

75
Makefile Normal file
View file

@ -0,0 +1,75 @@
# AI Data Analyst - Development Makefile
#
# Usage:
# make - show help
# make test - run all tests
# make test-config - run config-related tests only
# make validate-config CONFIG_DIR=path/to/config - validate data_description.md
# make lint - placeholder for future linting
SHELL := /bin/bash
PYTHON := .venv/bin/python
PYTEST := .venv/bin/pytest
# Optional: path to config directory containing data_description.md
# Default: config/ (relative to project root)
CONFIG_DIR ?= config
.PHONY: help test test-config validate-config lint
# Default target
help:
@echo "Available targets:"
@echo " make test Run all pytest tests"
@echo " make test-config Run config and scheduler tests only"
@echo " make validate-config Validate data_description.md parsing"
@echo " Optional: CONFIG_DIR=path/to/config (default: config/)"
@echo " make lint Placeholder for future linting"
@echo ""
@echo "Prerequisites: Python virtualenv at .venv/ with dependencies installed"
test:
$(PYTEST)
test-config:
$(PYTEST) tests/test_config_query_mode.py tests/test_config_sync_schedule.py tests/test_scheduler.py -v
define VALIDATE_SCRIPT
import os, sys, re, tempfile, shutil
from pathlib import Path
config_dir = Path(os.environ.get("CONFIG_DIR", "config"))
config_file = config_dir / "data_description.md"
if not config_file.exists():
print("FAIL: %s not found" % config_file, file=sys.stderr)
sys.exit(1)
# Ensure docs/data_description.md exists so Config._find_project_root() works.
# If CONFIG_DIR points elsewhere, create a temporary symlink.
docs_path = Path("docs/data_description.md")
created_symlink = False
if not docs_path.exists():
docs_path.parent.mkdir(parents=True, exist_ok=True)
docs_path.symlink_to(config_file.resolve())
created_symlink = True
try:
from src.config import Config
c = Config()
names = ", ".join(t.name for t in c.tables)
print("OK: parsed %d table(s): %s" % (len(c.tables), names))
except Exception as e:
print("FAIL: %s" % e, file=sys.stderr)
sys.exit(1)
finally:
if created_symlink:
docs_path.unlink(missing_ok=True)
endef
export VALIDATE_SCRIPT
validate-config:
@echo "Validating data_description.md in CONFIG_DIR=$(CONFIG_DIR) ..."
@CONFIG_DIR=$(CONFIG_DIR) $(PYTHON) -c "$$VALIDATE_SCRIPT"
lint:
@echo "Linting not configured yet. Add ruff, flake8, or similar here."

View file

@ -167,12 +167,13 @@ class TableConfig:
import re as _re
valid_schedule = (
_re.match(r"^every \d+[mh]$", self.sync_schedule)
or _re.match(r"^daily \d{2}:\d{2}$", self.sync_schedule)
or _re.match(r"^daily \d{2}:\d{2}(,\d{2}:\d{2})*$", self.sync_schedule)
)
if not valid_schedule:
raise ValueError(
f"Invalid sync_schedule '{self.sync_schedule}' for table {self.id}. "
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00'"
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00', "
f"'daily 07:00,13:00,18:00'"
)
# For partitioned, partition_by must be defined

View file

@ -0,0 +1,103 @@
"""Tests for TableConfig.sync_schedule field validation."""
import pytest
from src.config import TableConfig
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_table(**overrides) -> TableConfig:
"""Create a TableConfig with sensible defaults, applying overrides."""
defaults = dict(
id="test.dataset.table",
name="test_table",
description="Test",
primary_key="id",
sync_strategy="full_refresh",
)
defaults.update(overrides)
return TableConfig(**defaults)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestSyncScheduleDefault:
def test_default_is_none(self):
table = _make_table()
assert table.sync_schedule is None
class TestSyncScheduleValidValues:
@pytest.mark.parametrize(
"schedule",
[
"every 15m",
"every 1h",
"daily 05:00",
"daily 07:00,13:00,18:00",
"daily 00:00,12:00",
],
ids=[
"every-15m",
"every-1h",
"daily-single",
"daily-three-times",
"daily-two-times",
],
)
def test_valid_schedule_accepted(self, schedule: str):
table = _make_table(sync_schedule=schedule)
assert table.sync_schedule == schedule
class TestSyncScheduleEdgeCases:
def test_every_zero_minutes(self):
"""every 0m matches the regex -- validation is syntactic, not semantic."""
table = _make_table(sync_schedule="every 0m")
assert table.sync_schedule == "every 0m"
def test_daily_2359(self):
table = _make_table(sync_schedule="daily 23:59")
assert table.sync_schedule == "daily 23:59"
class TestSyncScheduleInvalid:
@pytest.mark.parametrize(
"bad_schedule",
[
"daily 07:00,13:00,18:00,", # trailing comma
"daily 7:00", # single-digit hour
"daily", # missing time
"hourly", # unsupported keyword
"weekly", # unsupported keyword
],
ids=[
"trailing-comma",
"single-digit-hour",
"daily-no-time",
"hourly-keyword",
"weekly-keyword",
],
)
def test_invalid_schedule_raises(self, bad_schedule: str):
with pytest.raises(ValueError, match="Invalid sync_schedule"):
_make_table(sync_schedule=bad_schedule)
def test_empty_string_treated_as_none(self):
"""Empty string is falsy, so validation is skipped (same as None)."""
table = _make_table(sync_schedule="")
assert table.sync_schedule == ""
def test_daily_25_accepted_by_regex(self):
"""25:00 passes regex validation (two digits). Document this behavior."""
table = _make_table(sync_schedule="daily 25:00")
assert table.sync_schedule == "daily 25:00"
class TestSyncScheduleNoneExplicit:
def test_explicit_none_accepted(self):
table = _make_table(sync_schedule=None)
assert table.sync_schedule is None