Fix sync_schedule validation to accept multi-time daily format
The scheduler.py already supported "daily HH:MM,HH:MM,HH:MM" format
(commit 5f27d05), but config.py validation regex only accepted single
time "daily HH:MM", causing data-refresh to crash on startup.
Also adds:
- tests/test_config_sync_schedule.py (16 test cases)
- Makefile with validate-config target for CI/CD integration
This commit is contained in:
parent
5f27d05894
commit
ab99f0af92
3 changed files with 181 additions and 2 deletions
75
Makefile
Normal file
75
Makefile
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
# AI Data Analyst - Development Makefile
|
||||
#
|
||||
# Usage:
|
||||
# make - show help
|
||||
# make test - run all tests
|
||||
# make test-config - run config-related tests only
|
||||
# make validate-config CONFIG_DIR=path/to/config - validate data_description.md
|
||||
# make lint - placeholder for future linting
|
||||
|
||||
SHELL := /bin/bash
|
||||
PYTHON := .venv/bin/python
|
||||
PYTEST := .venv/bin/pytest
|
||||
|
||||
# Optional: path to config directory containing data_description.md
|
||||
# Default: config/ (relative to project root)
|
||||
CONFIG_DIR ?= config
|
||||
|
||||
.PHONY: help test test-config validate-config lint
|
||||
|
||||
# Default target
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
@echo " make test Run all pytest tests"
|
||||
@echo " make test-config Run config and scheduler tests only"
|
||||
@echo " make validate-config Validate data_description.md parsing"
|
||||
@echo " Optional: CONFIG_DIR=path/to/config (default: config/)"
|
||||
@echo " make lint Placeholder for future linting"
|
||||
@echo ""
|
||||
@echo "Prerequisites: Python virtualenv at .venv/ with dependencies installed"
|
||||
|
||||
test:
|
||||
$(PYTEST)
|
||||
|
||||
test-config:
|
||||
$(PYTEST) tests/test_config_query_mode.py tests/test_config_sync_schedule.py tests/test_scheduler.py -v
|
||||
|
||||
define VALIDATE_SCRIPT
|
||||
import os, sys, re, tempfile, shutil
|
||||
from pathlib import Path
|
||||
|
||||
config_dir = Path(os.environ.get("CONFIG_DIR", "config"))
|
||||
config_file = config_dir / "data_description.md"
|
||||
if not config_file.exists():
|
||||
print("FAIL: %s not found" % config_file, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Ensure docs/data_description.md exists so Config._find_project_root() works.
|
||||
# If CONFIG_DIR points elsewhere, create a temporary symlink.
|
||||
docs_path = Path("docs/data_description.md")
|
||||
created_symlink = False
|
||||
if not docs_path.exists():
|
||||
docs_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
docs_path.symlink_to(config_file.resolve())
|
||||
created_symlink = True
|
||||
|
||||
try:
|
||||
from src.config import Config
|
||||
c = Config()
|
||||
names = ", ".join(t.name for t in c.tables)
|
||||
print("OK: parsed %d table(s): %s" % (len(c.tables), names))
|
||||
except Exception as e:
|
||||
print("FAIL: %s" % e, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
finally:
|
||||
if created_symlink:
|
||||
docs_path.unlink(missing_ok=True)
|
||||
endef
|
||||
export VALIDATE_SCRIPT
|
||||
|
||||
validate-config:
|
||||
@echo "Validating data_description.md in CONFIG_DIR=$(CONFIG_DIR) ..."
|
||||
@CONFIG_DIR=$(CONFIG_DIR) $(PYTHON) -c "$$VALIDATE_SCRIPT"
|
||||
|
||||
lint:
|
||||
@echo "Linting not configured yet. Add ruff, flake8, or similar here."
|
||||
|
|
@ -167,12 +167,13 @@ class TableConfig:
|
|||
import re as _re
|
||||
valid_schedule = (
|
||||
_re.match(r"^every \d+[mh]$", self.sync_schedule)
|
||||
or _re.match(r"^daily \d{2}:\d{2}$", self.sync_schedule)
|
||||
or _re.match(r"^daily \d{2}:\d{2}(,\d{2}:\d{2})*$", self.sync_schedule)
|
||||
)
|
||||
if not valid_schedule:
|
||||
raise ValueError(
|
||||
f"Invalid sync_schedule '{self.sync_schedule}' for table {self.id}. "
|
||||
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00'"
|
||||
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00', "
|
||||
f"'daily 07:00,13:00,18:00'"
|
||||
)
|
||||
|
||||
# For partitioned, partition_by must be defined
|
||||
|
|
|
|||
103
tests/test_config_sync_schedule.py
Normal file
103
tests/test_config_sync_schedule.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
"""Tests for TableConfig.sync_schedule field validation."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import TableConfig
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
def _make_table(**overrides) -> TableConfig:
|
||||
"""Create a TableConfig with sensible defaults, applying overrides."""
|
||||
defaults = dict(
|
||||
id="test.dataset.table",
|
||||
name="test_table",
|
||||
description="Test",
|
||||
primary_key="id",
|
||||
sync_strategy="full_refresh",
|
||||
)
|
||||
defaults.update(overrides)
|
||||
return TableConfig(**defaults)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
class TestSyncScheduleDefault:
|
||||
def test_default_is_none(self):
|
||||
table = _make_table()
|
||||
assert table.sync_schedule is None
|
||||
|
||||
|
||||
class TestSyncScheduleValidValues:
|
||||
@pytest.mark.parametrize(
|
||||
"schedule",
|
||||
[
|
||||
"every 15m",
|
||||
"every 1h",
|
||||
"daily 05:00",
|
||||
"daily 07:00,13:00,18:00",
|
||||
"daily 00:00,12:00",
|
||||
],
|
||||
ids=[
|
||||
"every-15m",
|
||||
"every-1h",
|
||||
"daily-single",
|
||||
"daily-three-times",
|
||||
"daily-two-times",
|
||||
],
|
||||
)
|
||||
def test_valid_schedule_accepted(self, schedule: str):
|
||||
table = _make_table(sync_schedule=schedule)
|
||||
assert table.sync_schedule == schedule
|
||||
|
||||
|
||||
class TestSyncScheduleEdgeCases:
|
||||
def test_every_zero_minutes(self):
|
||||
"""every 0m matches the regex -- validation is syntactic, not semantic."""
|
||||
table = _make_table(sync_schedule="every 0m")
|
||||
assert table.sync_schedule == "every 0m"
|
||||
|
||||
def test_daily_2359(self):
|
||||
table = _make_table(sync_schedule="daily 23:59")
|
||||
assert table.sync_schedule == "daily 23:59"
|
||||
|
||||
|
||||
class TestSyncScheduleInvalid:
|
||||
@pytest.mark.parametrize(
|
||||
"bad_schedule",
|
||||
[
|
||||
"daily 07:00,13:00,18:00,", # trailing comma
|
||||
"daily 7:00", # single-digit hour
|
||||
"daily", # missing time
|
||||
"hourly", # unsupported keyword
|
||||
"weekly", # unsupported keyword
|
||||
],
|
||||
ids=[
|
||||
"trailing-comma",
|
||||
"single-digit-hour",
|
||||
"daily-no-time",
|
||||
"hourly-keyword",
|
||||
"weekly-keyword",
|
||||
],
|
||||
)
|
||||
def test_invalid_schedule_raises(self, bad_schedule: str):
|
||||
with pytest.raises(ValueError, match="Invalid sync_schedule"):
|
||||
_make_table(sync_schedule=bad_schedule)
|
||||
|
||||
def test_empty_string_treated_as_none(self):
|
||||
"""Empty string is falsy, so validation is skipped (same as None)."""
|
||||
table = _make_table(sync_schedule="")
|
||||
assert table.sync_schedule == ""
|
||||
|
||||
def test_daily_25_accepted_by_regex(self):
|
||||
"""25:00 passes regex validation (two digits). Document this behavior."""
|
||||
table = _make_table(sync_schedule="daily 25:00")
|
||||
assert table.sync_schedule == "daily 25:00"
|
||||
|
||||
|
||||
class TestSyncScheduleNoneExplicit:
|
||||
def test_explicit_none_accepted(self):
|
||||
table = _make_table(sync_schedule=None)
|
||||
assert table.sync_schedule is None
|
||||
Loading…
Reference in a new issue