Fix sync_schedule validation to accept multi-time daily format
The scheduler.py already supported "daily HH:MM,HH:MM,HH:MM" format
(commit 5f27d05), but config.py validation regex only accepted single
time "daily HH:MM", causing data-refresh to crash on startup.
Also adds:
- tests/test_config_sync_schedule.py (16 test cases)
- Makefile with validate-config target for CI/CD integration
This commit is contained in:
parent
5f27d05894
commit
ab99f0af92
3 changed files with 181 additions and 2 deletions
75
Makefile
Normal file
75
Makefile
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
# AI Data Analyst - Development Makefile
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# make - show help
|
||||||
|
# make test - run all tests
|
||||||
|
# make test-config - run config-related tests only
|
||||||
|
# make validate-config CONFIG_DIR=path/to/config - validate data_description.md
|
||||||
|
# make lint - placeholder for future linting
|
||||||
|
|
||||||
|
SHELL := /bin/bash
|
||||||
|
PYTHON := .venv/bin/python
|
||||||
|
PYTEST := .venv/bin/pytest
|
||||||
|
|
||||||
|
# Optional: path to config directory containing data_description.md
|
||||||
|
# Default: config/ (relative to project root)
|
||||||
|
CONFIG_DIR ?= config
|
||||||
|
|
||||||
|
.PHONY: help test test-config validate-config lint
|
||||||
|
|
||||||
|
# Default target
|
||||||
|
help:
|
||||||
|
@echo "Available targets:"
|
||||||
|
@echo " make test Run all pytest tests"
|
||||||
|
@echo " make test-config Run config and scheduler tests only"
|
||||||
|
@echo " make validate-config Validate data_description.md parsing"
|
||||||
|
@echo " Optional: CONFIG_DIR=path/to/config (default: config/)"
|
||||||
|
@echo " make lint Placeholder for future linting"
|
||||||
|
@echo ""
|
||||||
|
@echo "Prerequisites: Python virtualenv at .venv/ with dependencies installed"
|
||||||
|
|
||||||
|
test:
|
||||||
|
$(PYTEST)
|
||||||
|
|
||||||
|
test-config:
|
||||||
|
$(PYTEST) tests/test_config_query_mode.py tests/test_config_sync_schedule.py tests/test_scheduler.py -v
|
||||||
|
|
||||||
|
define VALIDATE_SCRIPT
|
||||||
|
import os, sys, re, tempfile, shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
config_dir = Path(os.environ.get("CONFIG_DIR", "config"))
|
||||||
|
config_file = config_dir / "data_description.md"
|
||||||
|
if not config_file.exists():
|
||||||
|
print("FAIL: %s not found" % config_file, file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Ensure docs/data_description.md exists so Config._find_project_root() works.
|
||||||
|
# If CONFIG_DIR points elsewhere, create a temporary symlink.
|
||||||
|
docs_path = Path("docs/data_description.md")
|
||||||
|
created_symlink = False
|
||||||
|
if not docs_path.exists():
|
||||||
|
docs_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
docs_path.symlink_to(config_file.resolve())
|
||||||
|
created_symlink = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
from src.config import Config
|
||||||
|
c = Config()
|
||||||
|
names = ", ".join(t.name for t in c.tables)
|
||||||
|
print("OK: parsed %d table(s): %s" % (len(c.tables), names))
|
||||||
|
except Exception as e:
|
||||||
|
print("FAIL: %s" % e, file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
finally:
|
||||||
|
if created_symlink:
|
||||||
|
docs_path.unlink(missing_ok=True)
|
||||||
|
endef
|
||||||
|
export VALIDATE_SCRIPT
|
||||||
|
|
||||||
|
validate-config:
|
||||||
|
@echo "Validating data_description.md in CONFIG_DIR=$(CONFIG_DIR) ..."
|
||||||
|
@CONFIG_DIR=$(CONFIG_DIR) $(PYTHON) -c "$$VALIDATE_SCRIPT"
|
||||||
|
|
||||||
|
lint:
|
||||||
|
@echo "Linting not configured yet. Add ruff, flake8, or similar here."
|
||||||
|
|
@ -167,12 +167,13 @@ class TableConfig:
|
||||||
import re as _re
|
import re as _re
|
||||||
valid_schedule = (
|
valid_schedule = (
|
||||||
_re.match(r"^every \d+[mh]$", self.sync_schedule)
|
_re.match(r"^every \d+[mh]$", self.sync_schedule)
|
||||||
or _re.match(r"^daily \d{2}:\d{2}$", self.sync_schedule)
|
or _re.match(r"^daily \d{2}:\d{2}(,\d{2}:\d{2})*$", self.sync_schedule)
|
||||||
)
|
)
|
||||||
if not valid_schedule:
|
if not valid_schedule:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Invalid sync_schedule '{self.sync_schedule}' for table {self.id}. "
|
f"Invalid sync_schedule '{self.sync_schedule}' for table {self.id}. "
|
||||||
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00'"
|
f"Allowed formats: 'every 15m', 'every 1h', 'daily 05:00', "
|
||||||
|
f"'daily 07:00,13:00,18:00'"
|
||||||
)
|
)
|
||||||
|
|
||||||
# For partitioned, partition_by must be defined
|
# For partitioned, partition_by must be defined
|
||||||
|
|
|
||||||
103
tests/test_config_sync_schedule.py
Normal file
103
tests/test_config_sync_schedule.py
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
"""Tests for TableConfig.sync_schedule field validation."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.config import TableConfig
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def _make_table(**overrides) -> TableConfig:
|
||||||
|
"""Create a TableConfig with sensible defaults, applying overrides."""
|
||||||
|
defaults = dict(
|
||||||
|
id="test.dataset.table",
|
||||||
|
name="test_table",
|
||||||
|
description="Test",
|
||||||
|
primary_key="id",
|
||||||
|
sync_strategy="full_refresh",
|
||||||
|
)
|
||||||
|
defaults.update(overrides)
|
||||||
|
return TableConfig(**defaults)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
class TestSyncScheduleDefault:
|
||||||
|
def test_default_is_none(self):
|
||||||
|
table = _make_table()
|
||||||
|
assert table.sync_schedule is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestSyncScheduleValidValues:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"schedule",
|
||||||
|
[
|
||||||
|
"every 15m",
|
||||||
|
"every 1h",
|
||||||
|
"daily 05:00",
|
||||||
|
"daily 07:00,13:00,18:00",
|
||||||
|
"daily 00:00,12:00",
|
||||||
|
],
|
||||||
|
ids=[
|
||||||
|
"every-15m",
|
||||||
|
"every-1h",
|
||||||
|
"daily-single",
|
||||||
|
"daily-three-times",
|
||||||
|
"daily-two-times",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_valid_schedule_accepted(self, schedule: str):
|
||||||
|
table = _make_table(sync_schedule=schedule)
|
||||||
|
assert table.sync_schedule == schedule
|
||||||
|
|
||||||
|
|
||||||
|
class TestSyncScheduleEdgeCases:
|
||||||
|
def test_every_zero_minutes(self):
|
||||||
|
"""every 0m matches the regex -- validation is syntactic, not semantic."""
|
||||||
|
table = _make_table(sync_schedule="every 0m")
|
||||||
|
assert table.sync_schedule == "every 0m"
|
||||||
|
|
||||||
|
def test_daily_2359(self):
|
||||||
|
table = _make_table(sync_schedule="daily 23:59")
|
||||||
|
assert table.sync_schedule == "daily 23:59"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSyncScheduleInvalid:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"bad_schedule",
|
||||||
|
[
|
||||||
|
"daily 07:00,13:00,18:00,", # trailing comma
|
||||||
|
"daily 7:00", # single-digit hour
|
||||||
|
"daily", # missing time
|
||||||
|
"hourly", # unsupported keyword
|
||||||
|
"weekly", # unsupported keyword
|
||||||
|
],
|
||||||
|
ids=[
|
||||||
|
"trailing-comma",
|
||||||
|
"single-digit-hour",
|
||||||
|
"daily-no-time",
|
||||||
|
"hourly-keyword",
|
||||||
|
"weekly-keyword",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_invalid_schedule_raises(self, bad_schedule: str):
|
||||||
|
with pytest.raises(ValueError, match="Invalid sync_schedule"):
|
||||||
|
_make_table(sync_schedule=bad_schedule)
|
||||||
|
|
||||||
|
def test_empty_string_treated_as_none(self):
|
||||||
|
"""Empty string is falsy, so validation is skipped (same as None)."""
|
||||||
|
table = _make_table(sync_schedule="")
|
||||||
|
assert table.sync_schedule == ""
|
||||||
|
|
||||||
|
def test_daily_25_accepted_by_regex(self):
|
||||||
|
"""25:00 passes regex validation (two digits). Document this behavior."""
|
||||||
|
table = _make_table(sync_schedule="daily 25:00")
|
||||||
|
assert table.sync_schedule == "daily 25:00"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSyncScheduleNoneExplicit:
|
||||||
|
def test_explicit_none_accepted(self):
|
||||||
|
table = _make_table(sync_schedule=None)
|
||||||
|
assert table.sync_schedule is None
|
||||||
Loading…
Reference in a new issue