agnes-the-ai-analyst/.github/workflows/ci.yml
ZdenekSrotyr a1c7849b3e
ci: shard test suite + drop duplicate test run (#311)
The `test` job in ci.yml becomes a 4-way `test-shard` matrix (pytest-split,
balanced by a committed .test_durations), aggregated into a single `test`
status check so branch protection is unchanged.

release.yml's duplicate full-suite `test` job is removed — it re-ran the
same ~10 min suite a second time on every push to main/feature branches.
release.yml is now image-build only; the advisory ruff/mypy steps move to
a lean `lint` job in ci.yml.

Net: ~10 min -> ~3 min wall-clock per push, and the suite runs once
instead of twice.
2026-05-14 20:18:21 +00:00

177 lines
5.8 KiB
YAML

name: CI

# Triggers:
#   - pushes to main and to any feature/** branch
#   - pull requests that target main
#   - a nightly schedule (one of the two events that enable docker-e2e)
#   - manual runs via workflow_dispatch (the other one)
on:
  push:
    branches: [main, "feature/**"]
  pull_request:
    branches: [main]
  schedule:
    - cron: "0 3 * * *"  # Nightly at 03:00 UTC — runs docker-e2e
  workflow_dispatch:

# Least privilege for GITHUB_TOKEN. The steps in this workflow check out the
# repository and then build/test locally; none of them pushes, comments, or
# publishes, so read access to repository contents is all that is granted.
# Widen this per job (not here) if a future job needs more.
permissions:
  contents: read

jobs:
test-shard:
  # Runs the pytest suite split into parallel shards, one matrix job per
  # entry in `group`. The shard count is taken from `strategy.job-total`
  # instead of being repeated as a literal, so adding or removing a shard
  # only means editing the `group` list (keep it as 1..N — `--group` is
  # 1-based).
  runs-on: ubuntu-latest
  strategy:
    # Let every shard run to completion even when another one fails.
    fail-fast: false
    matrix:
      group: [1, 2, 3, 4]
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.13"
    - name: Install uv
      uses: astral-sh/setup-uv@v7
    - name: Install dependencies
      run: uv pip install --system ".[dev,server]"
    - name: Run tests (shard ${{ matrix.group }}/${{ strategy.job-total }})
      # pytest-split shards the suite across the matrix jobs, balanced by
      # the committed `.test_durations` file; `-n auto` parallelises
      # within each shard across the runner's cores. Regenerate durations
      # with `pytest tests/ --store-durations -n auto` when the suite
      # drifts enough that shards become uneven.
      run: >-
        pytest tests/ -v --tb=short -n auto
        --splits ${{ strategy.job-total }}
        --group ${{ matrix.group }}
      env:
        TESTING: "1"
# `test` is the one status check that branch protection requires. The matrix
# job reports as `test-shard (1..4)`, so this job folds those results into a
# single `test` outcome and branch protection can stay as it is.
test:
  runs-on: ubuntu-latest
  needs: test-shard
  # always(): run regardless of how test-shard ended, so a non-success
  # result is reported under `test` instead of leaving the check skipped.
  if: always()
  steps:
    - name: Verify all test shards passed
      env:
        # Aggregate result of the whole test-shard matrix.
        SHARD_RESULT: ${{ needs.test-shard.result }}
      run: |
        if [ "$SHARD_RESULT" = "success" ]; then
          echo "All 4 test shards passed."
          exit 0
        fi
        echo "::error::test-shard result was '$SHARD_RESULT' — one or more shards failed"
        exit 1
lint:
  # Advisory only — ruff + mypy surface issues but never gate. Split out of
  # release.yml's old test job; runs without the full dependency install
  # since neither tool needs it.
  #
  # Each step relies on `continue-on-error: true` alone to stay non-gating.
  # The commands are deliberately NOT suffixed with `|| true`: forcing exit 0
  # would make the step always look successful and hide findings, whereas a
  # real non-zero exit is recorded as a failed step while the job itself
  # still succeeds.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.13"
    - name: Lint with ruff
      run: |
        pip install ruff
        ruff check .
      continue-on-error: true
    - name: Type check with mypy
      run: |
        pip install mypy
        mypy src/ app/ cli/ connectors/ --ignore-missing-imports --no-error-summary
      continue-on-error: true
cli-wheel-clean-install:
  # Guards against wheel METADATA that conflicts with transitive
  # dependencies once a fresh resolver is used — the case the workspace-only
  # `[tool.uv] override-dependencies` does NOT cover. The wheel is built the
  # same way `release.yml` ships it to analysts (`uv build --wheel`), then
  # installed with `uv tool install` (the path the `/setup` page advertises)
  # inside a fresh `python:3.13-slim` container, and the `agnes` binary must
  # actually start. Motivating regression: 0.53.3's
  # `kbcstorage>=0.9.0 → urllib3<2.0.0` cap ended up in the wheel METADATA
  # while every existing test still passed (the workspace overrides the
  # cap), so the break would only show on the next analyst's first install.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.13"
    - name: Install uv
      uses: astral-sh/setup-uv@v7
    - name: Build wheel
      run: uv build --wheel --out-dir dist
    - name: Write CLI install assertion script
      # The script is written on the runner and bind-mounted into the
      # container by the next step. The quoted heredoc delimiter ('PY')
      # keeps the shell from expanding anything inside the Python source.
      run: |
        cat > /tmp/smoke.py <<'PY'
        import sys
        import urllib3

        try:
            import kbcstorage  # noqa: F401
        except ImportError:
            pass
        else:
            sys.exit("REGRESSION: kbcstorage leaked into the CLI wheel — should be in [server] extra only")

        major, minor = (int(part) for part in urllib3.__version__.split(".")[:2])
        assert (major, minor) >= (2, 7), f"urllib3 too old: {urllib3.__version__}"
        print(f"OK: kbcstorage absent, urllib3 {urllib3.__version__}")
        PY
    - name: Smoke install in fresh python:3.13-slim
      # Mounts the built wheel directory and the assertion script read-only,
      # installs uv inside the container, installs the wheel as a uv tool,
      # and exercises the `agnes` entry point.
      run: |
        docker run --rm \
          --volume "$PWD/dist:/wheels:ro" \
          --volume /tmp/smoke.py:/smoke.py:ro \
          python:3.13-slim bash -c '
            set -euo pipefail
            apt-get update -qq && apt-get install -y -qq --no-install-recommends curl ca-certificates >/dev/null
            curl -LsSf https://astral.sh/uv/install.sh | sh > /dev/null 2>&1
            export PATH="$HOME/.local/bin:$PATH"
            wheel_file=$(ls /wheels/agnes_the_ai_analyst-*-py3-none-any.whl | head -1)
            uv tool install --force "$wheel_file"
            agnes --version
            agnes --help > /dev/null
            agnes catalog --help > /dev/null
            # Run the assertion with the interpreter of the venv that uv tool created
            "$HOME/.local/share/uv/tools/agnes-the-ai-analyst/bin/python" /smoke.py
          '
docker-build:
  # Checks that the image still builds from the repository root. The result
  # is only tagged locally on the runner; this job has no push step.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6
    - name: Build Docker image
      run: docker build --tag data-analyst:test .
docker-e2e:
  # End-to-end tests against the docker compose stack. Runs only for the
  # nightly schedule or a manual dispatch, and only after docker-build has
  # succeeded.
  runs-on: ubuntu-latest
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
  needs: docker-build
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.13"
    - name: Install uv
      uses: astral-sh/setup-uv@v7
    - name: Install dependencies
      run: uv pip install --system ".[dev,server]"
    - name: Start services
      # An empty .env is created first — presumably the compose file
      # references one; confirm against docker-compose.yml. `--wait` blocks
      # until the services report running/healthy or 60 s have elapsed.
      run: |
        touch .env
        docker compose up -d --wait --wait-timeout 60
    - name: Run Docker E2E tests
      run: pytest tests/ -v --tb=short -m docker --timeout=120
      env:
        TESTING: "1"
    - name: Dump service logs on failure
      # Without this, a start-up timeout or failing test leaves no container
      # output behind: the next step tears the stack down and the runner is
      # discarded afterwards.
      if: failure()
      run: docker compose logs --no-color --timestamps
    - name: Stop services
      # always(): tear the stack down even when an earlier step failed.
      if: always()
      run: docker compose down