diff --git a/CHANGELOG.md b/CHANGELOG.md index a391067..a1dbd8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,19 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.54.28] — 2026-05-18 + +### Fixed +- `/api/v2/sample` (and `agnes describe`) no longer returns HTTP 500 + for materialized BigQuery tables (`source_type='bigquery'`, + `query_mode='materialized'`). The handler previously routed any + `source_type='bigquery'` row to `_fetch_bq_sample` regardless of + query mode, attempting a live BigQuery query for data that lives + locally as parquet. Fix mirrors the existing guard in + `app/api/v2_schema.py` from #261 — materialized tables fall through + to the local parquet read path. Regression-locked by + `test_materialized_bq_table_reads_parquet_not_bq`. Closes #341. + ## [0.54.27] — 2026-05-18 ### Fixed diff --git a/app/api/v2_sample.py b/app/api/v2_sample.py index 7063f8b..1bc7eac 100644 --- a/app/api/v2_sample.py +++ b/app/api/v2_sample.py @@ -144,7 +144,7 @@ def build_sample( if cached is not None: return cached - if source_type == "bigquery": + if source_type == "bigquery" and (row.get("query_mode") or "") != "materialized": rows = _fetch_bq_sample(bq, row.get("bucket") or "", row.get("source_table") or table_id, n) else: from app.utils import get_data_dir diff --git a/pyproject.toml b/pyproject.toml index d7a0fee..af4be5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.54.27" +version = "0.54.28" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" diff --git a/tests/test_v2_sample.py b/tests/test_v2_sample.py index 3267190..6d6f8c2 100644 --- a/tests/test_v2_sample.py +++ b/tests/test_v2_sample.py @@ -162,6 +162,60 @@ class TestSampleEndpoint: finally: conn.close() + def test_materialized_bq_table_reads_parquet_not_bq(self, reload_db, monkeypatch): + """Regression: build_sample routed materialized tables (source_type='bigquery', + query_mode='materialized') to _fetch_bq_sample, which attempted a live BQ + query for data that lives locally as parquet — causing HTTP 500. + + After the fix, query_mode='materialized' must always fall through to the + local parquet read path, regardless of source_type.""" + import duckdb as _duckdb + from app.api import v2_sample + from app.utils import get_data_dir + + v2_sample._sample_cache.clear() + + bq_called = [] + + def _fake_bq_fetch(*a, **kw): + bq_called.append(True) + return [] + + monkeypatch.setattr(v2_sample, "_fetch_bq_sample", _fake_bq_fetch) + + parquet_dir = get_data_dir() / "extracts" / "bigquery" / "data" + parquet_dir.mkdir(parents=True, exist_ok=True) + parquet_path = parquet_dir / "order_economics.parquet" + c = _duckdb.connect(":memory:") + try: + c.execute( + "COPY (SELECT 'Los Angeles' AS customer_city, 100 AS orders " + "UNION ALL SELECT 'New York', 80 AS orders) " + f"TO '{parquet_path}' (FORMAT PARQUET)" + ) + finally: + c.close() + + conn = reload_db.get_system_db() + try: + _ensure_admin1(conn) + from src.repositories.table_registry import TableRegistryRepository + TableRegistryRepository(conn).register( + id="order_economics", name="order_economics", + source_type="bigquery", query_mode="materialized", + bucket="finance_unit_economics", source_table="order_economics", + ) + user = {"id": "admin1", "email": "a@x.com"} + data = v2_sample.build_sample(conn, user, "order_economics", n=5, bq=_bq()) + finally: + conn.close() + + assert not bq_called, "_fetch_bq_sample must not be called for materialized tables" + assert data["table_id"] == "order_economics" + assert len(data["rows"]) == 2 + cities = {r["customer_city"] for r in data["rows"]} + assert cities == {"Los Angeles", "New York"} + class TestBqAccessErrors: """Issue #134: structured 502 translation on BQ errors in sample path.