fix(api): sample endpoint returns 500 for materialized BQ tables (#341)
* fix(api): v2 sample endpoint returns 500 for materialized BQ tables

  build_sample in app/api/v2_sample.py checked only source_type == 'bigquery' before routing to _fetch_bq_sample, so materialized tables (source_type='bigquery', query_mode='materialized') attempted a live BigQuery query for data that lives locally as parquet — causing an unhandled exception and HTTP 500.

  Fix mirrors the existing guard already in v2_schema.py (#261): skip _fetch_bq_sample when query_mode='materialized' and fall through to the local parquet read path. The parquet is the source of truth for any materialized source regardless of source_type.

  Regression test test_materialized_bq_table_reads_parquet_not_bq patches _fetch_bq_sample with a sentinel, registers a materialized BQ table, calls build_sample, and asserts (a) the sentinel was never hit and (b) rows came from the local parquet.

  Credit @davidrybar-grpn (#341, cleaned + rebased onto post-#340 main).

* release: 0.54.28 — v2 sample endpoint materialized-BQ 500 fix

---------

Co-authored-by: ZdenekSrotyr <zdenek.srotyr@keboola.com>
This commit is contained in:
parent
86933a2cb5
commit
e11f03eb60
4 changed files with 69 additions and 2 deletions
13
CHANGELOG.md
13
CHANGELOG.md
|
|
@@ -10,6 +10,19 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.54.28] — 2026-05-18
|
||||
|
||||
### Fixed
|
||||
- `/api/v2/sample` (and `agnes describe`) no longer returns HTTP 500
|
||||
for materialized BigQuery tables (`source_type='bigquery'`,
|
||||
`query_mode='materialized'`). The handler previously routed any
|
||||
`source_type='bigquery'` row to `_fetch_bq_sample` regardless of
|
||||
query mode, attempting a live BigQuery query for data that lives
|
||||
locally as parquet. Fix mirrors the existing guard in
|
||||
`app/api/v2_schema.py` from #261 — materialized tables fall through
|
||||
to the local parquet read path. Regression-locked by
|
||||
`test_materialized_bq_table_reads_parquet_not_bq`. Closes #341.
|
||||
|
||||
## [0.54.27] — 2026-05-18
|
||||
|
||||
### Fixed
|
||||
|
|
|
|||
|
|
@@ -144,7 +144,7 @@ def build_sample(
|
|||
if cached is not None:
|
||||
return cached
|
||||
|
||||
if source_type == "bigquery":
|
||||
if source_type == "bigquery" and (row.get("query_mode") or "") != "materialized":
|
||||
rows = _fetch_bq_sample(bq, row.get("bucket") or "", row.get("source_table") or table_id, n)
|
||||
else:
|
||||
from app.utils import get_data_dir
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "agnes-the-ai-analyst"
|
||||
version = "0.54.27"
|
||||
version = "0.54.28"
|
||||
description = "Agnes — AI Data Analyst platform for AI analytical systems"
|
||||
requires-python = ">=3.11,<3.14"
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@@ -162,6 +162,60 @@ class TestSampleEndpoint:
|
|||
finally:
|
||||
conn.close()
|
||||
|
||||
def test_materialized_bq_table_reads_parquet_not_bq(self, reload_db, monkeypatch):
|
||||
"""Regression: build_sample routed materialized tables (source_type='bigquery',
|
||||
query_mode='materialized') to _fetch_bq_sample, which attempted a live BQ
|
||||
query for data that lives locally as parquet — causing HTTP 500.
|
||||
|
||||
After the fix, query_mode='materialized' must always fall through to the
|
||||
local parquet read path, regardless of source_type."""
|
||||
import duckdb as _duckdb
|
||||
from app.api import v2_sample
|
||||
from app.utils import get_data_dir
|
||||
|
||||
v2_sample._sample_cache.clear()
|
||||
|
||||
bq_called = []
|
||||
|
||||
def _fake_bq_fetch(*a, **kw):
|
||||
bq_called.append(True)
|
||||
return []
|
||||
|
||||
monkeypatch.setattr(v2_sample, "_fetch_bq_sample", _fake_bq_fetch)
|
||||
|
||||
parquet_dir = get_data_dir() / "extracts" / "bigquery" / "data"
|
||||
parquet_dir.mkdir(parents=True, exist_ok=True)
|
||||
parquet_path = parquet_dir / "order_economics.parquet"
|
||||
c = _duckdb.connect(":memory:")
|
||||
try:
|
||||
c.execute(
|
||||
"COPY (SELECT 'Los Angeles' AS customer_city, 100 AS orders "
|
||||
"UNION ALL SELECT 'New York', 80 AS orders) "
|
||||
f"TO '{parquet_path}' (FORMAT PARQUET)"
|
||||
)
|
||||
finally:
|
||||
c.close()
|
||||
|
||||
conn = reload_db.get_system_db()
|
||||
try:
|
||||
_ensure_admin1(conn)
|
||||
from src.repositories.table_registry import TableRegistryRepository
|
||||
TableRegistryRepository(conn).register(
|
||||
id="order_economics", name="order_economics",
|
||||
source_type="bigquery", query_mode="materialized",
|
||||
bucket="finance_unit_economics", source_table="order_economics",
|
||||
)
|
||||
user = {"id": "admin1", "email": "a@x.com"}
|
||||
data = v2_sample.build_sample(conn, user, "order_economics", n=5, bq=_bq())
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
assert not bq_called, "_fetch_bq_sample must not be called for materialized tables"
|
||||
assert data["table_id"] == "order_economics"
|
||||
assert len(data["rows"]) == 2
|
||||
cities = {r["customer_city"] for r in data["rows"]}
|
||||
assert cities == {"Los Angeles", "New York"}
|
||||
|
||||
|
||||
class TestBqAccessErrors:
|
||||
"""Issue #134: structured 502 translation on BQ errors in sample path.
|
||||
|
|
|
|||
Loading…
Reference in a new issue