fix(api): sample endpoint returns 500 for materialized BQ tables (#341)
* fix(api): v2 sample endpoint returns 500 for materialized BQ tables

  build_sample in app/api/v2_sample.py checked only source_type == 'bigquery' before routing to _fetch_bq_sample, so materialized tables (source_type='bigquery', query_mode='materialized') attempted a live BigQuery query for data that lives locally as parquet — causing an unhandled exception and HTTP 500.

  Fix mirrors the existing guard already in v2_schema.py (#261): skip _fetch_bq_sample when query_mode='materialized' and fall through to the local parquet read path. The parquet is the source of truth for any materialized source regardless of source_type.

  Regression test test_materialized_bq_table_reads_parquet_not_bq patches _fetch_bq_sample with a sentinel, registers a materialized BQ table, calls build_sample, and asserts (a) the sentinel was never hit and (b) rows came from the local parquet.

  Credit @davidrybar-grpn (#341, cleaned + rebased onto post-#340 main).

* release: 0.54.28 — v2 sample endpoint materialized-BQ 500 fix

---------

Co-authored-by: ZdenekSrotyr <zdenek.srotyr@keboola.com>
This commit is contained in:
parent
86933a2cb5
commit
e11f03eb60
4 changed files with 69 additions and 2 deletions
13
CHANGELOG.md
13
CHANGELOG.md
|
|
@ -10,6 +10,19 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.54.28] — 2026-05-18
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- `/api/v2/sample` (and `agnes describe`) no longer returns HTTP 500
|
||||||
|
for materialized BigQuery tables (`source_type='bigquery'`,
|
||||||
|
`query_mode='materialized'`). The handler previously routed any
|
||||||
|
`source_type='bigquery'` row to `_fetch_bq_sample` regardless of
|
||||||
|
query mode, attempting a live BigQuery query for data that lives
|
||||||
|
locally as parquet. Fix mirrors the existing guard in
|
||||||
|
`app/api/v2_schema.py` from #261 — materialized tables fall through
|
||||||
|
to the local parquet read path. Regression-locked by
|
||||||
|
`test_materialized_bq_table_reads_parquet_not_bq`. Closes #341.
|
||||||
|
|
||||||
## [0.54.27] — 2026-05-18
|
## [0.54.27] — 2026-05-18
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ def build_sample(
|
||||||
if cached is not None:
|
if cached is not None:
|
||||||
return cached
|
return cached
|
||||||
|
|
||||||
if source_type == "bigquery":
|
if source_type == "bigquery" and (row.get("query_mode") or "") != "materialized":
|
||||||
rows = _fetch_bq_sample(bq, row.get("bucket") or "", row.get("source_table") or table_id, n)
|
rows = _fetch_bq_sample(bq, row.get("bucket") or "", row.get("source_table") or table_id, n)
|
||||||
else:
|
else:
|
||||||
from app.utils import get_data_dir
|
from app.utils import get_data_dir
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "agnes-the-ai-analyst"
|
name = "agnes-the-ai-analyst"
|
||||||
version = "0.54.27"
|
version = "0.54.28"
|
||||||
description = "Agnes — AI Data Analyst platform for AI analytical systems"
|
description = "Agnes — AI Data Analyst platform for AI analytical systems"
|
||||||
requires-python = ">=3.11,<3.14"
|
requires-python = ">=3.11,<3.14"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
||||||
|
|
@ -162,6 +162,60 @@ class TestSampleEndpoint:
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
def test_materialized_bq_table_reads_parquet_not_bq(self, reload_db, monkeypatch):
|
||||||
|
"""Regression: build_sample routed materialized tables (source_type='bigquery',
|
||||||
|
query_mode='materialized') to _fetch_bq_sample, which attempted a live BQ
|
||||||
|
query for data that lives locally as parquet — causing HTTP 500.
|
||||||
|
|
||||||
|
After the fix, query_mode='materialized' must always fall through to the
|
||||||
|
local parquet read path, regardless of source_type."""
|
||||||
|
import duckdb as _duckdb
|
||||||
|
from app.api import v2_sample
|
||||||
|
from app.utils import get_data_dir
|
||||||
|
|
||||||
|
v2_sample._sample_cache.clear()
|
||||||
|
|
||||||
|
bq_called = []
|
||||||
|
|
||||||
|
def _fake_bq_fetch(*a, **kw):
|
||||||
|
bq_called.append(True)
|
||||||
|
return []
|
||||||
|
|
||||||
|
monkeypatch.setattr(v2_sample, "_fetch_bq_sample", _fake_bq_fetch)
|
||||||
|
|
||||||
|
parquet_dir = get_data_dir() / "extracts" / "bigquery" / "data"
|
||||||
|
parquet_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
parquet_path = parquet_dir / "order_economics.parquet"
|
||||||
|
c = _duckdb.connect(":memory:")
|
||||||
|
try:
|
||||||
|
c.execute(
|
||||||
|
"COPY (SELECT 'Los Angeles' AS customer_city, 100 AS orders "
|
||||||
|
"UNION ALL SELECT 'New York', 80 AS orders) "
|
||||||
|
f"TO '{parquet_path}' (FORMAT PARQUET)"
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
c.close()
|
||||||
|
|
||||||
|
conn = reload_db.get_system_db()
|
||||||
|
try:
|
||||||
|
_ensure_admin1(conn)
|
||||||
|
from src.repositories.table_registry import TableRegistryRepository
|
||||||
|
TableRegistryRepository(conn).register(
|
||||||
|
id="order_economics", name="order_economics",
|
||||||
|
source_type="bigquery", query_mode="materialized",
|
||||||
|
bucket="finance_unit_economics", source_table="order_economics",
|
||||||
|
)
|
||||||
|
user = {"id": "admin1", "email": "a@x.com"}
|
||||||
|
data = v2_sample.build_sample(conn, user, "order_economics", n=5, bq=_bq())
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
assert not bq_called, "_fetch_bq_sample must not be called for materialized tables"
|
||||||
|
assert data["table_id"] == "order_economics"
|
||||||
|
assert len(data["rows"]) == 2
|
||||||
|
cities = {r["customer_city"] for r in data["rows"]}
|
||||||
|
assert cities == {"Los Angeles", "New York"}
|
||||||
|
|
||||||
|
|
||||||
class TestBqAccessErrors:
|
class TestBqAccessErrors:
|
||||||
"""Issue #134: structured 502 translation on BQ errors in sample path.
|
"""Issue #134: structured 502 translation on BQ errors in sample path.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue