Fix streaming: use RowIterator.to_arrow_iterable() not QueryJob

QueryJob only has to_arrow(), not to_arrow_iterable().
Must call query_job.result() first to get RowIterator,
which has the streaming to_arrow_iterable() method.
This commit is contained in:
Petr 2026-03-11 20:15:35 +01:00
parent ee70da86c3
commit 4f74543a12

View file

@ -319,10 +319,14 @@ class BigQueryClient:
query_job = self.client.query(sql, job_config=job_config)
# result() returns RowIterator which has to_arrow_iterable()
# (QueryJob itself only has to_arrow(), not to_arrow_iterable())
row_iter = query_job.result()
# Try BQ Storage API first (faster, parallel gRPC streams).
# Fall back to REST API if SA lacks bigquery.readsessions.create permission.
try:
batch_iter = query_job.to_arrow_iterable()
batch_iter = row_iter.to_arrow_iterable()
# Probe first batch to detect Storage API permission errors early
first_batch = next(batch_iter, None)
if first_batch is not None:
@ -337,8 +341,9 @@ class BigQueryClient:
"falling back to REST API (streaming)"
)
# Fallback: REST API streaming (same query_job, just different reader)
yield from query_job.to_arrow_iterable(create_bqstorage_client=False)
# Fallback: REST API streaming (re-execute query for fresh RowIterator)
row_iter = self.client.query(sql, job_config=job_config).result()
yield from row_iter.to_arrow_iterable(create_bqstorage_client=False)
def read_table_streaming(
self,