From 4f74543a12aca9ef1f277d2b5a030852f1a9a1e4 Mon Sep 17 00:00:00 2001 From: Petr Date: Wed, 11 Mar 2026 20:15:35 +0100 Subject: [PATCH] Fix streaming: use RowIterator.to_arrow_iterable() not QueryJob QueryJob only has to_arrow(), not to_arrow_iterable(). Must call query_job.result() first to get RowIterator, which has the streaming to_arrow_iterable() method. --- connectors/bigquery/client.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/connectors/bigquery/client.py b/connectors/bigquery/client.py index c07aaad..90c1161 100644 --- a/connectors/bigquery/client.py +++ b/connectors/bigquery/client.py @@ -319,10 +319,14 @@ class BigQueryClient: query_job = self.client.query(sql, job_config=job_config) + # result() returns RowIterator which has to_arrow_iterable() + # (QueryJob itself only has to_arrow(), not to_arrow_iterable()) + row_iter = query_job.result() + # Try BQ Storage API first (faster, parallel gRPC streams). # Fall back to REST API if SA lacks bigquery.readsessions.create permission. try: - batch_iter = query_job.to_arrow_iterable() + batch_iter = row_iter.to_arrow_iterable() # Probe first batch to detect Storage API permission errors early first_batch = next(batch_iter, None) if first_batch is not None: @@ -337,8 +341,9 @@ class BigQueryClient: "falling back to REST API (streaming)" ) - # Fallback: REST API streaming (same query_job, just different reader) - yield from query_job.to_arrow_iterable(create_bqstorage_client=False) + # Fallback: REST API streaming (re-execute query for fresh RowIterator) + row_iter = self.client.query(sql, job_config=job_config).result() + yield from row_iter.to_arrow_iterable(create_bqstorage_client=False) def read_table_streaming( self,