agnes-the-ai-analyst/app/api/v2_arrow.py
ZdenekSrotyr 2e1dfb7553
feat(v2): claude-driven fetch primitives + 0.14.0 (#102)
Replaces the BigQuery wrap-view pattern with a discovery + scoped-fetch toolkit driven by the analyst's Claude session. Adds /api/v2/{catalog,schema,sample,scan,scan/estimate}, da catalog/schema/describe/fetch/snapshot/disk-info CLI commands, sqlglot-backed WHERE validator, process-local quota tracker, agent rails skill (cli/skills/agnes-data-querying.md). BREAKING: BQ wrap views off by default — set data_source.bigquery.legacy_wrap_views=true for one cycle. Backward-compat field_validator on primary_key. Catalog cache now matches documented 300s TTL with RBAC fresh per request. Cuts release v0.14.0.
2026-04-29 01:07:19 +02:00

32 lines
1.1 KiB
Python

"""Arrow IPC serialization helpers for /api/v2/scan responses.
Server side serializes a pyarrow.Table or pyarrow.RecordBatchReader to IPC
stream bytes; client side deserializes them back into a pyarrow.Table.
Content-Type is `application/vnd.apache.arrow.stream`.
"""
from __future__ import annotations
import io
import pyarrow as pa
CONTENT_TYPE = "application/vnd.apache.arrow.stream"
def arrow_table_to_ipc_bytes(source: pa.Table | pa.RecordBatchReader) -> bytes:
    """Encode tabular Arrow data as an Arrow IPC stream.

    Args:
        source: Either a fully materialized ``pyarrow.Table`` or a
            ``pyarrow.RecordBatchReader``. A reader is consumed by
            iterating over it; a table is split with ``to_batches()``.

    Returns:
        The complete IPC stream (opened with ``source.schema``, followed by
        every record batch) as a single ``bytes`` object.
    """
    buffer = io.BytesIO()
    # A single writer serves both input kinds; only the batch iterable differs.
    with pa.ipc.new_stream(buffer, source.schema) as ipc_writer:
        if isinstance(source, pa.RecordBatchReader):
            record_batches = source
        else:
            record_batches = source.to_batches()
        for record_batch in record_batches:
            ipc_writer.write_batch(record_batch)
    # Read the buffer only after the writer context has exited.
    return buffer.getvalue()
def parse_ipc_bytes(data: bytes) -> pa.Table:
    """Decode an Arrow IPC stream payload into a ``pyarrow.Table``.

    Args:
        data: Raw bytes of an Arrow IPC stream.

    Returns:
        The result of reading the whole stream with ``read_all()``.
    """
    payload = io.BytesIO(data)
    return pa.ipc.open_stream(payload).read_all()