fix: stdin register_bq parsing, separate BQ SQL validation

- cli/commands/query.py: --stdin mode now reads register_bq from the
  JSON payload and merges it into the register_bq option list, matching
  the documented {"register_bq": {...}, "sql": "..."} contract.
- src/remote_query.py: add _validate_bq_sql() with a narrower blocklist
  (writes only); register_bq() now calls _validate_bq_sql() so legitimate
  BQ operations like INFORMATION_SCHEMA, CALL, IMPORT are not blocked.
  The final DuckDB execute() path still uses the full _validate_sql().
- tests/test_remote_query.py: add TestValidateBqSql covering allowed
  INFORMATION_SCHEMA queries and blocked write operations.
This commit is contained in:
ZdenekSrotyr 2026-04-11 19:31:39 +02:00
parent f4129dc87d
commit 2ad8828f8c
3 changed files with 81 additions and 2 deletions

View file

@ -41,6 +41,10 @@ def query_command(
try: try:
payload = json.loads(raw) payload = json.loads(raw)
resolved_sql = payload["sql"] resolved_sql = payload["sql"]
# Extract register_bq from stdin JSON
stdin_bq = payload.get("register_bq", {})
if stdin_bq and isinstance(stdin_bq, dict):
register_bq = [f"{k}={v}" for k, v in stdin_bq.items()]
except (json.JSONDecodeError, KeyError) as exc: except (json.JSONDecodeError, KeyError) as exc:
typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True) typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True)
raise typer.Exit(1) raise typer.Exit(1)

View file

@ -145,6 +145,36 @@ def _validate_sql(sql: str) -> None:
) )
# BQ SQL blocklist — only blocks write/mutation operations
_BQ_BLOCKED_KEYWORDS = [
"drop ",
"delete ",
"insert ",
"update ",
"alter ",
"create ",
"truncate ",
"merge ",
";", # prevent multi-statement
]
def _validate_bq_sql(sql: str) -> None:
"""Validate BQ SQL — narrower than DuckDB blocklist, only blocks writes."""
sql_lower = sql.strip().lower()
for keyword in _BQ_BLOCKED_KEYWORDS:
if keyword in sql_lower:
raise RemoteQueryError(
f"Blocked BQ SQL keyword: {keyword.strip()}",
error_type="query_error",
)
if not sql_lower.startswith("select ") and not sql_lower.startswith("with "):
raise RemoteQueryError(
"BQ query must start with SELECT or WITH",
error_type="query_error",
)
def load_config() -> Dict[str, Any]: def load_config() -> Dict[str, Any]:
"""Load the ``remote_query:`` section from instance.yaml. """Load the ``remote_query:`` section from instance.yaml.
@ -232,7 +262,7 @@ class RemoteQueryEngine:
error_type="query_error", error_type="query_error",
) )
_validate_sql(bq_sql) _validate_bq_sql(bq_sql)
client = self._get_bq_client() client = self._get_bq_client()

View file

@ -9,7 +9,7 @@ import duckdb
import pyarrow as pa import pyarrow as pa
import pytest import pytest
from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_sql from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_bq_sql, _validate_sql
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -243,3 +243,48 @@ class TestValidateSql:
def test_allowed_sql(self, sql): def test_allowed_sql(self, sql):
# Should not raise # Should not raise
_validate_sql(sql) _validate_sql(sql)
# ---------------------------------------------------------------------------
# _validate_bq_sql unit tests
# ---------------------------------------------------------------------------
class TestValidateBqSql:
def test_information_schema_is_allowed(self):
"""INFORMATION_SCHEMA queries must pass BQ SQL validation."""
# Should not raise
_validate_bq_sql("SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS")
@pytest.mark.parametrize(
"sql",
[
"DROP TABLE x",
"INSERT INTO x VALUES (1)",
"DELETE FROM x",
"UPDATE x SET y=1",
"ALTER TABLE x ADD COLUMN z INT",
"CREATE TABLE x (y INT)",
"TRUNCATE TABLE x",
"MERGE INTO x USING y ON x.id=y.id WHEN MATCHED THEN UPDATE SET x.a=y.a",
"SELECT 1; DROP TABLE x",
],
)
def test_blocked_bq_sql(self, sql):
"""Write/mutation operations must be rejected."""
with pytest.raises(RemoteQueryError) as exc_info:
_validate_bq_sql(sql)
assert exc_info.value.error_type == "query_error"
@pytest.mark.parametrize(
"sql",
[
"SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS",
"SELECT id FROM project.dataset.table",
"WITH cte AS (SELECT 1 AS x) SELECT x FROM cte",
],
)
def test_allowed_bq_sql(self, sql):
"""Valid read-only BQ queries must pass."""
# Should not raise
_validate_bq_sql(sql)