From 2ad8828f8c7c4082512e528602ff540580e727d5 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Sat, 11 Apr 2026 19:31:39 +0200 Subject: [PATCH] fix: stdin register_bq parsing, separate BQ SQL validation - cli/commands/query.py: --stdin mode now reads register_bq from the JSON payload and merges it into the register_bq option list, matching the documented {"register_bq": {...}, "sql": "..."} contract. - src/remote_query.py: add _validate_bq_sql() with a narrower blocklist (writes only); register_bq() now calls _validate_bq_sql() so legitimate BQ operations like INFORMATION_SCHEMA, CALL, IMPORT are not blocked. The final DuckDB execute() path still uses the full _validate_sql(). - tests/test_remote_query.py: add TestValidateBqSql covering allowed INFORMATION_SCHEMA queries and blocked write operations. --- cli/commands/query.py | 4 ++++ src/remote_query.py | 32 +++++++++++++++++++++++++- tests/test_remote_query.py | 47 +++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/cli/commands/query.py b/cli/commands/query.py index 2b039c3..d959e2b 100644 --- a/cli/commands/query.py +++ b/cli/commands/query.py @@ -41,6 +41,10 @@ def query_command( try: payload = json.loads(raw) resolved_sql = payload["sql"] + # Extract register_bq from stdin JSON + stdin_bq = payload.get("register_bq", {}) + if stdin_bq and isinstance(stdin_bq, dict): + register_bq = [f"{k}={v}" for k, v in stdin_bq.items()] except (json.JSONDecodeError, KeyError) as exc: typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True) raise typer.Exit(1) diff --git a/src/remote_query.py b/src/remote_query.py index d14965d..5ebcfba 100644 --- a/src/remote_query.py +++ b/src/remote_query.py @@ -145,6 +145,36 @@ def _validate_sql(sql: str) -> None: ) +# BQ SQL blocklist — only blocks write/mutation operations +_BQ_BLOCKED_KEYWORDS = [ + "drop ", + "delete ", + "insert ", + "update ", + "alter ", + "create ", + "truncate ", + "merge ", + ";", # prevent multi-statement +] + + +def _validate_bq_sql(sql: str) -> None: + """Validate BQ SQL — narrower than DuckDB blocklist, only blocks writes.""" + sql_lower = sql.strip().lower() + for keyword in _BQ_BLOCKED_KEYWORDS: + if keyword in sql_lower: + raise RemoteQueryError( + f"Blocked BQ SQL keyword: {keyword.strip()}", + error_type="query_error", + ) + if not sql_lower.startswith("select ") and not sql_lower.startswith("with "): + raise RemoteQueryError( + "BQ query must start with SELECT or WITH", + error_type="query_error", + ) + + def load_config() -> Dict[str, Any]: """Load the ``remote_query:`` section from instance.yaml. @@ -232,7 +262,7 @@ class RemoteQueryEngine: error_type="query_error", ) - _validate_sql(bq_sql) + _validate_bq_sql(bq_sql) client = self._get_bq_client() diff --git a/tests/test_remote_query.py b/tests/test_remote_query.py index 36e186f..f4de108 100644 --- a/tests/test_remote_query.py +++ b/tests/test_remote_query.py @@ -9,7 +9,7 @@ import duckdb import pyarrow as pa import pytest -from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_sql +from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_bq_sql, _validate_sql # --------------------------------------------------------------------------- @@ -243,3 +243,48 @@ class TestValidateSql: def test_allowed_sql(self, sql): # Should not raise _validate_sql(sql) + + +# --------------------------------------------------------------------------- +# _validate_bq_sql unit tests +# --------------------------------------------------------------------------- + + +class TestValidateBqSql: + def test_information_schema_is_allowed(self): + """INFORMATION_SCHEMA queries must pass BQ SQL validation.""" + # Should not raise + _validate_bq_sql("SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS") + + @pytest.mark.parametrize( + "sql", + [ + "DROP TABLE x", + "INSERT INTO x VALUES (1)", + "DELETE FROM x", + "UPDATE x SET y=1", + "ALTER TABLE x ADD COLUMN z INT", + "CREATE TABLE x (y INT)", + "TRUNCATE TABLE x", + "MERGE INTO x USING y ON x.id=y.id WHEN MATCHED THEN UPDATE SET x.a=y.a", + "SELECT 1; DROP TABLE x", + ], + ) + def test_blocked_bq_sql(self, sql): + """Write/mutation operations must be rejected.""" + with pytest.raises(RemoteQueryError) as exc_info: + _validate_bq_sql(sql) + assert exc_info.value.error_type == "query_error" + + @pytest.mark.parametrize( + "sql", + [ + "SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS", + "SELECT id FROM project.dataset.table", + "WITH cte AS (SELECT 1 AS x) SELECT x FROM cte", + ], + ) + def test_allowed_bq_sql(self, sql): + """Valid read-only BQ queries must pass.""" + # Should not raise + _validate_bq_sql(sql)