fix: stdin register_bq parsing, separate BQ SQL validation
- cli/commands/query.py: --stdin mode now reads register_bq from the
JSON payload and merges it into the register_bq option list, matching
the documented {"register_bq": {...}, "sql": "..."} contract.
- src/remote_query.py: add _validate_bq_sql() with a narrower blocklist
(writes only); register_bq() now calls _validate_bq_sql() so legitimate
BQ operations like INFORMATION_SCHEMA, CALL, IMPORT are not blocked.
The final DuckDB execute() path still uses the full _validate_sql().
- tests/test_remote_query.py: add TestValidateBqSql covering allowed
INFORMATION_SCHEMA queries and blocked write operations.
This commit is contained in:
parent
f4129dc87d
commit
2ad8828f8c
3 changed files with 81 additions and 2 deletions
|
|
@ -41,6 +41,10 @@ def query_command(
|
|||
try:
|
||||
payload = json.loads(raw)
|
||||
resolved_sql = payload["sql"]
|
||||
# Extract register_bq from stdin JSON
|
||||
stdin_bq = payload.get("register_bq", {})
|
||||
if stdin_bq and isinstance(stdin_bq, dict):
|
||||
register_bq = [f"{k}={v}" for k, v in stdin_bq.items()]
|
||||
except (json.JSONDecodeError, KeyError) as exc:
|
||||
typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
|
|
|||
|
|
@ -145,6 +145,36 @@ def _validate_sql(sql: str) -> None:
|
|||
)
|
||||
|
||||
|
||||
# BQ SQL blocklist — only blocks write/mutation operations
|
||||
_BQ_BLOCKED_KEYWORDS = [
|
||||
"drop ",
|
||||
"delete ",
|
||||
"insert ",
|
||||
"update ",
|
||||
"alter ",
|
||||
"create ",
|
||||
"truncate ",
|
||||
"merge ",
|
||||
";", # prevent multi-statement
|
||||
]
|
||||
|
||||
|
||||
def _validate_bq_sql(sql: str) -> None:
|
||||
"""Validate BQ SQL — narrower than DuckDB blocklist, only blocks writes."""
|
||||
sql_lower = sql.strip().lower()
|
||||
for keyword in _BQ_BLOCKED_KEYWORDS:
|
||||
if keyword in sql_lower:
|
||||
raise RemoteQueryError(
|
||||
f"Blocked BQ SQL keyword: {keyword.strip()}",
|
||||
error_type="query_error",
|
||||
)
|
||||
if not sql_lower.startswith("select ") and not sql_lower.startswith("with "):
|
||||
raise RemoteQueryError(
|
||||
"BQ query must start with SELECT or WITH",
|
||||
error_type="query_error",
|
||||
)
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
|
||||
"""Load the ``remote_query:`` section from instance.yaml.
|
||||
|
||||
|
|
@ -232,7 +262,7 @@ class RemoteQueryEngine:
|
|||
error_type="query_error",
|
||||
)
|
||||
|
||||
_validate_sql(bq_sql)
|
||||
_validate_bq_sql(bq_sql)
|
||||
|
||||
client = self._get_bq_client()
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import duckdb
|
|||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_sql
|
||||
from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_bq_sql, _validate_sql
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -243,3 +243,48 @@ class TestValidateSql:
|
|||
def test_allowed_sql(self, sql):
|
||||
# Should not raise
|
||||
_validate_sql(sql)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _validate_bq_sql unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestValidateBqSql:
|
||||
def test_information_schema_is_allowed(self):
|
||||
"""INFORMATION_SCHEMA queries must pass BQ SQL validation."""
|
||||
# Should not raise
|
||||
_validate_bq_sql("SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql",
|
||||
[
|
||||
"DROP TABLE x",
|
||||
"INSERT INTO x VALUES (1)",
|
||||
"DELETE FROM x",
|
||||
"UPDATE x SET y=1",
|
||||
"ALTER TABLE x ADD COLUMN z INT",
|
||||
"CREATE TABLE x (y INT)",
|
||||
"TRUNCATE TABLE x",
|
||||
"MERGE INTO x USING y ON x.id=y.id WHEN MATCHED THEN UPDATE SET x.a=y.a",
|
||||
"SELECT 1; DROP TABLE x",
|
||||
],
|
||||
)
|
||||
def test_blocked_bq_sql(self, sql):
|
||||
"""Write/mutation operations must be rejected."""
|
||||
with pytest.raises(RemoteQueryError) as exc_info:
|
||||
_validate_bq_sql(sql)
|
||||
assert exc_info.value.error_type == "query_error"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql",
|
||||
[
|
||||
"SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS",
|
||||
"SELECT id FROM project.dataset.table",
|
||||
"WITH cte AS (SELECT 1 AS x) SELECT x FROM cte",
|
||||
],
|
||||
)
|
||||
def test_allowed_bq_sql(self, sql):
|
||||
"""Valid read-only BQ queries must pass."""
|
||||
# Should not raise
|
||||
_validate_bq_sql(sql)
|
||||
|
|
|
|||
Loading…
Reference in a new issue