fix: stdin register_bq parsing, separate BQ SQL validation
- cli/commands/query.py: --stdin mode now reads register_bq from the
JSON payload and merges it into the register_bq option list, matching
the documented {"register_bq": {...}, "sql": "..."} contract.
- src/remote_query.py: add _validate_bq_sql() with a narrower blocklist
(writes only); register_bq() now calls _validate_bq_sql() so legitimate
BQ operations like INFORMATION_SCHEMA, CALL, IMPORT are not blocked.
The final DuckDB execute() path still uses the full _validate_sql().
- tests/test_remote_query.py: add TestValidateBqSql covering allowed
INFORMATION_SCHEMA queries and blocked write operations.
This commit is contained in:
parent
f4129dc87d
commit
2ad8828f8c
3 changed files with 81 additions and 2 deletions
|
|
@ -41,6 +41,10 @@ def query_command(
|
||||||
try:
|
try:
|
||||||
payload = json.loads(raw)
|
payload = json.loads(raw)
|
||||||
resolved_sql = payload["sql"]
|
resolved_sql = payload["sql"]
|
||||||
|
# Extract register_bq from stdin JSON
|
||||||
|
stdin_bq = payload.get("register_bq", {})
|
||||||
|
if stdin_bq and isinstance(stdin_bq, dict):
|
||||||
|
register_bq = [f"{k}={v}" for k, v in stdin_bq.items()]
|
||||||
except (json.JSONDecodeError, KeyError) as exc:
|
except (json.JSONDecodeError, KeyError) as exc:
|
||||||
typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True)
|
typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True)
|
||||||
raise typer.Exit(1)
|
raise typer.Exit(1)
|
||||||
|
|
|
||||||
|
|
@ -145,6 +145,36 @@ def _validate_sql(sql: str) -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# BQ SQL blocklist — only blocks write/mutation operations
|
||||||
|
_BQ_BLOCKED_KEYWORDS = [
|
||||||
|
"drop ",
|
||||||
|
"delete ",
|
||||||
|
"insert ",
|
||||||
|
"update ",
|
||||||
|
"alter ",
|
||||||
|
"create ",
|
||||||
|
"truncate ",
|
||||||
|
"merge ",
|
||||||
|
";", # prevent multi-statement
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_bq_sql(sql: str) -> None:
|
||||||
|
"""Validate BQ SQL — narrower than DuckDB blocklist, only blocks writes."""
|
||||||
|
sql_lower = sql.strip().lower()
|
||||||
|
for keyword in _BQ_BLOCKED_KEYWORDS:
|
||||||
|
if keyword in sql_lower:
|
||||||
|
raise RemoteQueryError(
|
||||||
|
f"Blocked BQ SQL keyword: {keyword.strip()}",
|
||||||
|
error_type="query_error",
|
||||||
|
)
|
||||||
|
if not sql_lower.startswith("select ") and not sql_lower.startswith("with "):
|
||||||
|
raise RemoteQueryError(
|
||||||
|
"BQ query must start with SELECT or WITH",
|
||||||
|
error_type="query_error",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_config() -> Dict[str, Any]:
|
def load_config() -> Dict[str, Any]:
|
||||||
"""Load the ``remote_query:`` section from instance.yaml.
|
"""Load the ``remote_query:`` section from instance.yaml.
|
||||||
|
|
||||||
|
|
@ -232,7 +262,7 @@ class RemoteQueryEngine:
|
||||||
error_type="query_error",
|
error_type="query_error",
|
||||||
)
|
)
|
||||||
|
|
||||||
_validate_sql(bq_sql)
|
_validate_bq_sql(bq_sql)
|
||||||
|
|
||||||
client = self._get_bq_client()
|
client = self._get_bq_client()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import duckdb
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_sql
|
from src.remote_query import RemoteQueryEngine, RemoteQueryError, _validate_bq_sql, _validate_sql
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -243,3 +243,48 @@ class TestValidateSql:
|
||||||
def test_allowed_sql(self, sql):
|
def test_allowed_sql(self, sql):
|
||||||
# Should not raise
|
# Should not raise
|
||||||
_validate_sql(sql)
|
_validate_sql(sql)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _validate_bq_sql unit tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidateBqSql:
|
||||||
|
def test_information_schema_is_allowed(self):
|
||||||
|
"""INFORMATION_SCHEMA queries must pass BQ SQL validation."""
|
||||||
|
# Should not raise
|
||||||
|
_validate_bq_sql("SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS")
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"sql",
|
||||||
|
[
|
||||||
|
"DROP TABLE x",
|
||||||
|
"INSERT INTO x VALUES (1)",
|
||||||
|
"DELETE FROM x",
|
||||||
|
"UPDATE x SET y=1",
|
||||||
|
"ALTER TABLE x ADD COLUMN z INT",
|
||||||
|
"CREATE TABLE x (y INT)",
|
||||||
|
"TRUNCATE TABLE x",
|
||||||
|
"MERGE INTO x USING y ON x.id=y.id WHEN MATCHED THEN UPDATE SET x.a=y.a",
|
||||||
|
"SELECT 1; DROP TABLE x",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_blocked_bq_sql(self, sql):
|
||||||
|
"""Write/mutation operations must be rejected."""
|
||||||
|
with pytest.raises(RemoteQueryError) as exc_info:
|
||||||
|
_validate_bq_sql(sql)
|
||||||
|
assert exc_info.value.error_type == "query_error"
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"sql",
|
||||||
|
[
|
||||||
|
"SELECT * FROM dataset.INFORMATION_SCHEMA.COLUMNS",
|
||||||
|
"SELECT id FROM project.dataset.table",
|
||||||
|
"WITH cte AS (SELECT 1 AS x) SELECT x FROM cte",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_allowed_bq_sql(self, sql):
|
||||||
|
"""Valid read-only BQ queries must pass."""
|
||||||
|
# Should not raise
|
||||||
|
_validate_bq_sql(sql)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue