fix: alias validation, url escaping, read-only CLI, blocklist comment

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ZdenekSrotyr 2026-04-11 11:28:27 +02:00
parent 872b06ffae
commit f4129dc87d
4 changed files with 47 additions and 4 deletions

View file

@ -105,7 +105,7 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
typer.echo("Local DuckDB not found. Run: da sync", err=True)
raise typer.Exit(1)
conn = duckdb.connect(str(db_path))
conn = duckdb.connect(str(db_path), read_only=True)
try:
config = load_config()
engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in (

View file

@ -321,14 +321,15 @@ def _reattach_remote_extensions(
try:
conn.execute(f"LOAD {extension};")
token = os.environ.get(token_env, "") if token_env else ""
safe_url = url.replace("'", "''")
if token:
escaped_token = token.replace("'", "''")
conn.execute(
f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
)
else:
conn.execute(
f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)"
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)"
)
attached_dbs.add(alias)
logger.debug("Re-attached remote source %s via %s extension", alias, extension)

View file

@ -11,14 +11,24 @@ from __future__ import annotations
import logging
import os
import re
from typing import Any, Dict, List, Optional
import duckdb
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
_RESERVED_ALIASES = {
"information_schema", "duckdb_tables", "duckdb_columns",
"duckdb_databases", "duckdb_settings", "duckdb_functions",
"duckdb_views", "duckdb_indexes", "duckdb_schemas",
"main", "memory", "system", "temp",
}
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# SQL blocklist — mirrors app/api/query.py lines 40-63
# SQL blocklist — based on app/api/query.py, extended with additional DuckDB metadata tables
# ---------------------------------------------------------------------------
_BLOCKED_KEYWORDS: List[str] = [
@ -211,6 +221,17 @@ class RemoteQueryEngine:
RemoteQueryError: For row/memory limits or BQ errors.
ImportError: If google-cloud-bigquery is not installed.
"""
if not _SAFE_IDENTIFIER.match(alias or ""):
raise RemoteQueryError(
f"Invalid alias {alias!r}: must be a valid SQL identifier",
error_type="query_error",
)
if alias.lower() in _RESERVED_ALIASES:
raise RemoteQueryError(
f"Reserved alias {alias!r}: cannot shadow system objects",
error_type="query_error",
)
_validate_sql(bq_sql)
client = self._get_bq_client()

View file

@ -102,6 +102,27 @@ class TestRemoteQueryEngineRegister:
assert exc_info.value.error_type == "row_limit"
assert exc_info.value.details["count"] == 1_000_000
def test_register_bq_invalid_alias(self, analytics_conn):
engine = RemoteQueryEngine(analytics_conn)
# Space in alias — invalid identifier
with pytest.raises(RemoteQueryError) as exc_info:
engine.register_bq("bad alias", "SELECT 1")
assert exc_info.value.error_type == "query_error"
# Reserved alias — information_schema
with pytest.raises(RemoteQueryError) as exc_info:
engine.register_bq("information_schema", "SELECT 1")
assert exc_info.value.error_type == "query_error"
# Valid alias — should not raise from alias validation
# (will raise later trying to reach BQ without a client, but not from alias check)
try:
engine.register_bq("valid_name", "SELECT 1")
except RemoteQueryError as exc:
assert exc.error_type != "query_error" or "Invalid alias" not in str(exc)
except (ImportError, ModuleNotFoundError):
pass # Expected — no BQ package in test env
def test_register_bq_missing_package(self, analytics_conn):
"""When google-cloud-bigquery is not installed, engine must raise ImportError."""
engine = RemoteQueryEngine(