fix: alias validation, url escaping, read-only CLI, blocklist comment

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ZdenekSrotyr 2026-04-11 11:28:27 +02:00
parent 872b06ffae
commit f4129dc87d
4 changed files with 47 additions and 4 deletions

View file

@ -105,7 +105,7 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
typer.echo("Local DuckDB not found. Run: da sync", err=True) typer.echo("Local DuckDB not found. Run: da sync", err=True)
raise typer.Exit(1) raise typer.Exit(1)
conn = duckdb.connect(str(db_path)) conn = duckdb.connect(str(db_path), read_only=True)
try: try:
config = load_config() config = load_config()
engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in ( engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in (

View file

@ -321,14 +321,15 @@ def _reattach_remote_extensions(
try: try:
conn.execute(f"LOAD {extension};") conn.execute(f"LOAD {extension};")
token = os.environ.get(token_env, "") if token_env else "" token = os.environ.get(token_env, "") if token_env else ""
safe_url = url.replace("'", "''")
if token: if token:
escaped_token = token.replace("'", "''") escaped_token = token.replace("'", "''")
conn.execute( conn.execute(
f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')" f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
) )
else: else:
conn.execute( conn.execute(
f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)" f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)"
) )
attached_dbs.add(alias) attached_dbs.add(alias)
logger.debug("Re-attached remote source %s via %s extension", alias, extension) logger.debug("Re-attached remote source %s via %s extension", alias, extension)

View file

@ -11,14 +11,24 @@ from __future__ import annotations
import logging import logging
import os import os
import re
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import duckdb import duckdb
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
_RESERVED_ALIASES = {
"information_schema", "duckdb_tables", "duckdb_columns",
"duckdb_databases", "duckdb_settings", "duckdb_functions",
"duckdb_views", "duckdb_indexes", "duckdb_schemas",
"main", "memory", "system", "temp",
}
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# SQL blocklist — mirrors app/api/query.py lines 40-63 # SQL blocklist — based on app/api/query.py, extended with additional DuckDB metadata tables
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_BLOCKED_KEYWORDS: List[str] = [ _BLOCKED_KEYWORDS: List[str] = [
@ -211,6 +221,17 @@ class RemoteQueryEngine:
RemoteQueryError: For row/memory limits or BQ errors. RemoteQueryError: For row/memory limits or BQ errors.
ImportError: If google-cloud-bigquery is not installed. ImportError: If google-cloud-bigquery is not installed.
""" """
if not _SAFE_IDENTIFIER.match(alias or ""):
raise RemoteQueryError(
f"Invalid alias {alias!r}: must be a valid SQL identifier",
error_type="query_error",
)
if alias.lower() in _RESERVED_ALIASES:
raise RemoteQueryError(
f"Reserved alias {alias!r}: cannot shadow system objects",
error_type="query_error",
)
_validate_sql(bq_sql) _validate_sql(bq_sql)
client = self._get_bq_client() client = self._get_bq_client()

View file

@ -102,6 +102,27 @@ class TestRemoteQueryEngineRegister:
assert exc_info.value.error_type == "row_limit" assert exc_info.value.error_type == "row_limit"
assert exc_info.value.details["count"] == 1_000_000 assert exc_info.value.details["count"] == 1_000_000
def test_register_bq_invalid_alias(self, analytics_conn):
engine = RemoteQueryEngine(analytics_conn)
# Space in alias — invalid identifier
with pytest.raises(RemoteQueryError) as exc_info:
engine.register_bq("bad alias", "SELECT 1")
assert exc_info.value.error_type == "query_error"
# Reserved alias — information_schema
with pytest.raises(RemoteQueryError) as exc_info:
engine.register_bq("information_schema", "SELECT 1")
assert exc_info.value.error_type == "query_error"
# Valid alias — should not raise from alias validation
# (will raise later trying to reach BQ without a client, but not from alias check)
try:
engine.register_bq("valid_name", "SELECT 1")
except RemoteQueryError as exc:
assert exc.error_type != "query_error" or "Invalid alias" not in str(exc)
except (ImportError, ModuleNotFoundError):
pass # Expected — no BQ package in test env
def test_register_bq_missing_package(self, analytics_conn): def test_register_bq_missing_package(self, analytics_conn):
"""When google-cloud-bigquery is not installed, engine must raise ImportError.""" """When google-cloud-bigquery is not installed, engine must raise ImportError."""
engine = RemoteQueryEngine( engine = RemoteQueryEngine(