From f4129dc87d42b1cc80f37bb78235883d791592d1 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Sat, 11 Apr 2026 11:28:27 +0200 Subject: [PATCH] fix: alias validation, url escaping, read-only CLI, blocklist comment Co-Authored-By: Claude Sonnet 4.6 --- cli/commands/query.py | 2 +- src/db.py | 5 +++-- src/remote_query.py | 23 ++++++++++++++++++++++- tests/test_remote_query.py | 21 +++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/cli/commands/query.py b/cli/commands/query.py index 022c6c5..2b039c3 100644 --- a/cli/commands/query.py +++ b/cli/commands/query.py @@ -105,7 +105,7 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]): typer.echo("Local DuckDB not found. Run: da sync", err=True) raise typer.Exit(1) - conn = duckdb.connect(str(db_path)) + conn = duckdb.connect(str(db_path), read_only=True) try: config = load_config() engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in ( diff --git a/src/db.py b/src/db.py index 66c9d0a..408f3e8 100644 --- a/src/db.py +++ b/src/db.py @@ -321,14 +321,15 @@ def _reattach_remote_extensions( try: conn.execute(f"LOAD {extension};") token = os.environ.get(token_env, "") if token_env else "" + safe_url = url.replace("'", "''") if token: escaped_token = token.replace("'", "''") conn.execute( - f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')" + f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')" ) else: conn.execute( - f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)" + f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)" ) attached_dbs.add(alias) logger.debug("Re-attached remote source %s via %s extension", alias, extension) diff --git a/src/remote_query.py b/src/remote_query.py index 2b50f1b..d14965d 100644 --- a/src/remote_query.py +++ b/src/remote_query.py @@ -11,14 +11,24 @@ from __future__ import annotations import logging import os +import re from typing import Any, Dict, List, Optional import duckdb +_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$") + +_RESERVED_ALIASES = { + "information_schema", "duckdb_tables", "duckdb_columns", + "duckdb_databases", "duckdb_settings", "duckdb_functions", + "duckdb_views", "duckdb_indexes", "duckdb_schemas", + "main", "memory", "system", "temp", +} + logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- -# SQL blocklist — mirrors app/api/query.py lines 40-63 +# SQL blocklist — based on app/api/query.py, extended with additional DuckDB metadata tables # --------------------------------------------------------------------------- _BLOCKED_KEYWORDS: List[str] = [ @@ -211,6 +221,17 @@ class RemoteQueryEngine: RemoteQueryError: For row/memory limits or BQ errors. ImportError: If google-cloud-bigquery is not installed. """ + if not _SAFE_IDENTIFIER.match(alias or ""): + raise RemoteQueryError( + f"Invalid alias {alias!r}: must be a valid SQL identifier", + error_type="query_error", + ) + if alias.lower() in _RESERVED_ALIASES: + raise RemoteQueryError( + f"Reserved alias {alias!r}: cannot shadow system objects", + error_type="query_error", + ) + _validate_sql(bq_sql) client = self._get_bq_client() diff --git a/tests/test_remote_query.py b/tests/test_remote_query.py index cae7208..36e186f 100644 --- a/tests/test_remote_query.py +++ b/tests/test_remote_query.py @@ -102,6 +102,27 @@ class TestRemoteQueryEngineRegister: assert exc_info.value.error_type == "row_limit" assert exc_info.value.details["count"] == 1_000_000 + def test_register_bq_invalid_alias(self, analytics_conn): + engine = RemoteQueryEngine(analytics_conn) + # Space in alias — invalid identifier + with pytest.raises(RemoteQueryError) as exc_info: + engine.register_bq("bad alias", "SELECT 1") + assert exc_info.value.error_type == "query_error" + + # Reserved alias — information_schema + with pytest.raises(RemoteQueryError) as exc_info: + engine.register_bq("information_schema", "SELECT 1") + assert exc_info.value.error_type == "query_error" + + # Valid alias — should not raise from alias validation + # (will raise later trying to reach BQ without a client, but not from alias check) + try: + engine.register_bq("valid_name", "SELECT 1") + except RemoteQueryError as exc: + assert exc.error_type != "query_error" or "Invalid alias" not in str(exc) + except (ImportError, ModuleNotFoundError): + pass # Expected — no BQ package in test env + def test_register_bq_missing_package(self, analytics_conn): """When google-cloud-bigquery is not installed, engine must raise ImportError.""" engine = RemoteQueryEngine(