fix: alias validation, url escaping, read-only CLI, blocklist comment
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
872b06ffae
commit
f4129dc87d
4 changed files with 47 additions and 4 deletions
|
|
@ -105,7 +105,7 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
|
|||
typer.echo("Local DuckDB not found. Run: da sync", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
conn = duckdb.connect(str(db_path))
|
||||
conn = duckdb.connect(str(db_path), read_only=True)
|
||||
try:
|
||||
config = load_config()
|
||||
engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in (
|
||||
|
|
|
|||
|
|
@ -321,14 +321,15 @@ def _reattach_remote_extensions(
|
|||
try:
|
||||
conn.execute(f"LOAD {extension};")
|
||||
token = os.environ.get(token_env, "") if token_env else ""
|
||||
safe_url = url.replace("'", "''")
|
||||
if token:
|
||||
escaped_token = token.replace("'", "''")
|
||||
conn.execute(
|
||||
f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
|
||||
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
|
||||
)
|
||||
else:
|
||||
conn.execute(
|
||||
f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)"
|
||||
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)"
|
||||
)
|
||||
attached_dbs.add(alias)
|
||||
logger.debug("Re-attached remote source %s via %s extension", alias, extension)
|
||||
|
|
|
|||
|
|
@ -11,14 +11,24 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import duckdb
|
||||
|
||||
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||
|
||||
_RESERVED_ALIASES = {
|
||||
"information_schema", "duckdb_tables", "duckdb_columns",
|
||||
"duckdb_databases", "duckdb_settings", "duckdb_functions",
|
||||
"duckdb_views", "duckdb_indexes", "duckdb_schemas",
|
||||
"main", "memory", "system", "temp",
|
||||
}
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL blocklist — mirrors app/api/query.py lines 40-63
|
||||
# SQL blocklist — based on app/api/query.py, extended with additional DuckDB metadata tables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_BLOCKED_KEYWORDS: List[str] = [
|
||||
|
|
@ -211,6 +221,17 @@ class RemoteQueryEngine:
|
|||
RemoteQueryError: For row/memory limits or BQ errors.
|
||||
ImportError: If google-cloud-bigquery is not installed.
|
||||
"""
|
||||
if not _SAFE_IDENTIFIER.match(alias or ""):
|
||||
raise RemoteQueryError(
|
||||
f"Invalid alias {alias!r}: must be a valid SQL identifier",
|
||||
error_type="query_error",
|
||||
)
|
||||
if alias.lower() in _RESERVED_ALIASES:
|
||||
raise RemoteQueryError(
|
||||
f"Reserved alias {alias!r}: cannot shadow system objects",
|
||||
error_type="query_error",
|
||||
)
|
||||
|
||||
_validate_sql(bq_sql)
|
||||
|
||||
client = self._get_bq_client()
|
||||
|
|
|
|||
|
|
@ -102,6 +102,27 @@ class TestRemoteQueryEngineRegister:
|
|||
assert exc_info.value.error_type == "row_limit"
|
||||
assert exc_info.value.details["count"] == 1_000_000
|
||||
|
||||
def test_register_bq_invalid_alias(self, analytics_conn):
|
||||
engine = RemoteQueryEngine(analytics_conn)
|
||||
# Space in alias — invalid identifier
|
||||
with pytest.raises(RemoteQueryError) as exc_info:
|
||||
engine.register_bq("bad alias", "SELECT 1")
|
||||
assert exc_info.value.error_type == "query_error"
|
||||
|
||||
# Reserved alias — information_schema
|
||||
with pytest.raises(RemoteQueryError) as exc_info:
|
||||
engine.register_bq("information_schema", "SELECT 1")
|
||||
assert exc_info.value.error_type == "query_error"
|
||||
|
||||
# Valid alias — should not raise from alias validation
|
||||
# (will raise later trying to reach BQ without a client, but not from alias check)
|
||||
try:
|
||||
engine.register_bq("valid_name", "SELECT 1")
|
||||
except RemoteQueryError as exc:
|
||||
assert exc.error_type != "query_error" or "Invalid alias" not in str(exc)
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
pass # Expected — no BQ package in test env
|
||||
|
||||
def test_register_bq_missing_package(self, analytics_conn):
|
||||
"""When google-cloud-bigquery is not installed, engine must raise ImportError."""
|
||||
engine = RemoteQueryEngine(
|
||||
|
|
|
|||
Loading…
Reference in a new issue