fix: alias validation, url escaping, read-only CLI, blocklist comment
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
872b06ffae
commit
f4129dc87d
4 changed files with 47 additions and 4 deletions
|
|
@ -105,7 +105,7 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
|
||||||
typer.echo("Local DuckDB not found. Run: da sync", err=True)
|
typer.echo("Local DuckDB not found. Run: da sync", err=True)
|
||||||
raise typer.Exit(1)
|
raise typer.Exit(1)
|
||||||
|
|
||||||
conn = duckdb.connect(str(db_path))
|
conn = duckdb.connect(str(db_path), read_only=True)
|
||||||
try:
|
try:
|
||||||
config = load_config()
|
config = load_config()
|
||||||
engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in (
|
engine = RemoteQueryEngine(conn, **{k: v for k, v in config.items() if k in (
|
||||||
|
|
|
||||||
|
|
@ -321,14 +321,15 @@ def _reattach_remote_extensions(
|
||||||
try:
|
try:
|
||||||
conn.execute(f"LOAD {extension};")
|
conn.execute(f"LOAD {extension};")
|
||||||
token = os.environ.get(token_env, "") if token_env else ""
|
token = os.environ.get(token_env, "") if token_env else ""
|
||||||
|
safe_url = url.replace("'", "''")
|
||||||
if token:
|
if token:
|
||||||
escaped_token = token.replace("'", "''")
|
escaped_token = token.replace("'", "''")
|
||||||
conn.execute(
|
conn.execute(
|
||||||
f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
|
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)"
|
f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)"
|
||||||
)
|
)
|
||||||
attached_dbs.add(alias)
|
attached_dbs.add(alias)
|
||||||
logger.debug("Re-attached remote source %s via %s extension", alias, extension)
|
logger.debug("Re-attached remote source %s via %s extension", alias, extension)
|
||||||
|
|
|
||||||
|
|
@ -11,14 +11,24 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
import duckdb
|
import duckdb
|
||||||
|
|
||||||
|
_SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
|
||||||
|
|
||||||
|
_RESERVED_ALIASES = {
|
||||||
|
"information_schema", "duckdb_tables", "duckdb_columns",
|
||||||
|
"duckdb_databases", "duckdb_settings", "duckdb_functions",
|
||||||
|
"duckdb_views", "duckdb_indexes", "duckdb_schemas",
|
||||||
|
"main", "memory", "system", "temp",
|
||||||
|
}
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SQL blocklist — mirrors app/api/query.py lines 40-63
|
# SQL blocklist — based on app/api/query.py, extended with additional DuckDB metadata tables
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_BLOCKED_KEYWORDS: List[str] = [
|
_BLOCKED_KEYWORDS: List[str] = [
|
||||||
|
|
@ -211,6 +221,17 @@ class RemoteQueryEngine:
|
||||||
RemoteQueryError: For row/memory limits or BQ errors.
|
RemoteQueryError: For row/memory limits or BQ errors.
|
||||||
ImportError: If google-cloud-bigquery is not installed.
|
ImportError: If google-cloud-bigquery is not installed.
|
||||||
"""
|
"""
|
||||||
|
if not _SAFE_IDENTIFIER.match(alias or ""):
|
||||||
|
raise RemoteQueryError(
|
||||||
|
f"Invalid alias {alias!r}: must be a valid SQL identifier",
|
||||||
|
error_type="query_error",
|
||||||
|
)
|
||||||
|
if alias.lower() in _RESERVED_ALIASES:
|
||||||
|
raise RemoteQueryError(
|
||||||
|
f"Reserved alias {alias!r}: cannot shadow system objects",
|
||||||
|
error_type="query_error",
|
||||||
|
)
|
||||||
|
|
||||||
_validate_sql(bq_sql)
|
_validate_sql(bq_sql)
|
||||||
|
|
||||||
client = self._get_bq_client()
|
client = self._get_bq_client()
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,27 @@ class TestRemoteQueryEngineRegister:
|
||||||
assert exc_info.value.error_type == "row_limit"
|
assert exc_info.value.error_type == "row_limit"
|
||||||
assert exc_info.value.details["count"] == 1_000_000
|
assert exc_info.value.details["count"] == 1_000_000
|
||||||
|
|
||||||
|
def test_register_bq_invalid_alias(self, analytics_conn):
|
||||||
|
engine = RemoteQueryEngine(analytics_conn)
|
||||||
|
# Space in alias — invalid identifier
|
||||||
|
with pytest.raises(RemoteQueryError) as exc_info:
|
||||||
|
engine.register_bq("bad alias", "SELECT 1")
|
||||||
|
assert exc_info.value.error_type == "query_error"
|
||||||
|
|
||||||
|
# Reserved alias — information_schema
|
||||||
|
with pytest.raises(RemoteQueryError) as exc_info:
|
||||||
|
engine.register_bq("information_schema", "SELECT 1")
|
||||||
|
assert exc_info.value.error_type == "query_error"
|
||||||
|
|
||||||
|
# Valid alias — should not raise from alias validation
|
||||||
|
# (will raise later trying to reach BQ without a client, but not from alias check)
|
||||||
|
try:
|
||||||
|
engine.register_bq("valid_name", "SELECT 1")
|
||||||
|
except RemoteQueryError as exc:
|
||||||
|
assert exc.error_type != "query_error" or "Invalid alias" not in str(exc)
|
||||||
|
except (ImportError, ModuleNotFoundError):
|
||||||
|
pass # Expected — no BQ package in test env
|
||||||
|
|
||||||
def test_register_bq_missing_package(self, analytics_conn):
|
def test_register_bq_missing_package(self, analytics_conn):
|
||||||
"""When google-cloud-bigquery is not installed, engine must raise ImportError."""
|
"""When google-cloud-bigquery is not installed, engine must raise ImportError."""
|
||||||
engine = RemoteQueryEngine(
|
engine = RemoteQueryEngine(
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue