security: harden query endpoint SQL blocklist and disable external access

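Expand blocked keywords to cover parquet_scan, parquet_metadata,
parquet_schema, json_scan, csv_scan, query_table, iceberg_scan, delta_scan,
list_files, call, and URL schemes (http/https/s3/gcs). read_parquet is now
matched without requiring the opening parenthesis; read_csv_auto is caught
by the existing read_csv entry. Set enable_external_access=false on the
non-read-only analytics connection path, i.e. the connection opened with
read_only=False when the analytics database file does not exist yet (the
setting is best-effort: a failure to apply it is ignored). Add three new
tests covering parquet_scan, read_csv_auto, and query_table blocking.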
This commit is contained in:
ZdenekSrotyr 2026-04-09 06:54:58 +02:00
parent 4aa97c23d2
commit 23ae6a602c
3 changed files with 33 additions and 5 deletions

View file

@ -39,11 +39,15 @@ async def execute_query(
blocked = [ blocked = [
"drop ", "delete ", "insert ", "update ", "alter ", "create ", "drop ", "delete ", "insert ", "update ", "alter ", "create ",
"copy ", "attach ", "detach ", "load ", "install ", "copy ", "attach ", "detach ", "load ", "install ",
"export ", "import ", "pragma ", "export ", "import ", "pragma ", "call ",
# File access functions # File access functions
"read_csv", "read_json", "read_parquet(", "read_text", "read_csv", "read_json", "read_parquet", "read_text",
"write_csv", "write_parquet", "write_csv", "write_parquet", "read_blob", "read_ndjson",
"read_blob", "glob(", "read_ndjson", "'/", '"/', "parquet_scan", "parquet_metadata", "parquet_schema",
"json_scan", "csv_scan",
"query_table", "iceberg_scan", "delta_scan",
"glob(", "list_files",
"'/", '"/','http://', 'https://', 's3://', 'gcs://',
# Multiple statements # Multiple statements
";", ";",
] ]

View file

@ -220,7 +220,12 @@ def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection:
db_path = _get_data_dir() / "analytics" / "server.duckdb" db_path = _get_data_dir() / "analytics" / "server.duckdb"
if not db_path.exists(): if not db_path.exists():
db_path.parent.mkdir(parents=True, exist_ok=True) db_path.parent.mkdir(parents=True, exist_ok=True)
return duckdb.connect(str(db_path), read_only=False) conn = duckdb.connect(str(db_path), read_only=False)
try:
conn.execute("SET enable_external_access = false")
except Exception:
pass
return conn
conn = duckdb.connect(str(db_path), read_only=True) conn = duckdb.connect(str(db_path), read_only=True)
# ATTACH extract.duckdb files FIRST so views referencing them work # ATTACH extract.duckdb files FIRST so views referencing them work
extracts_dir = _get_data_dir() / "extracts" extracts_dir = _get_data_dir() / "extracts"

View file

@ -164,6 +164,25 @@ class TestQuerySecurity:
headers=_headers(token)) headers=_headers(token))
assert resp.status_code == 400 assert resp.status_code == 400
def test_blocks_parquet_scan(self, client):
    """parquet_scan() must be rejected by the query endpoint with HTTP 400.

    Two statements are sent. The first reads an absolute path, which also
    contains a quote followed by a slash and so could be rejected by the
    path pattern alone. The second uses a relative file name, so a 400
    there shows that the ``parquet_scan`` keyword itself is blocked.
    """
    c, token = client
    statements = (
        "SELECT * FROM parquet_scan('/data/extracts/secret.parquet')",
        "SELECT * FROM parquet_scan('secret.parquet')",
    )
    for sql in statements:
        resp = c.post("/api/query", json={"sql": sql},
                      headers=_headers(token))
        # Include the statement in the failure message to show which form slipped through.
        assert resp.status_code == 400, sql
def test_blocks_read_csv_auto(self, client):
    """A read_csv_auto() call on a local file must be answered with HTTP 400."""
    http, auth_token = client
    payload = {"sql": "SELECT * FROM read_csv_auto('/etc/passwd')"}
    response = http.post("/api/query", json=payload,
                         headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_query_table(self, client):
    """A query_table() call must be answered with HTTP 400."""
    http, auth_token = client
    payload = {"sql": "SELECT * FROM query_table('secret_table')"}
    response = http.post("/api/query", json=payload,
                         headers=_headers(auth_token))
    assert response.status_code == 400
def test_no_auth(self, client): def test_no_auth(self, client):
c, _ = client c, _ = client
resp = c.post("/api/query", json={"sql": "SELECT 1"}) resp = c.post("/api/query", json={"sql": "SELECT 1"})