From 23ae6a602c284789ec482459d84170a9081563fb Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr
Date: Thu, 9 Apr 2026 06:54:58 +0200
Subject: [PATCH] security: harden query endpoint SQL blocklist and disable external access

Expand blocked keywords to cover parquet_scan, read_csv_auto,
query_table, iceberg_scan, delta_scan, call, URL schemes
(http/https/s3/gcs), and additional file-scan functions. Set
enable_external_access=false on the non-read-only analytics connection
path. Add three new tests covering parquet_scan, read_csv_auto, and
query_table blocking.
---
 app/api/query.py       | 12 ++++++++----
 src/db.py              |  7 ++++++-
 tests/test_security.py | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/app/api/query.py b/app/api/query.py
index 75efc5d..2405150 100644
--- a/app/api/query.py
+++ b/app/api/query.py
@@ -39,11 +39,15 @@ async def execute_query(
     blocked = [
         "drop ", "delete ", "insert ", "update ", "alter ", "create ",
         "copy ", "attach ", "detach ", "load ", "install ",
-        "export ", "import ", "pragma ",
+        "export ", "import ", "pragma ", "call ",
         # File access functions
-        "read_csv", "read_json", "read_parquet(", "read_text",
-        "write_csv", "write_parquet",
-        "read_blob", "glob(", "read_ndjson", "'/", '"/',
+        "read_csv", "read_json", "read_parquet", "read_text",
+        "write_csv", "write_parquet", "read_blob", "read_ndjson",
+        "parquet_scan", "parquet_metadata", "parquet_schema",
+        "json_scan", "csv_scan",
+        "query_table", "iceberg_scan", "delta_scan",
+        "glob(", "list_files",
+        "'/", '"/','http://', 'https://', 's3://', 'gcs://',
         # Multiple statements
         ";",
     ]
diff --git a/src/db.py b/src/db.py
index 8250746..813b7b5 100644
--- a/src/db.py
+++ b/src/db.py
@@ -220,7 +220,12 @@ def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection:
     db_path = _get_data_dir() / "analytics" / "server.duckdb"
     if not db_path.exists():
         db_path.parent.mkdir(parents=True, exist_ok=True)
-        return duckdb.connect(str(db_path), read_only=False)
+        conn = duckdb.connect(str(db_path), read_only=False)
+        try:
+            conn.execute("SET enable_external_access = false")
+        except Exception:
+            pass
+        return conn
     conn = duckdb.connect(str(db_path), read_only=True)
     # ATTACH extract.duckdb files FIRST so views referencing them work
     extracts_dir = _get_data_dir() / "extracts"
diff --git a/tests/test_security.py b/tests/test_security.py
index 1da641b..d982e1c 100644
--- a/tests/test_security.py
+++ b/tests/test_security.py
@@ -164,6 +164,25 @@ class TestQuerySecurity:
                       headers=_headers(token))
         assert resp.status_code == 400
 
+
+    def test_blocks_parquet_scan(self, client):
+        c, token = client
+        resp = c.post("/api/query", json={"sql": "SELECT * FROM parquet_scan('/data/extracts/secret.parquet')"},
+                      headers=_headers(token))
+        assert resp.status_code == 400
+
+    def test_blocks_read_csv_auto(self, client):
+        c, token = client
+        resp = c.post("/api/query", json={"sql": "SELECT * FROM read_csv_auto('/etc/passwd')"},
+                      headers=_headers(token))
+        assert resp.status_code == 400
+
+    def test_blocks_query_table(self, client):
+        c, token = client
+        resp = c.post("/api/query", json={"sql": "SELECT * FROM query_table('secret_table')"},
+                      headers=_headers(token))
+        assert resp.status_code == 400
+
     def test_no_auth(self, client):
         c, _ = client
         resp = c.post("/api/query", json={"sql": "SELECT 1"})