security: harden query endpoint SQL blocklist and disable external access
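Expand the blocked keywords to cover `parquet_scan`, `read_csv_auto`, `query_table`, `iceberg_scan`, `delta_scan`, `call`, URL schemes (`http://`, `https://`, `s3://`, `gcs://`), and additional file-scan and file-listing functions (`parquet_metadata`, `parquet_schema`, `json_scan`, `csv_scan`, `list_files`). Set `enable_external_access = false` on the non-read-only analytics connection path, which is taken when `server.duckdb` does not exist yet. Add three new tests verifying that queries using `parquet_scan`, `read_csv_auto`, and `query_table` are rejected with HTTP 400.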
This commit is contained in:
parent
4aa97c23d2
commit
23ae6a602c
3 changed files with 33 additions and 5 deletions
|
|
@ -39,11 +39,15 @@ async def execute_query(
|
|||
blocked = [
|
||||
"drop ", "delete ", "insert ", "update ", "alter ", "create ",
|
||||
"copy ", "attach ", "detach ", "load ", "install ",
|
||||
"export ", "import ", "pragma ",
|
||||
"export ", "import ", "pragma ", "call ",
|
||||
# File access functions
|
||||
"read_csv", "read_json", "read_parquet(", "read_text",
|
||||
"write_csv", "write_parquet",
|
||||
"read_blob", "glob(", "read_ndjson", "'/", '"/',
|
||||
"read_csv", "read_json", "read_parquet", "read_text",
|
||||
"write_csv", "write_parquet", "read_blob", "read_ndjson",
|
||||
"parquet_scan", "parquet_metadata", "parquet_schema",
|
||||
"json_scan", "csv_scan",
|
||||
"query_table", "iceberg_scan", "delta_scan",
|
||||
"glob(", "list_files",
|
||||
"'/", '"/','http://', 'https://', 's3://', 'gcs://',
|
||||
# Multiple statements
|
||||
";",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -220,7 +220,12 @@ def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection:
|
|||
db_path = _get_data_dir() / "analytics" / "server.duckdb"
|
||||
if not db_path.exists():
|
||||
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
return duckdb.connect(str(db_path), read_only=False)
|
||||
conn = duckdb.connect(str(db_path), read_only=False)
|
||||
try:
|
||||
conn.execute("SET enable_external_access = false")
|
||||
except Exception:
|
||||
pass
|
||||
return conn
|
||||
conn = duckdb.connect(str(db_path), read_only=True)
|
||||
# ATTACH extract.duckdb files FIRST so views referencing them work
|
||||
extracts_dir = _get_data_dir() / "extracts"
|
||||
|
|
|
|||
|
|
@ -164,6 +164,25 @@ class TestQuerySecurity:
|
|||
headers=_headers(token))
|
||||
assert resp.status_code == 400
|
||||
|
||||
|
||||
def test_blocks_parquet_scan(self, client):
|
||||
c, token = client
|
||||
resp = c.post("/api/query", json={"sql": "SELECT * FROM parquet_scan('/data/extracts/secret.parquet')"},
|
||||
headers=_headers(token))
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_blocks_read_csv_auto(self, client):
|
||||
c, token = client
|
||||
resp = c.post("/api/query", json={"sql": "SELECT * FROM read_csv_auto('/etc/passwd')"},
|
||||
headers=_headers(token))
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_blocks_query_table(self, client):
|
||||
c, token = client
|
||||
resp = c.post("/api/query", json={"sql": "SELECT * FROM query_table('secret_table')"},
|
||||
headers=_headers(token))
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_no_auth(self, client):
|
||||
c, _ = client
|
||||
resp = c.post("/api/query", json={"sql": "SELECT 1"})
|
||||
|
|
|
|||
Loading…
Reference in a new issue