fix: block DuckDB metadata functions and relative paths in query endpoint

Add information_schema, duckdb_* introspection functions, pragma_* functions,
and relative path traversal patterns to the SQL blocklist so users cannot
enumerate schema metadata or reach files through relative paths, regardless of RBAC. Add six corresponding tests.
This commit is contained in:
ZdenekSrotyr 2026-04-09 16:29:11 +02:00
parent 86fe4b411d
commit 55515266ea
2 changed files with 43 additions and 0 deletions

View file

@ -49,6 +49,13 @@ async def execute_query(
"query_table", "iceberg_scan", "delta_scan",
"glob(", "list_files",
"'/", '"/','http://', 'https://', 's3://', 'gcs://',
# DuckDB metadata (leaks schema info regardless of RBAC)
"information_schema", "duckdb_tables", "duckdb_columns",
"duckdb_databases", "duckdb_settings", "duckdb_functions",
"duckdb_views", "duckdb_indexes", "duckdb_schemas",
"pragma_table_info", "pragma_storage_info",
# Relative path traversal
"'../", '"../',
# Multiple statements
";",
]

View file

@ -220,6 +220,42 @@ class TestQuerySecurity:
# but not with 403 access denied. The regex logic is sound if test_word_boundary_match_no_false_positive passes.
assert resp.status_code in [400, 200] # Either query error or success
def test_blocks_information_schema(self, client):
    """A query against information_schema must be answered with HTTP 400."""
    # The fixture unpacks into the HTTP client and the token passed to _headers.
    http, auth_token = client
    payload = {"sql": "SELECT table_name FROM information_schema.tables"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_duckdb_tables(self, client):
    """A query calling duckdb_tables() must be answered with HTTP 400."""
    # The fixture unpacks into the HTTP client and the token passed to _headers.
    http, auth_token = client
    payload = {"sql": "SELECT * FROM duckdb_tables()"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_duckdb_columns(self, client):
    """A query calling duckdb_columns() must be answered with HTTP 400."""
    # The fixture unpacks into the HTTP client and the token passed to _headers.
    http, auth_token = client
    payload = {"sql": "SELECT * FROM duckdb_columns()"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_duckdb_databases(self, client):
    """A query calling duckdb_databases() must be answered with HTTP 400."""
    # The fixture unpacks into the HTTP client and the token passed to _headers.
    http, auth_token = client
    payload = {"sql": "SELECT * FROM duckdb_databases()"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_relative_path(self, client):
    """A query whose string literal starts with '../' must be answered with HTTP 400."""
    # NOTE(review): the SQL below also calls read_parquet(); if that function
    # name is itself on the blocklist, this test would pass without the
    # relative-path pattern — confirm the pattern is exercised on its own.
    http, auth_token = client
    payload = {"sql": "SELECT * FROM read_parquet('../secret/data.parquet')"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
def test_blocks_pragma_table_info(self, client):
    """A query calling pragma_table_info() must be answered with HTTP 400."""
    # The fixture unpacks into the HTTP client and the token passed to _headers.
    http, auth_token = client
    payload = {"sql": "SELECT * FROM pragma_table_info('users')"}
    response = http.post("/api/query", json=payload, headers=_headers(auth_token))
    assert response.status_code == 400
# ---- Auth Edge Cases ----