security: harden query (read-only DB), uploads (path sanitization), scripts (AST validation)

This commit is contained in:
ZdenekSrotyr 2026-04-08 12:09:19 +02:00
parent 224635b88d
commit 05a1b452e9
5 changed files with 52 additions and 5 deletions

View file

@ -8,7 +8,7 @@ from pydantic import BaseModel
import duckdb import duckdb
from app.auth.dependencies import get_current_user, _get_db from app.auth.dependencies import get_current_user, _get_db
from src.db import get_analytics_db from src.db import get_analytics_db_readonly
from src.rbac import get_accessible_tables from src.rbac import get_accessible_tables
router = APIRouter(prefix="/api/query", tags=["query"]) router = APIRouter(prefix="/api/query", tags=["query"])
@ -43,6 +43,7 @@ async def execute_query(
# File access functions # File access functions
"read_csv", "read_json", "read_parquet(", "read_text", "read_csv", "read_json", "read_parquet(", "read_text",
"write_csv", "write_parquet", "write_csv", "write_parquet",
"read_blob", "glob(", "read_ndjson", "'/", '"/',
# Multiple statements # Multiple statements
";", ";",
] ]
@ -55,7 +56,7 @@ async def execute_query(
# Get allowed tables for this user # Get allowed tables for this user
allowed = get_accessible_tables(user, conn) allowed = get_accessible_tables(user, conn)
analytics = get_analytics_db() analytics = get_analytics_db_readonly()
try: try:
if allowed is not None: # None = admin, sees all if allowed is not None: # None = admin, sees all
# Get all views in analytics DB # Get all views in analytics DB

View file

@ -143,6 +143,30 @@ def _execute_script(source: str, name: str) -> dict:
"delattr(", "delattr(",
"breakpoint(", "breakpoint(",
] ]
import ast
BLOCKED_MODULES = {"os", "sys", "subprocess", "shutil", "ctypes", "importlib", "socket",
"requests", "urllib", "http", "signal", "pathlib", "builtins"}
BLOCKED_FUNCTIONS = {"exec", "eval", "compile", "open", "globals", "locals",
"getattr", "setattr", "delattr", "breakpoint", "__import__"}
try:
tree = ast.parse(source)
except SyntaxError as e:
raise HTTPException(status_code=400, detail=f"Script syntax error: {e}")
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
if alias.name.split(".")[0] in BLOCKED_MODULES:
raise HTTPException(status_code=400, detail=f"Blocked import: {alias.name}")
elif isinstance(node, ast.ImportFrom):
if node.module and node.module.split(".")[0] in BLOCKED_MODULES:
raise HTTPException(status_code=400, detail=f"Blocked import: {node.module}")
elif isinstance(node, ast.Call):
if isinstance(node.func, ast.Name) and node.func.id in BLOCKED_FUNCTIONS:
raise HTTPException(status_code=400, detail=f"Blocked function: {node.func.id}")
source_lower = source.lower() source_lower = source.lower()
for pattern in blocked_patterns: for pattern in blocked_patterns:
if pattern.lower() in source_lower: if pattern.lower() in source_lower:

View file

@ -4,6 +4,7 @@ import os
import uuid import uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from pathlib import Path as _Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
from pydantic import BaseModel from pydantic import BaseModel
@ -27,7 +28,10 @@ async def upload_session(
sessions_dir = _get_data_dir() / "user_sessions" / user_id sessions_dir = _get_data_dir() / "user_sessions" / user_id
sessions_dir.mkdir(parents=True, exist_ok=True) sessions_dir.mkdir(parents=True, exist_ok=True)
filename = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl" raw_name = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl"
filename = _Path(raw_name).name # Strips directory traversal components
if not filename or filename.startswith("."):
filename = f"upload_{uuid.uuid4().hex[:8]}"
target = sessions_dir / filename target = sessions_dir / filename
content = await file.read() content = await file.read()
target.write_bytes(content) target.write_bytes(content)
@ -44,7 +48,10 @@ async def upload_artifact(
artifacts_dir = _get_data_dir() / "user_artifacts" / user_id artifacts_dir = _get_data_dir() / "user_artifacts" / user_id
artifacts_dir.mkdir(parents=True, exist_ok=True) artifacts_dir.mkdir(parents=True, exist_ok=True)
filename = file.filename or f"artifact_{uuid.uuid4().hex[:8]}" raw_name = file.filename or f"artifact_{uuid.uuid4().hex[:8]}"
filename = _Path(raw_name).name # Strips directory traversal components
if not filename or filename.startswith("."):
filename = f"upload_{uuid.uuid4().hex[:8]}"
target = artifacts_dir / filename target = artifacts_dir / filename
content = await file.read() content = await file.read()
target.write_bytes(content) target.write_bytes(content)

View file

@ -212,6 +212,20 @@ def get_analytics_db() -> duckdb.DuckDBPyConnection:
return duckdb.connect(str(db_path)) return duckdb.connect(str(db_path))
def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection:
"""Read-only connection to analytics DB. Blocks writes and external access."""
db_path = _get_data_dir() / "analytics" / "server.duckdb"
if not db_path.exists():
db_path.parent.mkdir(parents=True, exist_ok=True)
return duckdb.connect(str(db_path), read_only=False) # Can't open read-only if new
conn = duckdb.connect(str(db_path), read_only=True)
try:
conn.execute("SET enable_external_access = false")
except Exception:
pass # Older DuckDB may not support this
return conn
_V1_TO_V2_MIGRATIONS = [ _V1_TO_V2_MIGRATIONS = [
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS source_type VARCHAR", "ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS source_type VARCHAR",
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS bucket VARCHAR", "ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS bucket VARCHAR",

View file

@ -75,7 +75,8 @@ class TestScriptsAPI:
"source": "import subprocess; subprocess.run(['ls'])", "name": "bad", "source": "import subprocess; subprocess.run(['ls'])", "name": "bad",
}, headers=headers) }, headers=headers)
assert resp.status_code == 400 assert resp.status_code == 400
assert "disallowed" in resp.json()["detail"] detail = resp.json()["detail"]
assert "disallowed" in detail or "Blocked" in detail
def test_deploy_run_undeploy(self, client): def test_deploy_run_undeploy(self, client):
c, _, analyst_token = client c, _, analyst_token = client