diff --git a/app/api/query.py b/app/api/query.py index 040385f..75efc5d 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -8,7 +8,7 @@ from pydantic import BaseModel import duckdb from app.auth.dependencies import get_current_user, _get_db -from src.db import get_analytics_db +from src.db import get_analytics_db_readonly from src.rbac import get_accessible_tables router = APIRouter(prefix="/api/query", tags=["query"]) @@ -43,6 +43,7 @@ async def execute_query( # File access functions "read_csv", "read_json", "read_parquet(", "read_text", "write_csv", "write_parquet", + "read_blob", "glob(", "read_ndjson", "'/", '"/', # Multiple statements ";", ] @@ -55,7 +56,7 @@ async def execute_query( # Get allowed tables for this user allowed = get_accessible_tables(user, conn) - analytics = get_analytics_db() + analytics = get_analytics_db_readonly() try: if allowed is not None: # None = admin, sees all # Get all views in analytics DB diff --git a/app/api/scripts.py b/app/api/scripts.py index 66ba56f..10cf0fa 100644 --- a/app/api/scripts.py +++ b/app/api/scripts.py @@ -143,6 +143,30 @@ def _execute_script(source: str, name: str) -> dict: "delattr(", "breakpoint(", ] + import ast + + BLOCKED_MODULES = {"os", "sys", "subprocess", "shutil", "ctypes", "importlib", "socket", + "requests", "urllib", "http", "signal", "pathlib", "builtins"} + BLOCKED_FUNCTIONS = {"exec", "eval", "compile", "open", "globals", "locals", + "getattr", "setattr", "delattr", "breakpoint", "__import__"} + + try: + tree = ast.parse(source) + except SyntaxError as e: + raise HTTPException(status_code=400, detail=f"Script syntax error: {e}") + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + if alias.name.split(".")[0] in BLOCKED_MODULES: + raise HTTPException(status_code=400, detail=f"Blocked import: {alias.name}") + elif isinstance(node, ast.ImportFrom): + if node.module and node.module.split(".")[0] in BLOCKED_MODULES: + raise HTTPException(status_code=400, detail=f"Blocked import: {node.module}") + elif isinstance(node, ast.Call): + if isinstance(node.func, ast.Name) and node.func.id in BLOCKED_FUNCTIONS: + raise HTTPException(status_code=400, detail=f"Blocked function: {node.func.id}") + source_lower = source.lower() for pattern in blocked_patterns: if pattern.lower() in source_lower: diff --git a/app/api/upload.py b/app/api/upload.py index 609f800..b927dbf 100644 --- a/app/api/upload.py +++ b/app/api/upload.py @@ -4,6 +4,7 @@ import os import uuid from datetime import datetime, timezone from pathlib import Path +from pathlib import Path as _Path from fastapi import APIRouter, Depends, HTTPException, UploadFile, File from pydantic import BaseModel @@ -27,7 +28,10 @@ async def upload_session( sessions_dir = _get_data_dir() / "user_sessions" / user_id sessions_dir.mkdir(parents=True, exist_ok=True) - filename = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl" + raw_name = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl" + filename = _Path(raw_name).name # Strips directory traversal components + if not filename or filename.startswith("."): + filename = f"upload_{uuid.uuid4().hex[:8]}" target = sessions_dir / filename content = await file.read() target.write_bytes(content) @@ -44,7 +48,10 @@ async def upload_artifact( artifacts_dir = _get_data_dir() / "user_artifacts" / user_id artifacts_dir.mkdir(parents=True, exist_ok=True) - filename = file.filename or f"artifact_{uuid.uuid4().hex[:8]}" + raw_name = file.filename or f"artifact_{uuid.uuid4().hex[:8]}" + filename = _Path(raw_name).name # Strips directory traversal components + if not filename or filename.startswith("."): + filename = f"upload_{uuid.uuid4().hex[:8]}" target = artifacts_dir / filename content = await file.read() target.write_bytes(content) diff --git a/src/db.py b/src/db.py index a54bfaa..27d60f1 100644 --- a/src/db.py +++ b/src/db.py @@ -212,6 +212,20 @@ def get_analytics_db() -> duckdb.DuckDBPyConnection: return duckdb.connect(str(db_path)) +def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection: + """Read-only connection to analytics DB. Blocks writes and external access.""" + db_path = _get_data_dir() / "analytics" / "server.duckdb" + if not db_path.exists(): + db_path.parent.mkdir(parents=True, exist_ok=True) + return duckdb.connect(str(db_path), read_only=False) # Can't open read-only if new + conn = duckdb.connect(str(db_path), read_only=True) + try: + conn.execute("SET enable_external_access = false") + except Exception: + pass # Older DuckDB may not support this + return conn + + _V1_TO_V2_MIGRATIONS = [ "ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS source_type VARCHAR", "ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS bucket VARCHAR", diff --git a/tests/test_api_scripts.py b/tests/test_api_scripts.py index b9dcae7..2bdeaba 100644 --- a/tests/test_api_scripts.py +++ b/tests/test_api_scripts.py @@ -75,7 +75,8 @@ class TestScriptsAPI: "source": "import subprocess; subprocess.run(['ls'])", "name": "bad", }, headers=headers) assert resp.status_code == 400 - assert "disallowed" in resp.json()["detail"] + detail = resp.json()["detail"] + assert "disallowed" in detail or "Blocked" in detail def test_deploy_run_undeploy(self, client): c, _, analyst_token = client