security: harden query (read-only DB), uploads (path sanitization), scripts (AST validation)
This commit is contained in:
parent
224635b88d
commit
05a1b452e9
5 changed files with 52 additions and 5 deletions
|
|
@ -8,7 +8,7 @@ from pydantic import BaseModel
|
||||||
import duckdb
|
import duckdb
|
||||||
|
|
||||||
from app.auth.dependencies import get_current_user, _get_db
|
from app.auth.dependencies import get_current_user, _get_db
|
||||||
from src.db import get_analytics_db
|
from src.db import get_analytics_db_readonly
|
||||||
from src.rbac import get_accessible_tables
|
from src.rbac import get_accessible_tables
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/query", tags=["query"])
|
router = APIRouter(prefix="/api/query", tags=["query"])
|
||||||
|
|
@ -43,6 +43,7 @@ async def execute_query(
|
||||||
# File access functions
|
# File access functions
|
||||||
"read_csv", "read_json", "read_parquet(", "read_text",
|
"read_csv", "read_json", "read_parquet(", "read_text",
|
||||||
"write_csv", "write_parquet",
|
"write_csv", "write_parquet",
|
||||||
|
"read_blob", "glob(", "read_ndjson", "'/", '"/',
|
||||||
# Multiple statements
|
# Multiple statements
|
||||||
";",
|
";",
|
||||||
]
|
]
|
||||||
|
|
@ -55,7 +56,7 @@ async def execute_query(
|
||||||
# Get allowed tables for this user
|
# Get allowed tables for this user
|
||||||
allowed = get_accessible_tables(user, conn)
|
allowed = get_accessible_tables(user, conn)
|
||||||
|
|
||||||
analytics = get_analytics_db()
|
analytics = get_analytics_db_readonly()
|
||||||
try:
|
try:
|
||||||
if allowed is not None: # None = admin, sees all
|
if allowed is not None: # None = admin, sees all
|
||||||
# Get all views in analytics DB
|
# Get all views in analytics DB
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,30 @@ def _execute_script(source: str, name: str) -> dict:
|
||||||
"delattr(",
|
"delattr(",
|
||||||
"breakpoint(",
|
"breakpoint(",
|
||||||
]
|
]
|
||||||
|
import ast
|
||||||
|
|
||||||
|
BLOCKED_MODULES = {"os", "sys", "subprocess", "shutil", "ctypes", "importlib", "socket",
|
||||||
|
"requests", "urllib", "http", "signal", "pathlib", "builtins"}
|
||||||
|
BLOCKED_FUNCTIONS = {"exec", "eval", "compile", "open", "globals", "locals",
|
||||||
|
"getattr", "setattr", "delattr", "breakpoint", "__import__"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
tree = ast.parse(source)
|
||||||
|
except SyntaxError as e:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Script syntax error: {e}")
|
||||||
|
|
||||||
|
for node in ast.walk(tree):
|
||||||
|
if isinstance(node, ast.Import):
|
||||||
|
for alias in node.names:
|
||||||
|
if alias.name.split(".")[0] in BLOCKED_MODULES:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Blocked import: {alias.name}")
|
||||||
|
elif isinstance(node, ast.ImportFrom):
|
||||||
|
if node.module and node.module.split(".")[0] in BLOCKED_MODULES:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Blocked import: {node.module}")
|
||||||
|
elif isinstance(node, ast.Call):
|
||||||
|
if isinstance(node.func, ast.Name) and node.func.id in BLOCKED_FUNCTIONS:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Blocked function: {node.func.id}")
|
||||||
|
|
||||||
source_lower = source.lower()
|
source_lower = source.lower()
|
||||||
for pattern in blocked_patterns:
|
for pattern in blocked_patterns:
|
||||||
if pattern.lower() in source_lower:
|
if pattern.lower() in source_lower:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import os
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from pathlib import Path as _Path
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
@ -27,7 +28,10 @@ async def upload_session(
|
||||||
sessions_dir = _get_data_dir() / "user_sessions" / user_id
|
sessions_dir = _get_data_dir() / "user_sessions" / user_id
|
||||||
sessions_dir.mkdir(parents=True, exist_ok=True)
|
sessions_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
filename = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl"
|
raw_name = file.filename or f"session_{uuid.uuid4().hex[:8]}.jsonl"
|
||||||
|
filename = _Path(raw_name).name # Strips directory traversal components
|
||||||
|
if not filename or filename.startswith("."):
|
||||||
|
filename = f"upload_{uuid.uuid4().hex[:8]}"
|
||||||
target = sessions_dir / filename
|
target = sessions_dir / filename
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
target.write_bytes(content)
|
target.write_bytes(content)
|
||||||
|
|
@ -44,7 +48,10 @@ async def upload_artifact(
|
||||||
artifacts_dir = _get_data_dir() / "user_artifacts" / user_id
|
artifacts_dir = _get_data_dir() / "user_artifacts" / user_id
|
||||||
artifacts_dir.mkdir(parents=True, exist_ok=True)
|
artifacts_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
filename = file.filename or f"artifact_{uuid.uuid4().hex[:8]}"
|
raw_name = file.filename or f"artifact_{uuid.uuid4().hex[:8]}"
|
||||||
|
filename = _Path(raw_name).name # Strips directory traversal components
|
||||||
|
if not filename or filename.startswith("."):
|
||||||
|
filename = f"upload_{uuid.uuid4().hex[:8]}"
|
||||||
target = artifacts_dir / filename
|
target = artifacts_dir / filename
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
target.write_bytes(content)
|
target.write_bytes(content)
|
||||||
|
|
|
||||||
14
src/db.py
14
src/db.py
|
|
@ -212,6 +212,20 @@ def get_analytics_db() -> duckdb.DuckDBPyConnection:
|
||||||
return duckdb.connect(str(db_path))
|
return duckdb.connect(str(db_path))
|
||||||
|
|
||||||
|
|
||||||
|
def get_analytics_db_readonly() -> duckdb.DuckDBPyConnection:
|
||||||
|
"""Read-only connection to analytics DB. Blocks writes and external access."""
|
||||||
|
db_path = _get_data_dir() / "analytics" / "server.duckdb"
|
||||||
|
if not db_path.exists():
|
||||||
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
return duckdb.connect(str(db_path), read_only=False) # Can't open read-only if new
|
||||||
|
conn = duckdb.connect(str(db_path), read_only=True)
|
||||||
|
try:
|
||||||
|
conn.execute("SET enable_external_access = false")
|
||||||
|
except Exception:
|
||||||
|
pass # Older DuckDB may not support this
|
||||||
|
return conn
|
||||||
|
|
||||||
|
|
||||||
_V1_TO_V2_MIGRATIONS = [
|
_V1_TO_V2_MIGRATIONS = [
|
||||||
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS source_type VARCHAR",
|
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS source_type VARCHAR",
|
||||||
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS bucket VARCHAR",
|
"ALTER TABLE table_registry ADD COLUMN IF NOT EXISTS bucket VARCHAR",
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,8 @@ class TestScriptsAPI:
|
||||||
"source": "import subprocess; subprocess.run(['ls'])", "name": "bad",
|
"source": "import subprocess; subprocess.run(['ls'])", "name": "bad",
|
||||||
}, headers=headers)
|
}, headers=headers)
|
||||||
assert resp.status_code == 400
|
assert resp.status_code == 400
|
||||||
assert "disallowed" in resp.json()["detail"]
|
detail = resp.json()["detail"]
|
||||||
|
assert "disallowed" in detail or "Blocked" in detail
|
||||||
|
|
||||||
def test_deploy_run_undeploy(self, client):
|
def test_deploy_run_undeploy(self, client):
|
||||||
c, _, analyst_token = client
|
c, _, analyst_token = client
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue