Schema v3: add an is_public column to table_registry (default true). src/rbac.py: can_access_table() checks admin bypass, public flag, explicit permissions, and wildcard bucket permissions. API enforcement: manifest filters tables by user access; download returns 403 if the user has no access; catalog filters the table list; query validates referenced tables against the allowed list. New admin permissions API (/api/admin/permissions) for grant/revoke. 28 access control tests + 733 total tests passing.
63 lines
2 KiB
Python
63 lines
2 KiB
Python
"""Data download endpoint — streaming parquet files."""
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
from fastapi.responses import FileResponse
|
|
import duckdb
|
|
|
|
from app.auth.dependencies import get_current_user, _get_db
|
|
from src.rbac import can_access_table
|
|
|
|
router = APIRouter(prefix="/api/data", tags=["data"])
|
|
|
|
|
|
def _get_data_dir() -> Path:
    """Return the root data directory.

    The ``DATA_DIR`` environment variable is read on every call; when it is
    unset the relative path ``./data`` is used instead.
    """
    configured_root = os.getenv("DATA_DIR", "./data")
    return Path(configured_root)
|
|
|
|
|
|
def _etag_matches(if_none_match: str, etag: str) -> bool:
    """Return True when an ``If-None-Match`` header value matches *etag*.

    The header may carry a comma-separated list of validators. Comparison is
    weak (a leading ``W/`` on a validator is ignored) and ``*`` matches any
    current representation.
    """
    for validator in if_none_match.split(","):
        validator = validator.strip()
        if validator == "*":
            return True
        if validator.startswith("W/"):
            validator = validator[2:]
        if validator == etag:
            return True
    return False


@router.get("/{table_id}/download")
async def download_table(
    table_id: str,
    request: Request,
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Stream a parquet file for download. Supports ETag for caching.

    The access check runs before any filesystem lookup. The file is searched
    for under ``<data_dir>/extracts`` (as ``data/<table_id>.parquet`` at any
    depth) and, if not found there, under ``<data_dir>/src_data/parquet`` (as
    ``<table_id>.parquet`` at any depth). The first match is served.

    Args:
        table_id: Table identifier taken from the URL path.
        request: Incoming request; only its ``If-None-Match`` header is read.
        user: Authenticated user, passed to ``can_access_table``.
        conn: DuckDB connection, passed to ``can_access_table``.

    Returns:
        A ``FileResponse`` carrying the parquet file and an ``ETag`` header, or
        an empty 304 response when the client's ``If-None-Match`` matches.

    Raises:
        HTTPException: 403 when ``can_access_table`` denies access, 404 when no
            matching parquet file exists.
    """
    from glob import escape as glob_escape

    from starlette.responses import Response

    # Check access FIRST
    if not can_access_table(user, table_id, conn):
        raise HTTPException(status_code=403, detail="Access denied to this table")

    data_dir = _get_data_dir()

    # table_id is client-supplied: escape glob metacharacters so that the file
    # served is exactly the table the access check above was performed for.
    file_name = f"{glob_escape(table_id)}.parquet"

    # NOTE(review): the directory walks below are blocking filesystem calls
    # inside an async handler — confirm this is acceptable for large trees.

    # Search in extracts directory (v2 extract.duckdb architecture)
    extracts_dir = data_dir / "extracts"
    candidates = list(extracts_dir.rglob(f"data/{file_name}")) if extracts_dir.exists() else []

    # Fallback to legacy path for backward compatibility. rglob() is already
    # recursive, so a single pattern covers every nesting depth.
    if not candidates:
        parquet_dir = data_dir / "src_data" / "parquet"
        candidates = list(parquet_dir.rglob(file_name))

    if not candidates:
        raise HTTPException(status_code=404, detail=f"Table '{table_id}' not found")

    file_path = candidates[0]

    # ETag support: the validator is the file's modification time in ns.
    try:
        stat = file_path.stat()
    except FileNotFoundError:
        # The file vanished between the directory walk and stat().
        raise HTTPException(status_code=404, detail=f"Table '{table_id}' not found") from None

    etag = f'"{stat.st_mtime_ns}"'
    if_none_match = request.headers.get("if-none-match")
    if if_none_match and _etag_matches(if_none_match, etag):
        # A 304 repeats the ETag that the 200 response would have carried.
        return Response(status_code=304, headers={"ETag": etag})

    return FileResponse(
        path=file_path,
        filename=f"{table_id}.parquet",
        media_type="application/octet-stream",
        headers={"ETag": etag},
    )
|