diff --git a/app/api/metadata.py b/app/api/metadata.py new file mode 100644 index 0000000..c75a3dc --- /dev/null +++ b/app/api/metadata.py @@ -0,0 +1,130 @@ +"""Column metadata API endpoints — CRUD and Keboola push.""" + +import logging +import os +from typing import List, Optional + +import duckdb +import httpx +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel + +from app.auth.dependencies import get_current_user, require_admin, _get_db +from src.repositories.column_metadata import ColumnMetadataRepository +from src.repositories.table_registry import TableRegistryRepository + +logger = logging.getLogger(__name__) +router = APIRouter(tags=["metadata"]) + + +class ColumnMetadataItem(BaseModel): + column_name: str + basetype: Optional[str] = None + description: Optional[str] = None + confidence: str = "manual" + + +class ColumnMetadataSave(BaseModel): + columns: List[ColumnMetadataItem] + + +@router.get("/api/admin/metadata/{table_id}") +async def get_table_metadata( + table_id: str, + user: dict = Depends(get_current_user), + conn: duckdb.DuckDBPyConnection = Depends(_get_db), +): + """Return column metadata for a table.""" + repo = ColumnMetadataRepository(conn) + columns = repo.list_for_table(table_id) + return {"table_id": table_id, "columns": columns} + + +@router.post("/api/admin/metadata/{table_id}") +async def save_table_metadata( + table_id: str, + body: ColumnMetadataSave, + user: dict = Depends(require_admin), + conn: duckdb.DuckDBPyConnection = Depends(_get_db), +): + """Save column metadata for a table. Admin only.""" + repo = ColumnMetadataRepository(conn) + for item in body.columns: + repo.save( + table_id=table_id, + column_name=item.column_name, + basetype=item.basetype, + description=item.description, + confidence=item.confidence, + ) + return {"status": "ok", "count": len(body.columns)} + + +@router.post("/api/admin/metadata/{table_id}/push") +async def push_metadata_to_source( + table_id: str, + user: dict = Depends(require_admin), + conn: duckdb.DuckDBPyConnection = Depends(_get_db), +): + """Push column metadata to Keboola Storage API. Admin only.""" + registry_repo = TableRegistryRepository(conn) + table = registry_repo.get(table_id) + if not table: + raise HTTPException(status_code=404, detail=f"Table not found: {table_id}") + + source_type = table.get("source_type", "") + if source_type != "keboola": + raise HTTPException( + status_code=400, + detail=f"Push is only supported for keboola tables (table source_type={source_type!r})", + ) + + source_table = table.get("source_table") or table_id + stack_url = os.environ.get("KBC_STACK_URL", "").rstrip("/") + token = os.environ.get("KBC_STORAGE_TOKEN", "") + + if not stack_url or not token: + raise HTTPException( + status_code=500, + detail="KBC_STACK_URL and KBC_STORAGE_TOKEN must be set", + ) + + metadata_repo = ColumnMetadataRepository(conn) + columns = metadata_repo.list_for_table(table_id) + if not columns: + return {"status": "ok", "pushed": 0, "message": "No column metadata to push"} + + pushed = 0 + errors = [] + + for col in columns: + column_name = col["column_name"] + metadata_payload = [] + + if col.get("basetype"): + metadata_payload.append({"key": "KBC.datatype.basetype", "value": col["basetype"]}) + if col.get("description"): + metadata_payload.append({"key": "KBC.description", "value": col["description"]}) + + if not metadata_payload: + continue + + endpoint = f"{stack_url}/v2/storage/tables/{source_table}/columns/{column_name}/metadata" + try: + resp = httpx.post( + endpoint, + headers={"X-StorageApi-Token": token}, + json={"provider": "ai-metadata-enrichment", "metadata": metadata_payload}, + timeout=30, + ) + if resp.status_code in (200, 201): + pushed += 1 + else: + errors.append(f"{column_name}: {resp.status_code} {resp.text[:200]}") + except httpx.RequestError as e: + errors.append(f"{column_name}: request error — {e}") + + result = {"status": "ok", "pushed": pushed} + if errors: + result["errors"] = errors + return result diff --git a/app/main.py b/app/main.py index fc7eb04..78c235a 100644 --- a/app/main.py +++ b/app/main.py @@ -28,6 +28,7 @@ from app.api.permissions import router as permissions_router from app.api.access_requests import router as access_requests_router from app.api.jira_webhooks import router as jira_webhooks_router from app.api.metrics import router as metrics_router +from app.api.metadata import router as metadata_router from app.web.router import router as web_router logger = logging.getLogger(__name__) @@ -135,6 +136,7 @@ def create_app() -> FastAPI: app.include_router(access_requests_router) app.include_router(jira_webhooks_router) app.include_router(metrics_router) + app.include_router(metadata_router) # Web UI router (must be last — has catch-all routes) app.include_router(web_router) diff --git a/tests/test_api.py b/tests/test_api.py index 4a586d1..0e752c2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -329,3 +329,38 @@ class TestMetricsAPI: data = resp.json() assert data["count"] == 1 assert data["metrics"][0]["category"] == "finance" + + +class TestMetadataAPI: + def test_get_metadata_empty(self, seeded_client): + client, admin_token, _ = seeded_client + resp = client.get("/api/admin/metadata/orders", headers={"Authorization": f"Bearer {admin_token}"}) + assert resp.status_code == 200 + assert resp.json()["columns"] == [] + + def test_save_and_get_metadata(self, seeded_client): + client, admin_token, _ = seeded_client + resp = client.post( + "/api/admin/metadata/orders", + json={"columns": [ + {"column_name": "id", "basetype": "STRING", "description": "Order ID"}, + {"column_name": "total", "basetype": "NUMERIC", "description": "Total"}, + ]}, + headers={"Authorization": f"Bearer {admin_token}"}, + ) + assert resp.status_code == 200 + assert resp.json()["status"] == "ok" + assert resp.json()["count"] == 2 + + resp = client.get("/api/admin/metadata/orders", headers={"Authorization": f"Bearer {admin_token}"}) + assert resp.status_code == 200 + assert len(resp.json()["columns"]) == 2 + + def test_analyst_cannot_save_metadata(self, seeded_client): + client, _, analyst_token = seeded_client + resp = client.post( + "/api/admin/metadata/orders", + json={"columns": [{"column_name": "id", "basetype": "STRING"}]}, + headers={"Authorization": f"Bearer {analyst_token}"}, + ) + assert resp.status_code == 403