feat: add column metadata API with Keboola push support

This commit is contained in:
ZdenekSrotyr 2026-04-10 19:44:03 +02:00
parent a3531f0ead
commit d0cdfcf8c1
3 changed files with 167 additions and 0 deletions

130
app/api/metadata.py Normal file
View file

@ -0,0 +1,130 @@
"""Column metadata API endpoints — CRUD and Keboola push."""
import logging
import os
from typing import List, Optional
import duckdb
import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from app.auth.dependencies import get_current_user, require_admin, _get_db
from src.repositories.column_metadata import ColumnMetadataRepository
from src.repositories.table_registry import TableRegistryRepository
logger = logging.getLogger(__name__)
router = APIRouter(tags=["metadata"])
class ColumnMetadataItem(BaseModel):
column_name: str
basetype: Optional[str] = None
description: Optional[str] = None
confidence: str = "manual"
class ColumnMetadataSave(BaseModel):
columns: List[ColumnMetadataItem]
@router.get("/api/admin/metadata/{table_id}")
async def get_table_metadata(
table_id: str,
user: dict = Depends(get_current_user),
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
"""Return column metadata for a table."""
repo = ColumnMetadataRepository(conn)
columns = repo.list_for_table(table_id)
return {"table_id": table_id, "columns": columns}
@router.post("/api/admin/metadata/{table_id}")
async def save_table_metadata(
table_id: str,
body: ColumnMetadataSave,
user: dict = Depends(require_admin),
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
"""Save column metadata for a table. Admin only."""
repo = ColumnMetadataRepository(conn)
for item in body.columns:
repo.save(
table_id=table_id,
column_name=item.column_name,
basetype=item.basetype,
description=item.description,
confidence=item.confidence,
)
return {"status": "ok", "count": len(body.columns)}
@router.post("/api/admin/metadata/{table_id}/push")
async def push_metadata_to_source(
table_id: str,
user: dict = Depends(require_admin),
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
"""Push column metadata to Keboola Storage API. Admin only."""
registry_repo = TableRegistryRepository(conn)
table = registry_repo.get(table_id)
if not table:
raise HTTPException(status_code=404, detail=f"Table not found: {table_id}")
source_type = table.get("source_type", "")
if source_type != "keboola":
raise HTTPException(
status_code=400,
detail=f"Push is only supported for keboola tables (table source_type={source_type!r})",
)
source_table = table.get("source_table") or table_id
stack_url = os.environ.get("KBC_STACK_URL", "").rstrip("/")
token = os.environ.get("KBC_STORAGE_TOKEN", "")
if not stack_url or not token:
raise HTTPException(
status_code=500,
detail="KBC_STACK_URL and KBC_STORAGE_TOKEN must be set",
)
metadata_repo = ColumnMetadataRepository(conn)
columns = metadata_repo.list_for_table(table_id)
if not columns:
return {"status": "ok", "pushed": 0, "message": "No column metadata to push"}
pushed = 0
errors = []
for col in columns:
column_name = col["column_name"]
metadata_payload = []
if col.get("basetype"):
metadata_payload.append({"key": "KBC.datatype.basetype", "value": col["basetype"]})
if col.get("description"):
metadata_payload.append({"key": "KBC.description", "value": col["description"]})
if not metadata_payload:
continue
endpoint = f"{stack_url}/v2/storage/tables/{source_table}/columns/{column_name}/metadata"
try:
resp = httpx.post(
endpoint,
headers={"X-StorageApi-Token": token},
json={"provider": "ai-metadata-enrichment", "metadata": metadata_payload},
timeout=30,
)
if resp.status_code in (200, 201):
pushed += 1
else:
errors.append(f"{column_name}: {resp.status_code} {resp.text[:200]}")
except httpx.RequestError as e:
errors.append(f"{column_name}: request error — {e}")
result = {"status": "ok", "pushed": pushed}
if errors:
result["errors"] = errors
return result

View file

@ -28,6 +28,7 @@ from app.api.permissions import router as permissions_router
from app.api.access_requests import router as access_requests_router
from app.api.jira_webhooks import router as jira_webhooks_router
from app.api.metrics import router as metrics_router
from app.api.metadata import router as metadata_router
from app.web.router import router as web_router
logger = logging.getLogger(__name__)
@ -135,6 +136,7 @@ def create_app() -> FastAPI:
app.include_router(access_requests_router)
app.include_router(jira_webhooks_router)
app.include_router(metrics_router)
app.include_router(metadata_router)
# Web UI router (must be last — has catch-all routes)
app.include_router(web_router)

View file

@ -329,3 +329,38 @@ class TestMetricsAPI:
data = resp.json()
assert data["count"] == 1
assert data["metrics"][0]["category"] == "finance"
class TestMetadataAPI:
def test_get_metadata_empty(self, seeded_client):
client, admin_token, _ = seeded_client
resp = client.get("/api/admin/metadata/orders", headers={"Authorization": f"Bearer {admin_token}"})
assert resp.status_code == 200
assert resp.json()["columns"] == []
def test_save_and_get_metadata(self, seeded_client):
client, admin_token, _ = seeded_client
resp = client.post(
"/api/admin/metadata/orders",
json={"columns": [
{"column_name": "id", "basetype": "STRING", "description": "Order ID"},
{"column_name": "total", "basetype": "NUMERIC", "description": "Total"},
]},
headers={"Authorization": f"Bearer {admin_token}"},
)
assert resp.status_code == 200
assert resp.json()["status"] == "ok"
assert resp.json()["count"] == 2
resp = client.get("/api/admin/metadata/orders", headers={"Authorization": f"Bearer {admin_token}"})
assert resp.status_code == 200
assert len(resp.json()["columns"]) == 2
def test_analyst_cannot_save_metadata(self, seeded_client):
client, _, analyst_token = seeded_client
resp = client.post(
"/api/admin/metadata/orders",
json={"columns": [{"column_name": "id", "basetype": "STRING"}]},
headers={"Authorization": f"Bearer {analyst_token}"},
)
assert resp.status_code == 403