fix: address Devin review — partial download cleanup, category validation, path escaping, docs
- cli/commands/analyst.py: delete partial parquet file on download failure to unblock re-download - cli/commands/analyst.py: escape single quotes in parquet path to prevent SQL injection - app/api/metrics.py: replace tempfile-based import with inline YAML parse + direct repo.create(); validates name+category upfront and returns 400 if missing; removes os/tempfile imports - CLAUDE.md: update schema version text to v4 with full migration chain Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4b4c071959
commit
fbad3f5538
3 changed files with 61 additions and 14 deletions
|
|
@ -170,7 +170,7 @@ Auth providers in `app/auth/` (FastAPI-based):
|
|||
## Key Implementation Details
|
||||
|
||||
### DuckDB Schema (src/db.py)
|
||||
- Schema v3 with auto-migration from v1→v2→v3
|
||||
- Schema v4 with auto-migration from v1→v2→v3→v4
|
||||
- `table_registry`: id, name, source_type, bucket, source_table, query_mode, sync_schedule, etc.
|
||||
- `sync_state`, `sync_history`: track extraction progress
|
||||
- `users`, `dataset_permissions`, `audit_log`: auth + RBAC
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
"""Metrics API endpoints — CRUD for metric definitions stored in DuckDB."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List, Optional
|
||||
|
||||
import duckdb
|
||||
|
|
@ -119,14 +117,60 @@ async def import_metrics(
|
|||
):
|
||||
"""Import metrics from uploaded YAML file."""
|
||||
content = await file.read()
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".yml", delete=False, mode="wb") as tmp:
|
||||
tmp.write(content)
|
||||
tmp_path = tmp.name
|
||||
|
||||
try:
|
||||
repo = MetricRepository(conn)
|
||||
count = repo.import_from_yaml(tmp_path)
|
||||
return {"status": "imported", "count": count}
|
||||
finally:
|
||||
os.unlink(tmp_path)
|
||||
data = yaml.safe_load(content)
|
||||
except yaml.YAMLError as e:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}")
|
||||
|
||||
if not data:
|
||||
raise HTTPException(status_code=400, detail="Empty YAML file")
|
||||
|
||||
metric_list = data if isinstance(data, list) else [data]
|
||||
repo = MetricRepository(conn)
|
||||
count = 0
|
||||
|
||||
for metric in metric_list:
|
||||
if not isinstance(metric, dict):
|
||||
continue
|
||||
name = metric.get("name")
|
||||
category = metric.get("category")
|
||||
if not name or not category:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Each metric must have 'name' and 'category' fields",
|
||||
)
|
||||
|
||||
metric_id = f"{category}/{name}"
|
||||
table_name = metric.pop("table", None) or metric.get("table_name")
|
||||
|
||||
# Collect sql_by_* variants
|
||||
sql_variants = {}
|
||||
for key in list(metric.keys()):
|
||||
if key.startswith("sql_by_"):
|
||||
sql_variants[key[4:]] = metric.pop(key)
|
||||
|
||||
repo.create(
|
||||
id=metric_id,
|
||||
name=name,
|
||||
display_name=metric.get("display_name", name),
|
||||
category=category,
|
||||
description=metric.get("description"),
|
||||
type=metric.get("type", "sum"),
|
||||
unit=metric.get("unit"),
|
||||
grain=metric.get("grain", "monthly"),
|
||||
table_name=table_name,
|
||||
tables=metric.get("tables"),
|
||||
expression=metric.get("expression"),
|
||||
time_column=metric.get("time_column"),
|
||||
dimensions=metric.get("dimensions"),
|
||||
filters=metric.get("filters"),
|
||||
synonyms=metric.get("synonyms"),
|
||||
notes=metric.get("notes"),
|
||||
sql=metric.get("sql", ""),
|
||||
sql_variants=sql_variants if sql_variants else None,
|
||||
validation=metric.get("validation"),
|
||||
source="yaml_import",
|
||||
)
|
||||
count += 1
|
||||
|
||||
return {"status": "imported", "count": count}
|
||||
|
|
|
|||
|
|
@ -197,6 +197,8 @@ def _download_data(workspace: Path, server_url: str, token: str) -> int:
|
|||
typer.echo(f" Downloaded {table_id}")
|
||||
except Exception as e:
|
||||
typer.echo(f" Warning: could not download {table_id}: {e}", err=True)
|
||||
if target.exists():
|
||||
target.unlink()
|
||||
|
||||
return downloaded
|
||||
|
||||
|
|
@ -231,10 +233,11 @@ def _initialize_duckdb(workspace: Path) -> int:
|
|||
typer.echo(f" Warning: Skipping {pq_file.name}: unsafe view name", err=True)
|
||||
continue
|
||||
abs_path = str(pq_resolved)
|
||||
safe_path = abs_path.replace("'", "''")
|
||||
try:
|
||||
conn.execute(f'DROP VIEW IF EXISTS "{view_name}"')
|
||||
conn.execute(
|
||||
f"CREATE VIEW \"{view_name}\" AS SELECT * FROM read_parquet('{abs_path}')"
|
||||
f"CREATE VIEW \"{view_name}\" AS SELECT * FROM read_parquet('{safe_path}')"
|
||||
)
|
||||
count = conn.execute(f'SELECT count(*) FROM "{view_name}"').fetchone()[0]
|
||||
total_rows += count
|
||||
|
|
|
|||
Loading…
Reference in a new issue