# Source: agnes-the-ai-analyst/src/repositories/column_metadata.py
# Original listing: 107 lines, 3.9 KiB, Python

"""Repository for column metadata."""
import json
from datetime import datetime, timezone
from typing import Any, Optional, List, Dict
import duckdb
class ColumnMetadataRepository:
    """Read/write access to the ``column_metadata`` table.

    Rows are identified by the composite key ``(table_id, column_name)`` and
    are handed back to callers as plain dicts keyed by result column name.
    """

    # The annotation is quoted so it is not evaluated when the class is
    # defined; type checkers still resolve it against the module's import.
    def __init__(self, conn: "duckdb.DuckDBPyConnection"):
        """Store the connection that every query in this repository uses."""
        self.conn = conn

    @staticmethod
    def _column_names(cursor: Any) -> List[str]:
        """Return the result column names of an executed statement."""
        return [desc[0] for desc in cursor.description]

    def save(
        self,
        table_id: str,
        column_name: str,
        basetype: Optional[str] = None,
        description: Optional[str] = None,
        confidence: str = "manual",
        source: str = "manual",
    ) -> dict:
        """Insert or update column metadata and return the saved record.

        An existing row with the same ``(table_id, column_name)`` has all of
        its other fields overwritten, including with ``None`` when
        ``basetype`` or ``description`` is omitted. ``updated_at`` is set to
        the current UTC time on every call.
        """
        now = datetime.now(timezone.utc)
        self.conn.execute(
            """INSERT INTO column_metadata
                   (table_id, column_name, basetype, description,
                    confidence, source, updated_at)
               VALUES (?, ?, ?, ?, ?, ?, ?)
               ON CONFLICT (table_id, column_name) DO UPDATE SET
                   basetype = excluded.basetype,
                   description = excluded.description,
                   confidence = excluded.confidence,
                   source = excluded.source,
                   updated_at = excluded.updated_at""",
            [table_id, column_name, basetype, description, confidence, source, now],
        )
        # Re-read so the caller sees the row exactly as the database stored it.
        return self.get(table_id, column_name)

    def get(self, table_id: str, column_name: str) -> Optional[Dict[str, Any]]:
        """Return the row for ``(table_id, column_name)``, or None if absent."""
        cursor = self.conn.execute(
            "SELECT * FROM column_metadata WHERE table_id = ? AND column_name = ?",
            [table_id, column_name],
        )
        # Take the column names from this statement's own result, before
        # fetching, instead of from connection-level state afterwards.
        columns = self._column_names(cursor)
        row = cursor.fetchone()
        if row is None:
            return None
        return dict(zip(columns, row))

    def list_for_table(self, table_id: str) -> List[Dict[str, Any]]:
        """Return every row for ``table_id``, ordered by ``column_name``.

        Returns an empty list when the table has no metadata rows.
        """
        cursor = self.conn.execute(
            "SELECT * FROM column_metadata WHERE table_id = ? ORDER BY column_name",
            [table_id],
        )
        columns = self._column_names(cursor)
        return [dict(zip(columns, row)) for row in cursor.fetchall()]

    def delete(self, table_id: str, column_name: str) -> bool:
        """Delete one metadata row. Returns True if a row existed and was deleted."""
        key = [table_id, column_name]
        existing = self.conn.execute(
            "SELECT COUNT(*) FROM column_metadata WHERE table_id = ? AND column_name = ?",
            key,
        ).fetchone()[0]
        if existing == 0:
            return False
        self.conn.execute(
            "DELETE FROM column_metadata WHERE table_id = ? AND column_name = ?",
            key,
        )
        return True

    def import_proposal(self, proposal_path: str) -> int:
        """Import column metadata from a proposal JSON file.

        Expected format::

            {
              "tables": {
                "orders": {
                  "columns": {
                    "id": {"basetype": "STRING", "description": "...", "confidence": "high"}
                  }
                }
              }
            }

        Every column is stored through ``save`` with ``source="ai_enrichment"``;
        ``confidence`` defaults to ``"high"`` when the key is missing. Missing
        or ``null`` ``tables``, table entries, ``columns`` or column entries
        are treated as empty instead of raising.

        Returns:
            The number of columns imported.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        with open(proposal_path, "r", encoding="utf-8") as f:
            proposal = json.load(f)

        # NOTE(review): no transaction is opened here, so a failure part-way
        # through may leave earlier columns saved - confirm whether callers
        # are expected to wrap the import themselves.
        count = 0
        tables = proposal.get("tables") or {}
        for table_id, table_data in tables.items():
            columns = (table_data or {}).get("columns") or {}
            for column_name, col_data in columns.items():
                col_data = col_data or {}
                self.save(
                    table_id=table_id,
                    column_name=column_name,
                    basetype=col_data.get("basetype"),
                    description=col_data.get("description"),
                    confidence=col_data.get("confidence", "high"),
                    source="ai_enrichment",
                )
                count += 1
        return count