## Summary
`agnes query "DESCRIBE unit_economics"` (where `unit_economics` has `query_mode='remote'`) previously returned only DuckDB's nearest-name suggestion (`Did you mean "order_economics"?`), sending users down the wrong path. The CLI now appends a friendly hint about remote tables to that error.
Reproduced from a real analyst session — a colleague spent ~30s diagnosing an error that simply meant "this is a remote table, not materialized locally".
## Test plan
- [x] New test: `_query_local("DESCRIBE unit_economics", ...)` against an empty local DuckDB triggers the new hint, original DuckDB error still echoed.
- [x] Negative test: a syntax-error query does NOT trigger the hint (regex only matches "Table with name X does not exist").
- [x] `pytest tests/test_cli_query*.py` clean.
<!-- devin-review-badge-begin -->
---
<a href="https://app.devin.ai/review/keboola/agnes-the-ai-analyst/pull/219" target="_blank">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1">
<img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open in Devin Review">
</picture>
</a>
<!-- devin-review-badge-end -->
120 lines
4.3 KiB
TOML
120 lines
4.3 KiB
TOML
# Distribution metadata for the Agnes package.
[project]
name = "agnes-the-ai-analyst"
version = "0.46.2"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"
readme = "README.md"

# Runtime dependencies, grouped by purpose.
dependencies = [
    # Core database
    "duckdb>=0.9.0",

    # Web framework (FastAPI)
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "python-multipart>=0.0.27",
    "jinja2>=3.1.0",
    "starlette>=0.41.0",

    # Authentication
    "PyJWT>=2.8.0",
    "itsdangerous>=2.1.0",
    "authlib>=1.6.11",
    "argon2-cffi>=23.1.0",

    # HTTP client. `h2` adds HTTP/2 multiplexing to the persistent CLI
    # client used by `agnes pull`, so one TCP connection serves N concurrent
    # parquet streams + range chunks. `cli/client.py` gracefully falls back
    # to HTTP/1.1 when h2 is missing — this extra is about performance,
    # not correctness.
    "httpx>=0.27.0",
    "h2>=4.1.0",

    # CLI
    "typer>=0.12.0",
    "rich>=13.0.0",

    # Configuration
    "python-dotenv>=1.0.0",
    "pyyaml>=6.0",

    # Data processing
    "pandas>=2.0.0",
    "pyarrow>=12.0.0",
    "pytz>=2024.1",

    # SQL parsing — backs the server-side WHERE validator for /api/v2/scan
    # (app/api/where_validator.py). The floor is 30.x: in older versions
    # walk() yielded (node, parent, key) tuples rather than expression
    # nodes, which would silently bypass the validator's structural checks
    # (isinstance(tuple, exp.Subquery) is always False). 30.x yields nodes
    # directly.
    "sqlglot>=30.0.0",

    # Data source connectors
    "google-cloud-bigquery>=3.0.0",
    "google-cloud-bigquery-storage>=2.0.0",

    # Google Workspace Cloud Identity / Admin SDK (Workspace group
    # membership sync)
    "google-api-python-client>=2.0.0",

    # Profiler visualizations
    "matplotlib>=3.8.0",
    "numpy>=1.24.0",

    # Claude Code marketplace endpoint — pure-Python git server mounted in
    # FastAPI
    "dulwich>=0.22.0",
    "a2wsgi>=1.10.0",

    # In-process TTL cache for the marketplace etag. Already present
    # transitively via google-auth, but declared explicitly because we
    # depend on it directly.
    "cachetools>=5.3.0",

    # Per-IP rate limiting on auth endpoints (#45). Counters are in-process
    # by default, which is fine for single-replica deploys. Multi-replica
    # rollouts can swap the storage backend through slowapi's `storage_uri`
    # (Redis, Memcached).
    "slowapi>=0.1.9",

    # LLM provider SDKs — core (not dev) because connectors/llm/*_provider.py
    # is imported by services/{corporate_memory, verification_detector},
    # which the scheduler drives in production. Promoted from [dev] in #176
    # to fix ModuleNotFoundError boot loops on default Compose deploys.
    "anthropic>=0.30.0",
    "openai>=1.30.0",

    # Keboola Storage API SDK — used by:
    #   - `connectors/keboola/client.py` for admin-side bucket / table list
    #     (consumed from `app/api/admin.py` discover-and-register, table
    #     metadata refresh).
    # Extraction itself goes through the lightweight
    # `connectors/keboola/storage_api.py` module (export-async + signed-URL
    # download), which talks to Storage API directly via `requests` — the
    # data path has no SDK dependency. The SDK stays for the metadata reads.
    "kbcstorage>=0.9.0",
]
|
|
|
|
# Optional extras; `dev` collects test and local-development tooling.
[project.optional-dependencies]
dev = [
    "pytest>=9.0.0",
    "pytest-timeout>=2.0.0",
    "pytest-xdist>=3.0.0",
    "faker>=24.0.0",

    # jsonschema validates the corporate-memory extraction-tool golden
    # fixtures exercised by tests/test_corporate_memory_v1.py
    # (extraction.json, correction.json, confidence_calibration.json).
    # Production code does not depend on it.
    "jsonschema>=4.0.0",

    # FastAPI debug toolbar — only enabled when DEBUG=1 is set (gate lives
    # in app/main.py). Gives per-request panels (headers, routes, timer,
    # profiling, etc.) for local development. Never loaded in production,
    # where DEBUG=1 is not set.
    "fastapi-debug-toolbar>=0.6.3",
]
|
|
|
|
# Console entry point: the `agnes` command maps to `_run_with_clean_errors`
# in the `cli.main` module.
[project.scripts]
agnes = "cli.main:_run_with_clean_errors"
|
|
|
|
# Build configuration: hatchling is both the sole build requirement and the
# build backend.
[build-system]
requires = [
    "hatchling",
]
build-backend = "hatchling.build"
|
|
|
|
# Top-level packages that hatch includes in the wheel.
[tool.hatch.build.targets.wheel]
packages = [
    "app",
    "src",
    "connectors",
    "cli",
    "services",
    "config",
]
|
|
|
|
# Ruff linter/formatter settings.
[tool.ruff]
line-length = 120
# `target-version` is the *minimum* Python version Ruff assumes the code must
# run on. It has to match the floor of [project].requires-python (">=3.11");
# a higher target lets upgrade rules suggest syntax that fails on 3.11/3.12.
target-version = "py311"
|
|
|
|
# Development-only packages installed by uv. This list mirrors
# [project.optional-dependencies].dev so that both install paths provide the
# same tooling.
[tool.uv]
dev-dependencies = [
    "pytest>=9.0.0",
    "pytest-timeout>=2.0.0",
    "pytest-xdist>=3.0.0",
    "faker>=24.0.0",
    # Validates the corporate-memory golden fixtures used by
    # tests/test_corporate_memory_v1.py.
    "jsonschema>=4.0.0",
    "fastapi-debug-toolbar>=0.6.3",
    # anthropic / openai are not repeated here: they are core entries in
    # [project].dependencies and are therefore always installed.
]
|