# agnes-the-ai-analyst/pyproject.toml

# PEP 621 project metadata and the runtime dependency set shared by the
# server and the CLI wheel.
[project]
name = "agnes-the-ai-analyst"
version = "0.54.4"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"
readme = "README.md"

dependencies = [
    # Core database
    # 1.5.2 fixes a FK-dependency regression that affected ALTER TABLE on
    # tables referenced by other tables — broke the test_db migration
    # ladder replay on 1.5.1. CI runs 1.5.2; local devs need it too.
    "duckdb>=1.5.2",
    # Web framework (FastAPI)
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "python-multipart>=0.0.27",
    "jinja2>=3.1.0",
    "starlette>=0.41.0",
    # Authentication
    "PyJWT>=2.8.0",
    "itsdangerous>=2.1.0",
    "authlib>=1.6.12",
    "argon2-cffi>=23.1.0",
    # HTTP client. `h2` enables HTTP/2 multiplexing for the persistent
    # CLI client used by `agnes pull` (one TCP connection serves N
    # concurrent parquet streams + range chunks). `cli/client.py`
    # gracefully falls back to HTTP/1.1 if h2 is missing, so the `h2`
    # dependency is there for performance, not correctness.
    "httpx>=0.27.0",
    "h2>=4.1.0",
    # CLI
    "typer>=0.12.0",
    "rich>=13.0.0",
    # Configuration
    "python-dotenv>=1.0.0",
    "pyyaml>=6.0",
    # Data processing
    "pandas>=2.0.0",
    "pyarrow>=12.0.0",
    "pytz>=2024.1",
    # SQL parsing — server-side WHERE validator for /api/v2/scan (app/api/where_validator.py)
    # Minimum 30.x — older versions had walk() yielding (node, parent, key)
    # tuples instead of expression nodes, which would silently bypass the
    # WHERE-validator structural checks (isinstance(tuple, exp.Subquery)
    # is always False). 30.x yields nodes directly.
    "sqlglot>=30.0.0",
    # Data source connectors
    "google-cloud-bigquery>=3.0.0",
    "google-cloud-bigquery-storage>=2.0.0",
    # Google Workspace Cloud Identity / Admin SDK (Workspace group membership sync)
    "google-api-python-client>=2.0.0",
    # Profiler visualizations
    "matplotlib>=3.8.0",
    "numpy>=1.24.0",
    # Claude Code marketplace endpoint — pure-Python git server mounted in FastAPI
    "dulwich>=0.22.0",
    "a2wsgi>=1.10.0",
    # In-process TTL cache for marketplace etag (transitively present via
    # google-auth, declared explicitly here because we depend on it directly).
    "cachetools>=5.3.0",
    # Per-IP rate limiting on auth endpoints (#45). In-process counters by
    # default — fine for single-replica deploys. Multi-replica rollouts can
    # swap the storage backend via slowapi's `storage_uri` (Redis, Memcached).
    "slowapi>=0.1.9",
    # LLM provider SDKs — core (not dev) because connectors/llm/*_provider.py
    # is imported by services/{corporate_memory, verification_detector} which
    # the scheduler drives in production. Promoted from [dev] in #176 to fix
    # ModuleNotFoundError boot loops on default Compose deploys.
    "anthropic>=0.30.0",
    "openai>=1.30.0",
    # Keboola Storage API SDK (kbcstorage) — intentionally NOT listed in
    # this array. It is used by:
    #   - `connectors/keboola/client.py` for admin-side bucket / table list
    #     (consumed from `app/api/admin.py` discover-and-register, table
    #     metadata refresh).
    # Extraction itself uses the lightweight `connectors/keboola/storage_api.py`
    # module (export-async + signed-URL download) which talks to Storage API
    # directly via `requests` — no SDK dependency on the data-path side. The
    # SDK stays for the metadata reads.
    #
    # kbcstorage lives in the [server] extra below — see the rationale in
    # [project.optional-dependencies].server. CLI wheels installed via
    # `uv tool install` deliberately ship without it.

    # Server-Sent Events (SSE) response support for Starlette / FastAPI.
    # NOTE(review): the consuming endpoint is not visible from this file —
    # confirm which route streams SSE and record it here.
    "sse-starlette>=2.0",
    # Optional observability — pure-Python, no compilation. Lazily initialized
    # in src/observability/posthog_client.py and only emits events when
    # POSTHOG_API_KEY is set in the environment. With the key unset the
    # integration is fully off (no network, no init). See docs/observability.md.
    "posthog>=3.7.0",
    # Rust-backed (ammonia) HTML sanitizer for admin-edited rich content
    # (news intro + body, curated marketplace-metadata.json descriptions).
    # Allowlist-based with per-tag attribute scoping; closes the bypass
    # shapes the legacy regex sanitizer in src/welcome_template.py was
    # vulnerable to. Pre-built wheels published for all supported
    # (mac/linux/windows × arm64/x86_64) targets.
    "nh3>=0.2",
    # CommonMark markdown renderer for curator-authored marketplace-metadata.json
    # rich content (plugin description / sample_interaction.assistant). Pure
    # Python, no compilation. Rendered output is funneled through nh3 above.
    "markdown-it-py>=3.0",
    # Cross-platform advisory file locking for the `agnes push` single-instance
    # guard. Wraps fcntl.flock on POSIX and msvcrt.locking on Windows behind
    # a uniform API; OS releases the lock automatically on process exit (no
    # stale-lock detection required). Used by cli/lib/push_lock.py.
    "filelock>=3.13,<4",
    # Transitive dependency hardened directly to dodge 5 dependabot advisories
    # (4 high, 1 medium) flagged on urllib3<2.7.0: cross-origin sensitive
    # header leak on proxied low-level redirects, decompression-bomb bypass
    # + unbounded decompression chain on the streaming API, redirects-when-
    # retries-disabled. The `[server]` extra below adds kbcstorage which
    # transitively caps urllib3<2.0.0; `[tool.uv] override-dependencies`
    # forces 2.7+ in workspace installs (Dockerfile + dev). Wheel consumers
    # who install only the CLI (`uv tool install <wheel>`) get no kbcstorage
    # and no conflict.
    "urllib3>=2.7.0",
]

# Opt-in extras: `server` (connector SDKs the CLI wheel must not pull in),
# `observability` (intent marker only) and `dev` (test / local tooling).
[project.optional-dependencies]
# Server-side connectors. The CLI wheel does NOT need these — analysts who
# `uv tool install` the wheel never reach a kbcstorage import. Splitting it
# out keeps the wheel's METADATA `Requires-Dist` set free of the
# `kbcstorage<=0.9.5 → urllib3<2.0.0` cap that conflicts with our
# `urllib3>=2.7.0` security pin under any fresh resolver context (where
# `[tool.uv] override-dependencies` does NOT apply — see comment on
# [tool.uv] below). Server install pulls it in via Dockerfile's
# `uv pip install --system --no-cache .[server]`.
server = [
    "kbcstorage>=0.9.0",
]
observability = [
    # Already in base dependencies — listed here so operators who want to
    # be explicit can `pip install -e ".[observability]"` and signal intent.
    "posthog>=3.7.0",
]
# Test runner, pytest plugins, fixture tooling and the local debug toolbar.
dev = [
    "pytest>=9.0.0",
    "pytest-timeout>=2.0.0",
    "pytest-xdist>=3.0.0",
    "faker>=24.0.0",
    # jsonschema validates the corporate-memory extraction-tool golden fixtures
    # under tests/test_corporate_memory_v1.py (extraction.json, correction.json,
    # confidence_calibration.json). Production code does not depend on it.
    "jsonschema>=4.0.0",
    # FastAPI debug toolbar — gated behind DEBUG=1 env var in app/main.py.
    # Provides per-request panels (headers, routes, timer, profiling, etc.)
    # for local development. Never loaded in production (no DEBUG=1 there).
    "fastapi-debug-toolbar>=0.6.3",
]

# Console entry point: installing the package creates an `agnes` command
# that calls the `main` object in the `cli.main` module.
[project.scripts]
agnes = "cli.main:main"

# PEP 517 build configuration: sdists and wheels are built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Top-level packages bundled into the wheel. A package directory that is
# not listed here is not shipped, so add new top-level packages to this list.
[tool.hatch.build.targets.wheel]
packages = ["app", "src", "connectors", "cli", "services", "config"]

[tool.ruff]
line-length = 120
# Ruff's target is the MINIMUM Python version the code must run on. Keep
# it aligned with the lower bound of `requires-python` (">=3.11,<3.14"):
# targeting py313 would let ruff accept or suggest syntax that fails to
# parse on 3.11 / 3.12.
target-version = "py311"

[tool.uv]
# Development-only packages that `uv sync` installs by default. Kept in
# step with [project.optional-dependencies].dev so that a plain `uv sync`
# and `pip install -e ".[dev]"` give the same test environment.
# anthropic / openai are intentionally absent: they are core [project]
# dependencies and are always installed.
# NOTE(review): uv documents `tool.uv.dev-dependencies` as a legacy field
# superseded by `[dependency-groups]` (PEP 735) — consider migrating.
dev-dependencies = [
    "pytest>=9.0.0",
    "pytest-timeout>=2.0.0",
    "pytest-xdist>=3.0.0",
    "faker>=24.0.0",
    # Validates the golden fixtures used by tests/test_corporate_memory_v1.py.
    "jsonschema>=4.0.0",
    "fastapi-debug-toolbar>=0.6.3",
]
# Override the urllib3<2.0.0 ceiling kbcstorage 0.9.5 declares (upstream
# hasn't relaxed it as of 2026-05-12 but the SDK works fine against
# urllib3 2.x in practice — we only use `Client` + `Tables` from it and
# both go through `requests`, which natively supports both lines). Lets
# the resolver pick a urllib3 line that closes the Dependabot advisories
# on older urllib3 (CVE-2024-37891 and later ones).
# TODO: record the remaining advisory IDs here.
# See `urllib3>=2.7.0` in [project] dependencies above for the security
# rationale.
override-dependencies = ["urllib3>=2.7.0"]