agnes-the-ai-analyst/pyproject.toml
ZdenekSrotyr ed3e8337ab
fix(jira): harden _remote_links fetch — prevent transient outage from wiping parquet rows (#319)
* fix(jira): harden _remote_links fetch — transient API failure no longer wipes parquet rows

Pre-fix, all three fetch_remote_links call sites (service.py,
scripts/backfill.py, scripts/backfill_remote_links.py) silently
returned [] on 401/403/429/5xx or httpx.RequestError. Callers overlaid
that [] onto cached issue JSON, and transform_remote_links interpreted
the empty list as 'issue legitimately has no remote links — delete
existing rows', so a transient Jira auth blip permanently wiped
remote-link history.

Now:
- Every fetch site raises JiraFetchError on non-200/non-404 status,
  on httpx.RequestError, and on the 'service not configured' path.
- Overlay sites skip the _remote_links key when fetch raises, leaving
  it ABSENT (not present-but-empty).
- transform_remote_links returns None for absent/null keys (preserve
  existing rows) vs [] (legitimate empty — wipe).
- Both consumers (batch transform_all, incremental
  transform_single_issue) honor the new contract.
- End-to-end tests
  test_incremental_preserves_remote_links_when_overlay_absent and
  test_incremental_wipes_remote_links_when_overlay_present_but_empty
  lock both halves.

Adversarial-review fixes bundled:
- service.py: unconfigured-service path now raises JiraFetchError
  instead of returning [] (a webhook can arrive while API creds are
  missing — HMAC verification uses a separate JIRA_WEBHOOK_SECRET).
  Regression guard test_raises_when_unconfigured added.
- consistency_check.py: AUTO_FIX_THRESHOLD bumped 10 -> 20 to cover
  typical SLA-poller hiccups before escalating to ERROR.
- CLAUDE.md: connectors/jira/transform.py removed from 'Files NOT to
  modify' (overlay-contract change required touching it; module
  remains sensitive but is no longer off-limits).

* release: 0.54.19 — jira remote_links hardening (transient API failure no longer wipes parquet rows)
2026-05-15 19:09:46 +02:00

187 lines
8.2 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

[project]
name = "agnes-the-ai-analyst"
version = "0.54.19"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"
readme = "README.md"
dependencies = [
# Core database
# 1.5.2 fixes a FK-dependency regression that affected ALTER TABLE on
# tables referenced by other tables — broke the test_db migration
# ladder replay on 1.5.1. CI runs 1.5.2; local devs need it too.
"duckdb>=1.5.2",
# Web framework (FastAPI)
"fastapi>=0.115.0",
"uvicorn[standard]>=0.32.0",
"python-multipart>=0.0.27",
"jinja2>=3.1.0",
"starlette>=0.41.0",
# Authentication
"PyJWT>=2.8.0",
"itsdangerous>=2.1.0",
"authlib>=1.6.12",
"argon2-cffi>=23.1.0",
# HTTP client. `h2` enables HTTP/2 multiplexing for the persistent
# CLI client used by `agnes pull` (one TCP connection serves N
# concurrent parquet streams + range chunks). `cli/client.py`
# gracefully falls back to HTTP/1.1 if h2 is missing, so this
# extra is for performance, not correctness.
"httpx>=0.27.0",
"h2>=4.1.0",
# CLI
"typer>=0.12.0",
"rich>=13.0.0",
# Configuration
"python-dotenv>=1.0.0",
"pyyaml>=6.0",
# Data processing
"pandas>=2.0.0",
"pyarrow>=12.0.0",
"pytz>=2024.1",
# SQL parsing — server-side WHERE validator for /api/v2/scan (app/api/where_validator.py)
# Minimum 30.x — older versions had walk() yielding (node, parent, key)
# tuples instead of expression nodes, which would silently bypass the
# WHERE-validator structural checks (isinstance(tuple, exp.Subquery)
# is always False). 30.x yields nodes directly.
"sqlglot>=30.0.0",
# Data source connectors
"google-cloud-bigquery>=3.0.0",
"google-cloud-bigquery-storage>=2.0.0",
# Google Workspace Cloud Identity / Admin SDK (Workspace group membership sync)
"google-api-python-client>=2.0.0",
# Profiler visualizations
"matplotlib>=3.8.0",
"numpy>=1.24.0",
# Claude Code marketplace endpoint — pure-Python git server mounted in FastAPI
"dulwich>=0.22.0",
"a2wsgi>=1.10.0",
# In-process TTL cache for marketplace etag (transitively present via
# google-auth, declared explicitly here because we depend on it directly).
"cachetools>=5.3.0",
# Per-IP rate limiting on auth endpoints (#45). In-process counters by
# default — fine for single-replica deploys. Multi-replica rollouts can
# swap the storage backend via slowapi's `storage_uri` (Redis, Memcached).
"slowapi>=0.1.9",
# LLM provider SDKs — core (not dev) because connectors/llm/*_provider.py
# is imported by services/{corporate_memory, verification_detector} which
# the scheduler drives in production. Promoted from [dev] in #176 to fix
# ModuleNotFoundError boot loops on default Compose deploys.
"anthropic>=0.30.0",
"openai>=1.30.0",
# Keboola Storage API SDK — used by:
# - `connectors/keboola/client.py` for admin-side bucket / table list
# (consumed from `app/api/admin.py` discover-and-register, table
# metadata refresh).
# Extraction itself uses the lightweight `connectors/keboola/storage_api.py`
# module (export-async + signed-URL download) which talks to Storage API
# directly via `requests` — no SDK dependency on the data-path side. The
# SDK stays for the metadata reads.
#
# NOTE: kbcstorage moved to the [server] extra below — see the rationale
# in [project.optional-dependencies].server. CLI wheels installed via
# `uv tool install` deliberately ship without it.
"sse-starlette>=2.0",
# Optional observability — pure-Python, no compilation. Lazily initialized
# in src/observability/posthog_client.py and only emits events when
# POSTHOG_API_KEY is set in the environment. With the key unset the
# integration is fully off (no network, no init). See docs/observability.md.
"posthog>=3.7.0",
# Rust-backed (ammonia) HTML sanitizer for admin-edited rich content
# (news intro + body, curated marketplace-metadata.json descriptions).
# Allowlist-based with per-tag attribute scoping; closes the bypass
# shapes the legacy regex sanitizer in src/welcome_template.py was
# vulnerable to. Pre-built wheels published for all supported
# (mac/linux/windows × arm64/x86_64) targets.
"nh3>=0.2",
# CommonMark markdown renderer for curator-authored marketplace-metadata.json
# rich content (plugin description / sample_interaction.assistant). Pure
# Python, no compilation. Rendered output is funneled through nh3 above.
"markdown-it-py>=3.0",
# Cross-platform advisory file locking for the `agnes push` single-instance
# guard. Wraps fcntl.flock on POSIX and msvcrt.locking on Windows behind
# a uniform API; OS releases the lock automatically on process exit (no
# stale-lock detection required). Used by cli/lib/push_lock.py.
"filelock>=3.13,<4",
# Transitive dependency hardened directly to dodge 5 dependabot advisories
# (4 high, 1 medium) flagged on urllib3<2.7.0: cross-origin sensitive
# header leak on proxied low-level redirects, decompression-bomb bypass
# + unbounded decompression chain on the streaming API, redirects-when-
# retries-disabled. The `[server]` extra below adds kbcstorage which
# transitively caps urllib3<2.0.0; `[tool.uv] override-dependencies`
# forces 2.7+ in workspace installs (Dockerfile + dev). Wheel consumers
# who install only the CLI (`uv tool install <wheel>`) get no kbcstorage
# and no conflict.
"urllib3>=2.7.0",
]
[project.optional-dependencies]
# Server-side connectors. The CLI wheel does NOT need these — analysts who
# `uv tool install` the wheel never reach a kbcstorage import. Splitting it
# out keeps the wheel's METADATA `Requires-Dist` set free of the
# `kbcstorage<=0.9.5 → urllib3<2.0.0` cap that conflicts with our
# `urllib3>=2.7.0` security pin under any fresh resolver context (where
# `[tool.uv] override-dependencies` does NOT apply — see comment on
# [tool.uv] below). Server install pulls it in via Dockerfile's
# `uv pip install --system --no-cache .[server]`.
server = [
"kbcstorage>=0.9.0",
]
observability = [
# Already in base dependencies — listed here so operators who want to
# be explicit can `pip install -e ".[observability]"` and signal intent.
"posthog>=3.7.0",
]
dev = [
"pytest>=9.0.0",
"pytest-timeout>=2.0.0",
"pytest-xdist>=3.0.0",
# pytest-split shards the suite across parallel CI jobs (`--splits N
# --group K`); see the `test-shard` matrix in `.github/workflows/ci.yml`.
# Balanced by the committed `.test_durations` file.
"pytest-split>=0.9.0",
"faker>=24.0.0",
# jsonschema validates the corporate-memory extraction-tool golden fixtures
# under tests/test_corporate_memory_v1.py (extraction.json, correction.json,
# confidence_calibration.json). Production code does not depend on it.
"jsonschema>=4.0.0",
# FastAPI debug toolbar — gated behind DEBUG=1 env var in app/main.py.
# Provides per-request panels (headers, routes, timer, profiling, etc.)
# for local development. Never loaded in production (no DEBUG=1 there).
"fastapi-debug-toolbar>=0.6.3",
]
[project.scripts]
agnes = "cli.main:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["app", "src", "connectors", "cli", "services", "config"]
[tool.ruff]
line-length = 120
target-version = "py313"
[tool.uv]
dev-dependencies = [
"pytest>=9.0.0",
"pytest-timeout>=2.0.0",
"pytest-xdist>=3.0.0",
"pytest-split>=0.9.0",
"faker>=24.0.0",
"anthropic>=0.30.0",
"openai>=1.30.0",
"fastapi-debug-toolbar>=0.6.3",
]
# Override the urllib3<2.0.0 ceiling kbcstorage 0.9.5 declares (upstream
# hasn't relaxed it as of 2026-05-12 but the SDK works fine against
# urllib3 2.x in practice — we only use `Client` + `Tables` from it and
# both go through `requests`, which natively supports both lines). Lets
# the resolver pick a urllib3 line that closes Dependabot advisories
# CVE-2024-37891 / CVE-2025-{xxx}. See `urllib3>=2.7.0` in [project]
# dependencies above for the security rationale.
override-dependencies = ["urllib3>=2.7.0"]