diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca64abd..3af036b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,13 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
+## [0.46.5] — 2026-05-07
+
+### Fixed
+
+- `agnes describe
-n 5` previously failed with `Missing argument 'TABLE_ID'` because the command was registered as a `Typer.Typer` subcommand group; the combination of positional `table_id` + short option `-n INTEGER` mis-parses in that pattern. Switched to a flat `@app.command("describe")` registration. All forms (`-n` before/after positional, `--rows=N`, default n=5) now parse correctly. Surfaced from a real analyst session following the CLAUDE.md "agent rails" discovery workflow.
+- `/api/v2/sample/` (called by `agnes describe`) returned HTTP 500 with `ValueError: Out of range float values are not JSON compliant: nan` when the result rows contained NaN values from the underlying DuckDB / BigQuery scan. The endpoint now sanitizes NaN/±inf to JSON `null` before serialization. Same surfaced from a real analyst session.
+
## [0.46.4] — 2026-05-07
### Fixed
diff --git a/app/api/v2_sample.py b/app/api/v2_sample.py
index 9a5ffab..08f5570 100644
--- a/app/api/v2_sample.py
+++ b/app/api/v2_sample.py
@@ -2,6 +2,7 @@
from __future__ import annotations
import logging
+import math
from fastapi import APIRouter, Depends, HTTPException, Query
import duckdb
@@ -18,6 +19,27 @@ _sample_cache = TTLCache(maxsize=512, ttl_seconds=3600)
_MAX_N = 100
+def _sanitize_for_json(obj):
+ """Recursively replace NaN / ±inf floats with None so the response
+ survives JSON serialization. FastAPI's default encoder rejects these
+ (``ValueError: Out of range float values are not JSON compliant``)
+ even though Python's stdlib ``json`` accepts them by default. NaNs
+ show up routinely in DuckDB / BigQuery scans (NULL → NaN through the
+ pandas DataFrame round-trip), so the endpoint must sanitize at the
+ data-prep boundary rather than rely on the serializer."""
+ if isinstance(obj, float):
+ if math.isnan(obj) or math.isinf(obj):
+ return None
+ return obj
+ if isinstance(obj, list):
+ return [_sanitize_for_json(x) for x in obj]
+ if isinstance(obj, tuple):
+ return tuple(_sanitize_for_json(x) for x in obj)
+ if isinstance(obj, dict):
+ return {k: _sanitize_for_json(v) for k, v in obj.items()}
+ return obj
+
+
def _fetch_bq_sample(bq, dataset: str, table: str, n: int) -> list[dict]:
"""Fetch up to `n` sample rows from a BQ table via the DuckDB BQ extension.
@@ -98,6 +120,7 @@ def build_sample(
finally:
c.close()
+ rows = _sanitize_for_json(rows)
payload = {"table_id": table_id, "rows": rows, "source": source_type}
_sample_cache.set(cache_key, payload)
return payload
diff --git a/cli/commands/describe.py b/cli/commands/describe.py
index cc531f2..184e100 100644
--- a/cli/commands/describe.py
+++ b/cli/commands/describe.py
@@ -1,22 +1,25 @@
-"""`agnes describe ` — schema + sample rows (spec §4.1)."""
+"""`agnes describe ` — schema + sample rows (spec §4.1).
+
+Registered as a flat ``@app.command("describe")`` in ``cli/main.py`` rather
+than as a ``Typer.Typer`` subcommand-group + callback. The group pattern
+mis-parses ``agnes describe TABLE -n 5`` (positional + short option with a
+separated INTEGER value) — Typer hands the "5" to the positional and then
+errors on a missing TABLE_ID. There were no actual subcommands of
+``describe`` to justify the group wrapping anyway. Issue surfaced from a
+real analyst session following the CLAUDE.md "agent rails" workflow.
+"""
import json as json_lib
import typer
from cli.v2_client import api_get_json, V2ClientError
-describe_app = typer.Typer(help="Show schema + sample rows for a table")
-
-@describe_app.callback(invoke_without_command=True)
def describe(
- ctx: typer.Context,
table_id: str = typer.Argument(...),
n: int = typer.Option(5, "-n", "--rows", help="Sample rows count"),
json: bool = typer.Option(False, "--json"),
):
"""Show schema + sample rows for a table."""
- if ctx.invoked_subcommand is not None:
- return
try:
sch = api_get_json(f"/api/v2/schema/{table_id}")
sam = api_get_json(f"/api/v2/sample/{table_id}", n=n)
diff --git a/cli/main.py b/cli/main.py
index 31b5a9c..a3dd3e6 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -40,7 +40,7 @@ from cli.commands.server import server_app
from cli.commands.explore import explore_app
from cli.commands.catalog import catalog_app
from cli.commands.schema import schema_app
-from cli.commands.describe import describe_app
+from cli.commands.describe import describe
from cli.commands.snapshot import snapshot_app
from cli.commands.disk_info import disk_info_app
from cli.commands.store import store_app
@@ -124,7 +124,7 @@ app.add_typer(server_app, name="server")
app.add_typer(explore_app, name="explore")
app.add_typer(catalog_app, name="catalog")
app.add_typer(schema_app, name="schema")
-app.add_typer(describe_app, name="describe")
+app.command("describe")(describe)
app.add_typer(snapshot_app, name="snapshot")
app.add_typer(disk_info_app, name="disk-info")
app.add_typer(store_app, name="store")
diff --git a/pyproject.toml b/pyproject.toml
index c39c431..5f58359 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "agnes-the-ai-analyst"
-version = "0.46.4"
+version = "0.46.5"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"
diff --git a/tests/test_cli_catalog.py b/tests/test_cli_catalog.py
index 87c363f..6543247 100644
--- a/tests/test_cli_catalog.py
+++ b/tests/test_cli_catalog.py
@@ -125,9 +125,9 @@ def test_da_describe_json_output():
return sample_payload
with patch("cli.commands.describe.api_get_json", side_effect=fake_get):
- from cli.commands.describe import describe_app
+ from cli.main import app
runner = CliRunner()
- result = runner.invoke(describe_app, ["--json", "orders"])
+ result = runner.invoke(app, ["describe", "--json", "orders"])
assert result.exit_code == 0
out = json.loads(result.stdout)
assert "schema" in out
@@ -160,9 +160,9 @@ def test_da_describe_human_output():
return sample_payload
with patch("cli.commands.describe.api_get_json", side_effect=fake_get):
- from cli.commands.describe import describe_app
+ from cli.main import app
runner = CliRunner()
- result = runner.invoke(describe_app, ["orders"])
+ result = runner.invoke(app, ["describe", "orders"])
assert result.exit_code == 0
assert "orders" in result.stdout
assert "id" in result.stdout
diff --git a/tests/test_cli_describe.py b/tests/test_cli_describe.py
new file mode 100644
index 0000000..fc6fe33
--- /dev/null
+++ b/tests/test_cli_describe.py
@@ -0,0 +1,76 @@
+"""CLI parsing regression tests for `agnes describe`.
+
+Pins the fix for the Typer.Typer subcommand-group → flat @app.command
+switch. Pre-fix, `agnes describe TABLE -n 5` failed with
+`Missing argument 'TABLE_ID'` because Typer ate the positional as the
+short-option's INTEGER value. Now all four invocation orders parse.
+"""
+
+from unittest.mock import patch
+
+from typer.testing import CliRunner
+
+
+_SCHEMA_PAYLOAD = {
+ "table_id": "orders",
+ "source_type": "keboola",
+ "sql_flavor": "duckdb",
+ "columns": [
+ {"name": "id", "type": "INTEGER", "nullable": False, "description": "PK"},
+ ],
+ "partition_by": None,
+ "clustered_by": [],
+ "where_dialect_hints": {},
+}
+_SAMPLE_PAYLOAD = {"table_id": "orders", "rows": [{"id": 1}], "columns": ["id"]}
+
+
+def _fake_get(path, **kwargs):
+ if "schema" in path:
+ return _SCHEMA_PAYLOAD
+ return _SAMPLE_PAYLOAD
+
+
+def _invoke(args):
+ with patch("cli.commands.describe.api_get_json", side_effect=_fake_get):
+ from cli.main import app
+ return CliRunner().invoke(app, args)
+
+
+def test_describe_accepts_short_n_after_positional():
+ """`agnes describe TABLE -n 5` — pre-fix this hit Typer's
+ `Missing argument 'TABLE_ID'` because the short INTEGER option
+ swallowed the positional in subcommand-group mode."""
+ result = _invoke(["describe", "orders", "-n", "5"])
+ assert result.exit_code == 0, result.stdout
+ assert "Missing argument" not in result.stdout
+
+
+def test_describe_accepts_n_before_positional():
+ """`agnes describe -n 5 TABLE` — already worked pre-fix; pinned
+ to make sure the move to a flat command kept it working."""
+ result = _invoke(["describe", "-n", "5", "orders"])
+ assert result.exit_code == 0, result.stdout
+
+
+def test_describe_accepts_long_rows_with_equals():
+ """`agnes describe TABLE --rows=5` — long-option-with-= form."""
+ result = _invoke(["describe", "orders", "--rows=5"])
+ assert result.exit_code == 0, result.stdout
+
+
+def test_describe_default_n_is_5():
+ """`agnes describe TABLE` (no -n) defaults to 5; passes the param
+ through to /api/v2/sample. Verified by capturing the n= kwarg."""
+ captured = {}
+
+ def fake_get(path, **kwargs):
+ if "sample" in path:
+ captured["n"] = kwargs.get("n")
+ return _SCHEMA_PAYLOAD if "schema" in path else _SAMPLE_PAYLOAD
+
+ with patch("cli.commands.describe.api_get_json", side_effect=fake_get):
+ from cli.main import app
+ result = CliRunner().invoke(app, ["describe", "orders"])
+ assert result.exit_code == 0, result.stdout
+ assert captured["n"] == 5
diff --git a/tests/test_v2_sample.py b/tests/test_v2_sample.py
index 07f89f7..3267190 100644
--- a/tests/test_v2_sample.py
+++ b/tests/test_v2_sample.py
@@ -83,6 +83,62 @@ class TestSampleEndpoint:
conn.close()
assert captured["n"] == 100
+ def test_sample_handles_nan_values_in_rows(self, reload_db, monkeypatch):
+ """Regression: rows containing NaN floats from a DuckDB / BigQuery
+ scan used to crash the response with `ValueError: Out of range
+ float values are not JSON compliant: nan`. The endpoint now
+ sanitizes NaN/±inf to None before returning the payload."""
+ import math
+ from app.api import v2_sample
+ v2_sample._sample_cache.clear()
+ monkeypatch.setattr(
+ v2_sample, "_fetch_bq_sample",
+ lambda bq, dataset, table, n: [
+ {"col": float("nan"), "ok": 1.0},
+ {"col": float("inf"), "ok": 2.0},
+ {"col": float("-inf"), "ok": 3.0},
+ ],
+ )
+ conn = reload_db.get_system_db()
+ try:
+ _seed(conn)
+ user = {"id": "admin1", "email": "a@x.com"}
+ data = v2_sample.build_sample(conn, user, "bq_view", n=3, bq=_bq())
+ finally:
+ conn.close()
+ assert data["rows"] == [
+ {"col": None, "ok": 1.0},
+ {"col": None, "ok": 2.0},
+ {"col": None, "ok": 3.0},
+ ]
+ # Belt-and-braces: payload must round-trip through stdlib json
+ # in strict mode (allow_nan=False) — that's what FastAPI's
+ # serializer enforces internally.
+ import json as _json
+ _json.dumps(data, allow_nan=False) # must not raise
+
+ def test_sample_handles_nested_nan_in_arrays(self, reload_db, monkeypatch):
+ """Sanitizer recurses into nested lists/dicts — array-typed BQ
+ cells with NaN inside also serialize cleanly."""
+ from app.api import v2_sample
+ v2_sample._sample_cache.clear()
+ monkeypatch.setattr(
+ v2_sample, "_fetch_bq_sample",
+ lambda *a, **kw: [{"arr": [1.0, float("nan"), 3.0],
+ "nested": {"x": float("inf")}}],
+ )
+ conn = reload_db.get_system_db()
+ try:
+ _seed(conn)
+ user = {"id": "admin1", "email": "a@x.com"}
+ data = v2_sample.build_sample(conn, user, "bq_view", n=1, bq=_bq())
+ finally:
+ conn.close()
+ assert data["rows"][0]["arr"] == [1.0, None, 3.0]
+ assert data["rows"][0]["nested"] == {"x": None}
+ import json as _json
+ _json.dumps(data, allow_nan=False)
+
def test_rbac_check_runs_before_cache(self, reload_db, monkeypatch):
"""Regression: cache check used to come before RBAC, leaking sample rows
cached by an authorized user to subsequent unauthorized callers."""