agnes-the-ai-analyst/tests/test_cli_catalog.py
ZdenekSrotyr 751cc25327
release: 0.46.5 — agnes describe -n parses, server sanitizes NaN (#224)
## Summary

Two bugs in `agnes describe` surfaced from a real analyst session following the CLAUDE.md agent-rails discovery workflow. Together they break `agnes describe` end-to-end for any analyst (or analyst-AI) who follows the documented form.

### A) CLI parsing

`agnes describe TABLE -n 5` failed with `Missing argument 'TABLE_ID'`. Root cause: the command was registered as a `typer.Typer` subcommand group via `app.add_typer(describe_app, name="describe")` + `@describe_app.callback(invoke_without_command=True)`, and that pattern mis-parses a positional argument combined with a short integer option in some orderings. The same pattern in `cli/commands/schema.py` works only because `schema` has no short INTEGER option. Fix: switch to a flat `@app.command("describe")`.

### B) Server NaN

`/api/v2/sample/<id>` (called by `agnes describe`) returned HTTP 500 with `ValueError: Out of range float values are not JSON compliant: nan` whenever a row contained NaN. Fix: sanitize NaN/±inf to None before JSON serialization.

## Test plan

- [x] `pytest tests/test_cli_describe*.py` — added regression tests pinning `-n` parsing on either side of the positional.
- [x] `pytest tests/test_api_v2_sample*.py` — added regression test for NaN row → JSON `null` (not 500).
<!-- devin-review-badge-begin -->

---

<a href="https://app.devin.ai/review/keboola/agnes-the-ai-analyst/pull/224" target="_blank">
  <picture>
    <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1">
    <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open in Devin Review">
  </picture>
</a>
<!-- devin-review-badge-end -->
2026-05-07 18:16:21 +02:00

168 lines
5.7 KiB
Python

# tests/test_cli_catalog.py
import json
from typer.testing import CliRunner
from unittest.mock import patch
import typer
import pytest
def test_da_catalog_json_output(monkeypatch):
    """Check that `agnes catalog --json` prints the server payload as JSON.

    ``api_get_json`` is patched to return a one-table catalog; stdout must
    parse as JSON and expose that table's id.
    """
    # NOTE: the ``monkeypatch`` fixture is requested but not used here.
    orders_entry = {
        "id": "orders",
        "name": "orders",
        "source_type": "keboola",
        "query_mode": "local",
        "sql_flavor": "duckdb",
        "where_examples": [],
        "fetch_via": "...",
        "rough_size_hint": None,
    }
    catalog_response = {
        "tables": [orders_entry],
        "server_time": "2026-04-27T17:30:00Z",
    }

    with patch("cli.commands.catalog.api_get_json", return_value=catalog_response):
        from cli.commands.catalog import catalog_app

        outcome = CliRunner().invoke(catalog_app, ["--json"])

    assert outcome.exit_code == 0
    parsed = json.loads(outcome.stdout)
    assert parsed["tables"][0]["id"] == "orders"
def test_da_catalog_table_output(monkeypatch):
    """Check that `agnes catalog` without flags lists the table in its output.

    With ``api_get_json`` patched to a one-table catalog, stdout must mention
    both the table id and its source type.
    """
    # NOTE: the ``monkeypatch`` fixture is requested but not used here.
    orders_entry = {
        "id": "orders",
        "name": "orders",
        "source_type": "keboola",
        "query_mode": "local",
        "sql_flavor": "duckdb",
        "where_examples": [],
        "fetch_via": "...",
        "rough_size_hint": None,
    }
    catalog_response = {
        "tables": [orders_entry],
        "server_time": "2026-04-27T17:30:00Z",
    }

    with patch("cli.commands.catalog.api_get_json", return_value=catalog_response):
        from cli.commands.catalog import catalog_app

        outcome = CliRunner().invoke(catalog_app, [])

    assert outcome.exit_code == 0
    printed = outcome.stdout
    assert "orders" in printed
    assert "keboola" in printed
def test_da_schema_json_output():
    """Check that `agnes schema --json <table>` prints column metadata as JSON.

    ``api_get_json`` is patched to return a two-column schema; the parsed
    stdout must keep the table id and both columns.
    """
    id_column = {
        "name": "id",
        "type": "INTEGER",
        "nullable": False,
        "description": "Primary key",
    }
    total_column = {
        "name": "total",
        "type": "DOUBLE",
        "nullable": True,
        "description": "Order total",
    }
    schema_response = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [id_column, total_column],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }

    with patch("cli.commands.schema.api_get_json", return_value=schema_response):
        from cli.commands.schema import schema_app

        outcome = CliRunner().invoke(schema_app, ["--json", "orders"])

    assert outcome.exit_code == 0
    parsed = json.loads(outcome.stdout)
    assert parsed["table_id"] == "orders"
    assert len(parsed["columns"]) == 2
def test_da_schema_human_output():
    """Check that `agnes schema <table>` prints a readable column listing.

    With ``api_get_json`` patched to a one-column schema, stdout must mention
    the table id, the column name and the column type.
    """
    id_column = {
        "name": "id",
        "type": "INTEGER",
        "nullable": False,
        "description": "PK",
    }
    schema_response = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [id_column],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }

    with patch("cli.commands.schema.api_get_json", return_value=schema_response):
        from cli.commands.schema import schema_app

        outcome = CliRunner().invoke(schema_app, ["orders"])

    assert outcome.exit_code == 0
    printed = outcome.stdout
    assert "orders" in printed
    assert "id" in printed
    assert "INTEGER" in printed
def test_da_schema_error_exits_nonzero():
    """Check that `agnes schema` exits non-zero when the API call raises.

    ``api_get_json`` is patched to raise a ``V2ClientError`` (404); the
    command must not report a successful exit code.
    """
    from cli.v2_client import V2ClientError

    # A mock whose side_effect is an exception instance raises it when called.
    not_found = V2ClientError(status_code=404, body="not found")

    with patch("cli.commands.schema.api_get_json", side_effect=not_found):
        from cli.commands.schema import schema_app

        outcome = CliRunner().invoke(schema_app, ["nonexistent"])

    assert outcome.exit_code != 0
def test_da_describe_json_output():
    """Check that `agnes describe --json <table>` prints schema and sample.

    ``api_get_json`` is replaced by a stub that answers schema requests and
    sample requests with different payloads; the parsed stdout must contain
    both a ``schema`` and a ``sample`` section.
    """
    schema_response = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [
            {
                "name": "id",
                "type": "INTEGER",
                "nullable": False,
                "description": "PK",
            },
        ],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }
    sample_response = {
        "table_id": "orders",
        "rows": [{"id": 1}, {"id": 2}],
        "columns": ["id"],
    }

    def _stub_api_get_json(path, **kwargs):
        # Any path mentioning "schema" gets the schema payload; every other
        # request falls through to the sample payload.
        return schema_response if "schema" in path else sample_response

    with patch("cli.commands.describe.api_get_json", side_effect=_stub_api_get_json):
        from cli.main import app

        outcome = CliRunner().invoke(app, ["describe", "--json", "orders"])

    assert outcome.exit_code == 0
    parsed = json.loads(outcome.stdout)
    assert "schema" in parsed
    assert "sample" in parsed
    assert parsed["schema"]["table_id"] == "orders"
def test_da_describe_human_output():
    """Check that `agnes describe <table>` prints readable schema and sample.

    ``api_get_json`` is replaced by a stub that answers schema requests and
    sample requests with different payloads; stdout must mention the table
    id and the column name.
    """
    schema_response = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [
            {
                "name": "id",
                "type": "INTEGER",
                "nullable": False,
                "description": "PK",
            },
        ],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }
    sample_response = {
        "table_id": "orders",
        "rows": [{"id": 1}],
        "columns": ["id"],
    }

    def _stub_api_get_json(path, **kwargs):
        # Any path mentioning "schema" gets the schema payload; every other
        # request falls through to the sample payload.
        return schema_response if "schema" in path else sample_response

    with patch("cli.commands.describe.api_get_json", side_effect=_stub_api_get_json):
        from cli.main import app

        outcome = CliRunner().invoke(app, ["describe", "orders"])

    assert outcome.exit_code == 0
    printed = outcome.stdout
    assert "orders" in printed
    assert "id" in printed