## Summary
Two bugs in `agnes describe` surfaced from a real analyst session following the CLAUDE.md agent-rails discovery workflow. Together they break `agnes describe` end-to-end for any analyst (or analyst-AI) who follows the documented form.
### A) CLI parsing
`agnes describe TABLE -n 5` failed with `Missing argument 'TABLE_ID'`. Root cause: the command was registered as a `typer.Typer` subcommand group via `app.add_typer(describe_app, name="describe")` + `@describe_app.callback(invoke_without_command=True)`, and that pattern mis-parses a positional argument combined with a short integer option in some orderings. The same pattern in `cli/commands/schema.py` works only because `schema` has no short INTEGER option. Fix: switch to a flat `@app.command("describe")`.
### B) Server NaN
`/api/v2/sample/<id>` (called by `agnes describe`) returned HTTP 500 with `ValueError: Out of range float values are not JSON compliant: nan` whenever a row contained NaN. Fix: sanitize NaN/±inf to None before JSON serialization.
## Test plan
- [x] `pytest tests/test_cli_describe*.py` — added regression tests pinning `-n` parsing on either side of the positional.
- [x] `pytest tests/test_api_v2_sample*.py` — added regression test for NaN row → JSON `null` (not 500).
<!-- devin-review-badge-begin -->
---
<a href="https://app.devin.ai/review/keboola/agnes-the-ai-analyst/pull/224" target="_blank">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1">
<img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open in Devin Review">
</picture>
</a>
<!-- devin-review-badge-end -->
168 lines
5.7 KiB
Python
168 lines
5.7 KiB
Python
# tests/test_cli_catalog.py
|
|
import json
|
|
from typer.testing import CliRunner
|
|
from unittest.mock import patch
|
|
import typer
|
|
import pytest
|
|
|
|
|
|
def test_da_catalog_json_output(monkeypatch):
    """Invoke ``catalog_app`` with ``--json`` and check the parsed stdout.

    ``cli.commands.catalog.api_get_json`` is patched to return a canned
    payload. The test passes when the command exits 0 and stdout parses
    as JSON whose first table has id ``"orders"``. The ``monkeypatch``
    fixture is requested but not used in this body.
    """
    orders_entry = {
        "id": "orders",
        "name": "orders",
        "source_type": "keboola",
        "query_mode": "local",
        "sql_flavor": "duckdb",
        "where_examples": [],
        "fetch_via": "...",
        "rough_size_hint": None,
    }
    canned_response = {
        "tables": [orders_entry],
        "server_time": "2026-04-27T17:30:00Z",
    }

    with patch("cli.commands.catalog.api_get_json", return_value=canned_response):
        from cli.commands.catalog import catalog_app

        result = CliRunner().invoke(catalog_app, ["--json"])

    assert result.exit_code == 0
    parsed = json.loads(result.stdout)
    first_table = parsed["tables"][0]
    assert first_table["id"] == "orders"
|
|
|
|
|
|
def test_da_catalog_table_output(monkeypatch):
    """Invoke ``catalog_app`` with no arguments and check the text output.

    ``cli.commands.catalog.api_get_json`` is patched to return a canned
    payload. The test passes when the command exits 0 and stdout contains
    both ``"orders"`` and ``"keboola"``. The ``monkeypatch`` fixture is
    requested but not used in this body.
    """
    orders_entry = {
        "id": "orders",
        "name": "orders",
        "source_type": "keboola",
        "query_mode": "local",
        "sql_flavor": "duckdb",
        "where_examples": [],
        "fetch_via": "...",
        "rough_size_hint": None,
    }
    canned_response = {
        "tables": [orders_entry],
        "server_time": "2026-04-27T17:30:00Z",
    }

    with patch("cli.commands.catalog.api_get_json", return_value=canned_response):
        from cli.commands.catalog import catalog_app

        result = CliRunner().invoke(catalog_app, [])

    assert result.exit_code == 0
    rendered = result.stdout
    assert "orders" in rendered
    assert "keboola" in rendered
|
|
|
|
|
|
def test_da_schema_json_output():
    """Invoke ``schema_app`` with ``--json orders`` and check the parsed stdout.

    ``cli.commands.schema.api_get_json`` is patched to return a canned
    schema. The test passes when the command exits 0 and stdout parses as
    JSON with ``table_id == "orders"`` and exactly two columns.
    """
    column_specs = [
        {"name": "id", "type": "INTEGER", "nullable": False, "description": "Primary key"},
        {"name": "total", "type": "DOUBLE", "nullable": True, "description": "Order total"},
    ]
    canned_schema = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": column_specs,
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }

    with patch("cli.commands.schema.api_get_json", return_value=canned_schema):
        from cli.commands.schema import schema_app

        result = CliRunner().invoke(schema_app, ["--json", "orders"])

    assert result.exit_code == 0
    parsed = json.loads(result.stdout)
    assert parsed["table_id"] == "orders"
    assert len(parsed["columns"]) == 2
|
|
|
|
|
|
def test_da_schema_human_output():
    """Invoke ``schema_app`` with ``orders`` and check the text output.

    ``cli.commands.schema.api_get_json`` is patched to return a canned
    one-column schema. The test passes when the command exits 0 and stdout
    contains the table id, the column name and the column type.
    """
    canned_schema = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [
            {"name": "id", "type": "INTEGER", "nullable": False, "description": "PK"},
        ],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }

    with patch("cli.commands.schema.api_get_json", return_value=canned_schema):
        from cli.commands.schema import schema_app

        result = CliRunner().invoke(schema_app, ["orders"])

    assert result.exit_code == 0
    rendered = result.stdout
    assert "orders" in rendered
    assert "id" in rendered
    assert "INTEGER" in rendered
|
|
|
|
|
|
def test_da_schema_error_exits_nonzero():
    """Check that ``schema_app`` exits non-zero when the API call raises.

    ``cli.commands.schema.api_get_json`` is patched to raise a
    ``V2ClientError`` (status 404, body ``"not found"``); the test passes
    when invoking the command with ``nonexistent`` yields a non-zero
    exit code.
    """
    from cli.v2_client import V2ClientError

    not_found = V2ClientError(status_code=404, body="not found")

    with patch("cli.commands.schema.api_get_json", side_effect=not_found):
        from cli.commands.schema import schema_app

        result = CliRunner().invoke(schema_app, ["nonexistent"])

    assert result.exit_code != 0
|
|
|
|
|
|
def test_da_describe_json_output():
    """Invoke ``describe --json orders`` on the main app and check the JSON.

    ``cli.commands.describe.api_get_json`` is patched with a stub that
    returns the canned schema for any path containing ``"schema"`` and the
    canned sample otherwise. The test passes when the command exits 0 and
    stdout parses as JSON holding ``"schema"`` and ``"sample"`` keys, with
    the schema's ``table_id`` equal to ``"orders"``.
    """
    canned_schema = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [
            {"name": "id", "type": "INTEGER", "nullable": False, "description": "PK"},
        ],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }
    canned_sample = {
        "table_id": "orders",
        "rows": [{"id": 1}, {"id": 2}],
        "columns": ["id"],
    }

    def route_request(path, **kwargs):
        # Pick the payload by path; extra keyword arguments are ignored.
        return canned_schema if "schema" in path else canned_sample

    with patch("cli.commands.describe.api_get_json", side_effect=route_request):
        from cli.main import app

        result = CliRunner().invoke(app, ["describe", "--json", "orders"])

    assert result.exit_code == 0
    parsed = json.loads(result.stdout)
    assert "schema" in parsed
    assert "sample" in parsed
    assert parsed["schema"]["table_id"] == "orders"
|
|
|
|
|
|
def test_da_describe_human_output():
    """Invoke ``describe orders`` on the main app and check the text output.

    ``cli.commands.describe.api_get_json`` is patched with a stub that
    returns the canned schema for any path containing ``"schema"`` and the
    canned sample otherwise. The test passes when the command exits 0 and
    stdout contains both ``"orders"`` and ``"id"``.
    """
    canned_schema = {
        "table_id": "orders",
        "source_type": "keboola",
        "sql_flavor": "duckdb",
        "columns": [
            {"name": "id", "type": "INTEGER", "nullable": False, "description": "PK"},
        ],
        "partition_by": None,
        "clustered_by": [],
        "where_dialect_hints": {},
    }
    canned_sample = {
        "table_id": "orders",
        "rows": [{"id": 1}],
        "columns": ["id"],
    }

    def route_request(path, **kwargs):
        # Pick the payload by path; extra keyword arguments are ignored.
        return canned_schema if "schema" in path else canned_sample

    with patch("cli.commands.describe.api_get_json", side_effect=route_request):
        from cli.main import app

        result = CliRunner().invoke(app, ["describe", "orders"])

    assert result.exit_code == 0
    rendered = result.stdout
    assert "orders" in rendered
    assert "id" in rendered
|