agnes-the-ai-analyst/tests/test_cli_fetch.py
ZdenekSrotyr 1563b05f2e refactor(cli): hard-cutover env vars + config dir to AGNES_*
Task 0.5 of clean-analyst-bootstrap. Greenfield rewrite — no fallback,
no aliases. Existing dev environments lose their cached PAT and must
re-authenticate.

Env var renames (hard cutover):
- DA_CONFIG_DIR    -> AGNES_CONFIG_DIR
- DA_SERVER        -> AGNES_SERVER
- DA_SERVER_URL    -> AGNES_SERVER_URL  (test-only stale ref, not in spec)
- DA_NO_UPDATE_CHECK -> AGNES_NO_UPDATE_CHECK
- DA_LOCAL_DIR     -> AGNES_LOCAL_DIR
- DA_TOKEN         -> AGNES_TOKEN
- DA_STREAM_RETRIES -> AGNES_STREAM_RETRIES

Config dir rename: ~/.config/da/ -> ~/.config/agnes/ (across code,
comments, docstrings, error messages, install templates, dev scripts).

Stale `da X` references in CLI source (and adjacent app/, tests/):
swept docstrings, comments, help text, and error messages where the
verb survives the rewrite (init, pull, push, catalog, status, diagnose,
auth, admin, skills, query, schema, describe, explore, disk-info,
snapshot, login, logout, whoami, server, setup) and replaced `da X`
with `agnes X`. Intentionally kept `da sync`, `da fetch`, `da analyst`,
`da metrics` — those verbs are removed in later tasks; the legacy
strings will be detected by `_LEGACY_STRINGS` (added in Task 2).

Test fixes:
- TestCLIVersion now asserts output starts with `agnes ` (was `da `).

Test results: 2675 passed, 25 skipped (full pytest run, excluding 9
pre-existing test_db.py / test_user_management.py / test_e2e_extract.py
/ test_cli_binary_rename.py failures unrelated to this rename).
2026-05-04 16:35:44 +02:00

122 lines
5.1 KiB
Python

# tests/test_cli_fetch.py
from typer.testing import CliRunner
from unittest.mock import patch, MagicMock
import pyarrow as pa
import json
import pytest
def _seed_local_dir(tmp_path):
    """Create the ``user/duckdb`` and ``user/snapshots`` directories under *tmp_path*.

    Returns the very same *tmp_path* object so the call can be used inline.
    Raises ``FileExistsError`` if either leaf directory is already present,
    because ``mkdir`` is called without ``exist_ok``.
    """
    user_root = tmp_path / "user"
    # Same creation order as before: duckdb first (which also creates "user"),
    # then snapshots.
    for leaf in ("duckdb", "snapshots"):
        (user_root / leaf).mkdir(parents=True)
    return tmp_path
@pytest.fixture
def cli_env(tmp_path, monkeypatch):
    """Seed a temp directory, export it as ``AGNES_LOCAL_DIR``, and yield it.

    The environment variable is set through ``monkeypatch``, so pytest
    restores the previous value when the test finishes.
    """
    seeded_root = _seed_local_dir(tmp_path)
    monkeypatch.setenv("AGNES_LOCAL_DIR", str(seeded_root))
    yield tmp_path
def test_print_estimate_handles_none_values():
    """Regression: ``_print_estimate`` must accept keys that map to ``None``.

    The server sends ``None`` (rather than omitting the key) for non-BQ
    tables. ``dict.get(k, default)`` only falls back to the default when the
    key is missing, so formatting the ``None`` value in an f-string used to
    crash the CLI.
    """
    from cli.commands.fetch import _print_estimate

    # Key order matches the server-shaped payload: the scan size first,
    # followed by the three fields that arrive as None.
    none_fields = (
        "estimated_result_rows",
        "estimated_result_bytes",
        "bq_cost_estimate_usd",
    )
    payload = {"estimated_scan_bytes": 0, **dict.fromkeys(none_fields)}

    # Passing means "did not raise"; there is nothing to assert on.
    _print_estimate(payload)
class TestDaFetchSafety:
    """Safety regressions for ``fetch_app``: unsafe snapshot names and ``--estimate``."""

    def test_unsafe_snapshot_name_rejected(self, cli_env):
        """Regression: an ``--as`` value containing a quote plus SQL is refused.

        Such a name would inject into the local DuckDB CREATE VIEW, so the
        identifier has to be validated early and the command must exit 2.
        """
        from cli.commands.fetch import fetch_app

        runner = CliRunner()
        result = runner.invoke(fetch_app, [
            "bq_view", "--as", 'evil"; DROP TABLE x; --',
            "--no-estimate",
        ])
        assert result.exit_code == 2, result.stdout
        # Use ``result.output`` instead of ``result.stdout + result.stderr``:
        # on Click < 8.2 a default CliRunner mixes stderr into stdout and
        # reading ``result.stderr`` raises ValueError, while on Click >= 8.2
        # ``output`` is the combined terminal output. Either way the message
        # is found regardless of which stream the CLI wrote it to.
        assert "not a safe identifier" in result.output

    def test_estimate_overrides_no_estimate(self, cli_env):
        """Regression: ``--estimate`` is a do-not-fetch guarantee.

        Combined with ``--no-estimate`` the command must still bail out
        without calling the scan endpoint; otherwise the cost-safety promise
        of ``--estimate`` is silently defeated.
        """
        from cli.commands.fetch import fetch_app

        with patch("cli.commands.fetch.api_post_arrow") as m_scan:
            runner = CliRunner()
            result = runner.invoke(fetch_app, [
                "bq_view", "--estimate", "--no-estimate",
            ])
            assert result.exit_code == 0, result.stdout
            assert not m_scan.called, "api_post_arrow MUST NOT be called when --estimate is set"
class TestDaFetch:
    """Behavioural checks of ``fetch_app`` with the server calls patched out."""

    def test_estimate_only_does_not_create_snapshot(self, cli_env, monkeypatch):
        """``--estimate`` exits 0 and leaves no parquet in the snapshots directory."""
        from cli.commands.fetch import fetch_app

        canned_estimate = {
            "estimated_scan_bytes": 1_000_000,
            "estimated_result_rows": 100,
            "estimated_result_bytes": 1_000,
            "bq_cost_estimate_usd": 0.0001,
        }
        argv = [
            "bq_view",
            "--select", "a,b",
            "--where", "a > 1",
            "--limit", "100",
            "--estimate",
        ]
        with patch("cli.commands.fetch.api_post_json") as m:
            m.return_value = canned_estimate
            result = CliRunner().invoke(fetch_app, argv)
            assert result.exit_code == 0, result.stdout
            # An estimate-only run must not leave any parquet behind.
            leftovers = list((cli_env / "user" / "snapshots").glob("*.parquet"))
            assert not leftovers

    def test_fetch_creates_snapshot_with_meta(self, cli_env, monkeypatch):
        """A fetch writes ``bq_view.parquet`` plus a meta JSON recording the row count."""
        from cli.commands.fetch import fetch_app

        argv = [
            "bq_view",
            "--select", "a,b",
            "--limit", "10",
            "--no-estimate",
        ]
        with patch("cli.commands.fetch.api_post_json") as m_est:
            with patch("cli.commands.fetch.api_post_arrow") as m_scan:
                # Canned reply for the estimate endpoint.
                m_est.return_value = {
                    "estimated_scan_bytes": 1000,
                    "estimated_result_rows": 2,
                    "estimated_result_bytes": 100,
                    "bq_cost_estimate_usd": 0.0,
                }
                # Canned two-row Arrow table for the scan endpoint.
                m_scan.return_value = pa.table({"a": [1, 2], "b": ["x", "y"]})

                result = CliRunner().invoke(fetch_app, argv)
                assert result.exit_code == 0, result.stdout

                snapshots_dir = cli_env / "user" / "snapshots"
                parquet_path = snapshots_dir / "bq_view.parquet"
                meta_path = snapshots_dir / "bq_view.meta.json"
                assert parquet_path.exists()
                assert meta_path.exists()
                recorded = json.loads(meta_path.read_text())
                assert recorded["rows"] == 2

    def test_fetch_existing_snapshot_without_force_fails(self, cli_env, monkeypatch):
        """Fetching over an existing snapshot without a force flag exits with code 6."""
        from cli.commands.fetch import fetch_app

        # Pre-create a snapshot and its metadata sidecar.
        snapshots_dir = cli_env / "user" / "snapshots"
        # NOTE(review): the doubled backslashes write the literal text "\x00"
        # rather than NUL bytes; presumably only the file's existence matters
        # for this test. Confirm before changing.
        (snapshots_dir / "bq_view.parquet").write_bytes(b"PAR1\\x00\\x00PAR1")
        (snapshots_dir / "bq_view.meta.json").write_text(
            '{"name": "bq_view", "table_id": "bq_view", '
            '"select": null, "where": null, "limit": null, "order_by": null, '
            '"fetched_at": "x", "effective_as_of": "x", '
            '"rows": 0, "bytes_local": 0, '
            '"estimated_scan_bytes_at_fetch": 0, "result_hash_md5": ""}'
        )

        result = CliRunner().invoke(fetch_app, ["bq_view", "--no-estimate"])
        assert result.exit_code == 6, (
            f"expected exit code 6 (snapshot_exists); got {result.exit_code}\n{result.stdout}"
        )