agnes-the-ai-analyst/tests/test_self_upgrade.py

"""Tests for `agnes self-upgrade` — install path, smoke test, rollback
(with rc capture), recursion barrier, --force offline failure, AGNES_NO_UPDATE_CHECK
bypass for explicit upgrades, --quiet stderr behavior, version-mismatch
smoke detection."""

import os
import sys
from unittest.mock import patch, MagicMock

import pytest
from typer.testing import CliRunner

from cli.main import app
from cli.update_check import UpdateInfo

runner = CliRunner()


@pytest.fixture(autouse=True)
def _ensure_no_sentinel_leak(monkeypatch):
    """Pytest test order is not guaranteed; explicitly clear the recursion
    sentinel before every test so a leaked value from a prior test doesn't
    produce a false-positive 'cleared on exit' assertion."""
    monkeypatch.delenv("AGNES_SELF_UPGRADE_IN_PROGRESS", raising=False)
    yield


_OUTDATED_URL = "http://server.test/cli/wheel/agnes-0.40.0-py3-none-any.whl"
_PRIOR_URL = "http://server.test/cli/wheel/agnes-0.35.0-py3-none-any.whl"


def _outdated_info():
    return UpdateInfo(installed="0.30.0", latest="0.40.0", download_url=_OUTDATED_URL)


def _current_info():
    return UpdateInfo(installed="0.40.0", latest="0.40.0", download_url=None)


def _smoke_pass():
    return (True, "agnes 0.40.0")


def _smoke_fail():
    return (False, "exit 1: ImportError: cannot import name 'foo'")


def test_check_only_when_outdated_exits_1():
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()):
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
        assert result.exit_code == 1
        assert "out of date" in result.output


def test_check_only_when_current_exits_0():
    with patch("cli.commands.self_upgrade.check", return_value=_current_info()):
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
        assert result.exit_code == 0


def test_when_current_short_circuits_no_install():
    with patch("cli.commands.self_upgrade.check", return_value=_current_info()), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run:
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        mock_run.assert_not_called()


def test_uv_path_when_uv_available():
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        args = mock_run.call_args_list[0].args[0]
        assert args[:3] == ["uv", "tool", "install"]
        assert "--force" in args
        assert _OUTDATED_URL in args


def test_pip_fallback_uses_sys_executable_not_user():
    """pip path must target the running interpreter's venv, never --user."""
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value=None), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        cmds = [c.args[0] for c in mock_run.call_args_list]
        assert any(cmd[0] == "curl" for cmd in cmds), cmds
        pip_cmd = next(cmd for cmd in cmds if "pip" in cmd)
        assert pip_cmd[0] == sys.executable, pip_cmd
        assert "--force-reinstall" in pip_cmd
        assert "--user" not in pip_cmd


def test_force_invalidates_cache_before_check():
    """--force must drop the cached download_url before probing /cli/latest."""
    fresh_current_with_url = UpdateInfo(installed="0.40.0", latest="0.40.0",
                                        download_url=_OUTDATED_URL)
    with patch("cli.commands.self_upgrade._invalidate_update_cache") as mock_invalidate, \
         patch("cli.commands.self_upgrade.check", return_value=fresh_current_with_url) as mock_check, \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good"):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade", "--force"])
        assert result.exit_code == 0
        assert mock_invalidate.call_count == 2
        mock_check.assert_called_once()


def test_force_offline_exits_1_with_stderr():
    """--force + server unreachable: exit 1 with explicit stderr."""
    with patch("cli.commands.self_upgrade.check", return_value=None), \
         patch("cli.commands.self_upgrade.get_server_url",
               return_value="http://server.test"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"):
        result = runner.invoke(app, ["self-upgrade", "--force"])
        assert result.exit_code == 1
        assert "cannot reach" in result.stderr
        assert "server.test" in result.stderr


def test_offline_without_force_is_silent():
    """No --force, server unreachable: exit 0 silently from self-upgrade
    itself. (The root callback's warning loop in cli/main.py may still emit
    `[update] …` to stderr — that's a separate code path; this test only
    pins that self-upgrade does not add a `cannot reach …` error.)"""
    with patch("cli.commands.self_upgrade.check", return_value=None), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"):
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        assert "cannot reach" not in result.stderr
        assert "self-upgrade:" not in result.stderr


def test_self_upgrade_passes_bypass_disabled_to_check():
    """AGNES_NO_UPDATE_CHECK silences the implicit warning loop, but
    explicit `agnes self-upgrade` must NOT be a silent no-op when set."""
    with patch("cli.commands.self_upgrade.check", return_value=_current_info()) as mock_check:
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
        assert result.exit_code == 0
        kwargs = mock_check.call_args.kwargs
        assert kwargs.get("bypass_disabled") is True


def test_quiet_does_not_suppress_install_failure_stderr():
    """--quiet suppresses progress but install/smoke failures always surface."""
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None):
        mock_run.return_value = MagicMock(returncode=42)
        result = runner.invoke(app, ["self-upgrade", "--quiet"])
        assert result.exit_code == 1
        assert "install failed" in result.stderr


def test_smoke_fail_triggers_rollback_when_prior_url_known():
    """Broken new wheel: smoke fails, rollback to last-known-good URL, exit 1."""
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=_PRIOR_URL), \
         patch("cli.commands.self_upgrade._record_last_known_good") as mock_record:
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 1
        urls_installed = [
            arg for c in mock_run.call_args_list
            for arg in c.args[0] if isinstance(arg, str) and arg.startswith("http")
        ]
        assert _OUTDATED_URL in urls_installed
        assert _PRIOR_URL in urls_installed
        mock_record.assert_not_called()
        assert "smoke test" in result.stderr


def test_smoke_fail_with_rollback_failure_surfaces_rc():
    """Forward install ok, smoke fail, rollback ALSO fails: stderr surfaces rc + recovery."""
    install_results = [MagicMock(returncode=0), MagicMock(returncode=99)]
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run", side_effect=install_results), \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=_PRIOR_URL), \
         patch("cli.commands.self_upgrade.get_server_url",
               return_value="http://server.test"):
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 1
        assert "rollback ALSO failed" in result.stderr
        assert "rc=99" in result.stderr
        assert "/cli/install.sh" in result.stderr


def test_smoke_fail_no_prior_url_prints_install_sh_recovery():
    """First-ever upgrade with no rollback target: stderr points at bootstrap path."""
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade.get_server_url",
               return_value="http://server.test"):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 1
        assert "/cli/install.sh" in result.stderr
        assert "server.test" in result.stderr


def test_smoke_pass_records_last_known_good_then_invalidates_cache():
    """Convention in `_do_install_with_smoke_and_rollback`: record, then
    invalidate. The OTHER invalidate call here (the FIRST one in call_order)
    is the pre-probe invalidate inside `_resolve_info` that ensures
    `agnes self-upgrade` always re-probes /cli/latest instead of trusting
    the 24h cache — see `test_self_upgrade_bypasses_24h_cache_without_force`.
    Both invalidates are intentional; we pin only the record→invalidate pair
    of the post-install bookkeeping by looking at the LAST invalidate."""
    call_order = []
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good",
               side_effect=lambda url: call_order.append(("record", url))), \
         patch("cli.commands.self_upgrade._invalidate_update_cache",
               side_effect=lambda: call_order.append(("invalidate", None))):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        record_idx = next(i for i, c in enumerate(call_order) if c[0] == "record")
        # LAST invalidate — the post-install bookkeeping one.
        invalidate_idx = max(
            i for i, c in enumerate(call_order) if c[0] == "invalidate"
        )
        assert record_idx < invalidate_idx, call_order
        assert call_order[record_idx] == ("record", _OUTDATED_URL)


def test_self_upgrade_propagates_sentinel_to_smoke_subprocess():
    """The sentinel is set in os.environ during the run and cleared in finally."""
    captured_envs = []

    def _fake_smoke(method, expected_version):
        env = {**os.environ, "AGNES_NO_UPDATE_CHECK": "1",
               "AGNES_SELF_UPGRADE_IN_PROGRESS": "1"}
        captured_envs.append(env)
        return _smoke_pass()

    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run",
               return_value=MagicMock(returncode=0)), \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", side_effect=_fake_smoke), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"):
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0
    assert captured_envs and captured_envs[0]["AGNES_SELF_UPGRADE_IN_PROGRESS"] == "1"
    assert os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS") is None


@pytest.mark.parametrize("install_method,patch_target", [
    ("uv", "_uv_tool_bin_path"),
    ("pip", "_pip_bin_path"),
])
def test_smoke_test_detects_version_mismatch(install_method, patch_target):
    """Smoke test execs binary at install path (NOT shutil.which) and checks
    Version equality (NOT substring). Parametrized over uv + pip."""
    from pathlib import Path
    from cli.commands import self_upgrade as su

    fake_bin = f"/fake/{install_method}/bin/agnes"
    with patch.object(su, patch_target, return_value=Path(fake_bin)), \
         patch.object(su.subprocess, "run") as mock_run:
        mock_run.return_value = MagicMock(returncode=0, stdout="agnes 0.30.0\n", stderr="")
        ok, detail = su._smoke_test_new_binary(install_method, expected_version="0.40.0")
        assert ok is False
        assert "version mismatch" in detail
        assert "0.40.0" in detail and "0.30.0" in detail
        assert mock_run.call_args.args[0][0] == fake_bin


def test_self_upgrade_bypasses_24h_cache_without_force(tmp_path, monkeypatch):
    """Plain `agnes self-upgrade` (no --force) MUST re-probe /cli/latest
    even when the local update_check.json cache claims we're current.

    Pre-fix the cache short-circuited and the command was a silent no-op
    after a server bump within the 24h window. Empirically observed:
    prod 0.47.1 → 0.47.2 didn't propagate to clients with a fresh cache.
    """
    import json
    import time
    from cli.commands import self_upgrade as su
    from cli import update_check as uc

    # Redirect the on-disk cache to tmp_path via _config_dir's env override.
    monkeypatch.setenv("AGNES_CONFIG_DIR", str(tmp_path))

    # Arrange: stale cache claims installed=latest=0.47.1, written 1 minute
    # ago — well within the 24h positive-cache TTL.
    cache_path = tmp_path / "update_check.json"
    cache_path.write_text(json.dumps({
        "installed": "0.47.1",
        "server_url": "http://server.test",
        "latest": "0.47.1",
        "download_url": "http://server.test/cli/wheel/agnes-0.47.1-py3-none-any.whl",
        "checked_at": time.time() - 60,
    }), encoding="utf-8")

    # Mock the network probe to return 0.47.2 — the bumped server.
    monkeypatch.setattr(uc, "_fetch_latest", lambda url: {
        "version": "0.47.2",
        "download_url_path": "/cli/wheel/agnes-0.47.2-py3-none-any.whl",
    })
    # Pin the installed version to 0.47.1 (matches the stale cache).
    monkeypatch.setattr(uc, "_installed_version", lambda: "0.47.1")
    # Pin the server URL so the cache key matches.
    monkeypatch.setattr(su, "get_server_url", lambda: "http://server.test")

    # Act: explicit self-upgrade WITHOUT --force.
    info = su._resolve_info(force=False)

    # Assert: returns UpdateInfo carrying the FRESH 0.47.2, not cached 0.47.1.
    assert info is not None and not isinstance(info, su._Unreachable)
    assert info.latest == "0.47.2", (
        f"expected fresh probe to return 0.47.2; got {info.latest} "
        "(cache short-circuit regressed)"
    )
    assert info.installed == "0.47.1"
    assert info.download_url == (
        "http://server.test/cli/wheel/agnes-0.47.2-py3-none-any.whl"
    )

    # Assert: cache was rewritten with the fresh latest. Proves the probe
    # actually ran rather than the stale cache satisfying the call via
    # some other path that happened to leave 0.47.1 untouched on disk.
    refreshed = json.loads(cache_path.read_text(encoding="utf-8"))
    assert refreshed["latest"] == "0.47.2"


def test_smoke_test_passes_with_pep440_local_version():
    """Use Version() comparison, not substring (so "0.40.0" doesn't match "0.40.10")."""
    from pathlib import Path
    from cli.commands import self_upgrade as su

    with patch.object(su, "_uv_tool_bin_path", return_value=Path("/fake/agnes")), \
         patch.object(su.subprocess, "run") as mock_run:
        mock_run.return_value = MagicMock(returncode=0, stdout="agnes 0.40.0\n", stderr="")
        ok, _ = su._smoke_test_new_binary("uv", expected_version="0.40.0")
        assert ok is True
        mock_run.return_value = MagicMock(returncode=0, stdout="agnes 0.40.10\n", stderr="")
        ok, detail = su._smoke_test_new_binary("uv", expected_version="0.40.0")
        assert ok is False
        assert "version mismatch" in detail


# ---------------------------------------------------------------------------
# Workspace hook auto-refresh (PR #242 — ZdenekSrotyr #2 silent-stop fix)
# ---------------------------------------------------------------------------


def test_hook_refresh_fires_when_cli_already_current(monkeypatch):
    """The info-is-None fast path must still refresh hooks. Covers the
    v0.48→v0.49 migration moment when the operator already self-upgraded
    the CLI (so the second self-upgrade call from a SessionStart hook
    finds nothing to install), but their workspace settings.json was
    written by the older CLI version and lacks the new capture-session
    hook entry."""
    monkeypatch.setenv("AGNES_LOCAL_DIR", "/fake/workspace")
    with patch("cli.commands.self_upgrade.check", return_value=_current_info()), \
         patch("cli.commands.self_upgrade.maybe_refresh_claude_hooks") as mock_refresh:
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        mock_refresh.assert_called_once()


def test_hook_refresh_fires_after_successful_install(monkeypatch):
    """The install-success path must refresh hooks AFTER the new wheel is
    in place — so any wire-format change in the new release lands on the
    next session-start without re-running `agnes init`."""
    monkeypatch.setenv("AGNES_LOCAL_DIR", "/fake/workspace")
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=None), \
         patch("cli.commands.self_upgrade._record_last_known_good"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"), \
         patch("cli.commands.self_upgrade.maybe_refresh_claude_hooks") as mock_refresh:
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0
        mock_refresh.assert_called_once()


def test_hook_refresh_skipped_on_install_failure(monkeypatch):
    """Failed install: do NOT refresh hooks — the rollback has already
    run and the workspace is in a known-prior state; rewriting hooks now
    could pin a layout that doesn't match the rolled-back binary."""
    monkeypatch.setenv("AGNES_LOCAL_DIR", "/fake/workspace")
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"), \
         patch("cli.commands.self_upgrade.subprocess.run") as mock_run, \
         patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()), \
         patch("cli.commands.self_upgrade._read_last_known_good", return_value=_PRIOR_URL), \
         patch("cli.commands.self_upgrade._record_last_known_good"), \
         patch("cli.commands.self_upgrade._invalidate_update_cache"), \
         patch("cli.commands.self_upgrade.maybe_refresh_claude_hooks") as mock_refresh:
        mock_run.return_value = MagicMock(returncode=0)  # install rc=0 but smoke failed
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 1
        mock_refresh.assert_not_called()


def test_hook_refresh_skipped_when_check_only(monkeypatch):
    """--check-only is read-only intent; never touch the workspace."""
    monkeypatch.setenv("AGNES_LOCAL_DIR", "/fake/workspace")
    with patch("cli.commands.self_upgrade.check", return_value=_outdated_info()), \
         patch("cli.commands.self_upgrade.maybe_refresh_claude_hooks") as mock_refresh:
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
        # exit 1 because outdated — see test_check_only_when_outdated_exits_1
        assert result.exit_code == 1
        mock_refresh.assert_not_called()


def test_hook_refresh_failure_does_not_flip_exit_code(monkeypatch):
    """An exception inside maybe_refresh_claude_hooks must NOT turn a
    successful upgrade into rc=1. The refresh is best-effort."""
    monkeypatch.setenv("AGNES_LOCAL_DIR", "/fake/workspace")
    with patch("cli.commands.self_upgrade.check", return_value=_current_info()), \
         patch("cli.commands.self_upgrade.maybe_refresh_claude_hooks",
               side_effect=PermissionError("settings.json read-only")):
        result = runner.invoke(app, ["self-upgrade"])
        assert result.exit_code == 0