From ee83cebbda55a4c7cc5b8d5decd0712243f127f2 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 20:45:29 +0200 Subject: [PATCH] fix(cli): Windows console crash on cs-CZ codepage (port + broaden #172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports Minas's PR #172 (against pre-rename `da` CLI on main) and applies the principle to the post-rename `agnes` CLI. Two distinct failure modes on Windows consoles whose default codepage is cp1250 (cs-CZ) / cp1252 (en-US): 1. `agnes pull` and other Rich-progress codepaths crash with UnicodeEncodeError on Braille spinner glyphs. Fix: `cli/main.py` reconfigures stdout/stderr to UTF-8 with errors='replace' at import time on `sys.platform == 'win32'` so Rich's legacy-Windows render path emits decodable bytes. Wrapped in try/except so pytest's captured streams (which aren't TextIOWrapper) don't break. 2. `agnes skills list` and `agnes skills show` crash with UnicodeDecodeError when reading skill markdown containing em-dashes / accented chars. Default `Path.read_text()` uses locale.getpreferredencoding(False), which is the legacy non-UTF-8 codepage on Windows. Fix: every call site passes encoding='utf-8' explicitly. Broader scope than #172 because: - The bootstrap rewrite renamed/removed several files Minas's PR patched (`cli/commands/analyst.py` -> rolled into init.py; `cli/commands/sync.py` -> split into pull/push). Those targets no longer exist; the equivalent code lives in init.py. - Other call sites Minas didn't touch (still bare in his branch) are patched here too — config.py / update_check.py / snapshot_meta.py / setup.py / skills.py — so the codebase has zero locale-default text I/O in cli/. Side cleanup: stale "Run `da`" reference in the error message at snapshot_meta.py:88 changed to `agnes` while touching the file. 
--- CHANGELOG.md | 1 + cli/commands/init.py | 2 +- cli/commands/setup.py | 2 +- cli/commands/skills.py | 4 ++-- cli/config.py | 14 +++++++------- cli/main.py | 16 ++++++++++++++++ cli/snapshot_meta.py | 8 ++++---- cli/update_check.py | 4 ++-- 8 files changed, 34 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a30baee..4cccd4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n - `agnes snapshot create` (formerly `da fetch`) no longer materializes an empty `user/duckdb/analytics.duckdb` when run before any `agnes pull`. Friendly hint redirects to `agnes pull`. - Workspace `agnes status` reads from the canonical `server/parquet/` and `user/duckdb/analytics.duckdb` paths (was reading legacy `data/parquet/`, `data/metadata/last_sync.json`). - `agnes init` and `agnes pull` errors now use the `cli/error_render.py` typed-error renderer (added in 0.32.0), so analyst-facing error UX matches the structured shape `agnes query --remote` already produces. +- **Windows: `agnes` CLI no longer crashes on cs-CZ / non-UTF-8 consoles.** Two failure modes addressed (originally reported in #172 against the pre-rename `da` CLI; ported and broadened here): (1) `agnes pull` and any other Rich-progress-bar codepath crashed with `UnicodeEncodeError` because cp1250 / cp1252 cannot encode Rich's Braille spinner glyphs — `cli/main.py` now reconfigures `sys.stdout` / `sys.stderr` to UTF-8 with `errors="replace"` at import time when `sys.platform == "win32"`. (2) `agnes skills list` and `agnes skills show` crashed with `UnicodeDecodeError` reading skill markdown that contains em-dashes / accents — every `Path.read_text()` / `Path.write_text()` / `open()` call site in `cli/` (including ones not touched by #172, since several files were renamed in the bootstrap rewrite) now passes `encoding="utf-8"` explicitly. 
Defensive: also covers JSON / YAML config files that were ASCII-only in practice but were one non-ASCII value away from the same failure mode. - `agnes snapshot create … --estimate` in a pre-init directory no longer leaks an httpx `ConnectError` traceback to stderr. The estimate-guard fix (3d587681) let `--estimate` reach `api_post_json`, but the existing `except V2ClientError` clause didn't catch transport-layer errors when no server was configured (defaulted to `http://localhost:8000`). Now also catches `httpx.HTTPError` and renders the friendly hint `Run \`agnes init …\` first`. - `agnes push` now reads Claude Code session jsonls from `~/.claude/projects//` (where Claude Code actually writes them), instead of `/user/sessions/` (which the SessionEnd hook never populated — the previous code uploaded an empty list every time). Encoding logic in `cli/lib/claude_sessions.py` probes both Claude Code variants — older `/`→`-` and newer all-non-alphanumeric→`-` — and unions the result, so users who have upgraded Claude Code mid-project see sessions from both encoded dirs. Falls back to `/user/sessions/` for back-compat. 
diff --git a/cli/commands/init.py b/cli/commands/init.py index 8053e18..f7e6154 100644 --- a/cli/commands/init.py +++ b/cli/commands/init.py @@ -155,7 +155,7 @@ def init( settings_path.write_text(json.dumps( {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}}, indent=2, - )) + ), encoding="utf-8") install_claude_hooks(workspace) # ------------------------------------------------------------------ diff --git a/cli/commands/setup.py b/cli/commands/setup.py index e4aa5cc..668e402 100644 --- a/cli/commands/setup.py +++ b/cli/commands/setup.py @@ -23,7 +23,7 @@ def setup_init( import yaml config = {"server": server} - config_file.write_text(yaml.dump(config)) + config_file.write_text(yaml.dump(config), encoding="utf-8") typer.echo(f"Config saved to {config_file}") os.environ["AGNES_SERVER"] = server typer.echo("\nNext: agnes setup bootstrap --email admin@company.com") diff --git a/cli/commands/skills.py b/cli/commands/skills.py index 3718d65..f54c3e5 100644 --- a/cli/commands/skills.py +++ b/cli/commands/skills.py @@ -18,7 +18,7 @@ def list_skills(): for f in sorted(SKILLS_DIR.glob("*.md")): name = f.stem # Read first line as description - first_line = f.read_text().split("\n")[0].strip("# ").strip() + first_line = f.read_text(encoding="utf-8").split("\n")[0].strip("# ").strip() typer.echo(f" {name:25s} {first_line}") @@ -29,4 +29,4 @@ def show_skill(name: str = typer.Argument(..., help="Skill name to display")): if not skill_file.exists(): typer.echo(f"Skill '{name}' not found. 
Run: agnes skills list", err=True) raise typer.Exit(1) - typer.echo(skill_file.read_text()) + typer.echo(skill_file.read_text(encoding="utf-8")) diff --git a/cli/config.py b/cli/config.py index 5c31e0a..e20c425 100644 --- a/cli/config.py +++ b/cli/config.py @@ -20,7 +20,7 @@ def get_server_url() -> str: def get_token() -> Optional[str]: token_file = _config_dir() / "token.json" if token_file.exists(): - data = json.loads(token_file.read_text()) + data = json.loads(token_file.read_text(encoding="utf-8")) return data.get("access_token") return os.environ.get("AGNES_TOKEN") @@ -37,7 +37,7 @@ def save_token(token: str, email: str, role: Optional[str] = None): token_file.write_text(json.dumps({ "access_token": token, "email": email, - }, indent=2)) + }, indent=2), encoding="utf-8") def clear_token(): @@ -50,20 +50,20 @@ def load_config() -> dict: config_file = _config_dir() / "config.yaml" if config_file.exists(): import yaml - return yaml.safe_load(config_file.read_text()) or {} + return yaml.safe_load(config_file.read_text(encoding="utf-8")) or {} return {} def get_sync_state() -> dict: state_file = _config_dir() / "sync_state.json" if state_file.exists(): - return json.loads(state_file.read_text()) + return json.loads(state_file.read_text(encoding="utf-8")) return {} def save_sync_state(state: dict): state_file = _config_dir() / "sync_state.json" - state_file.write_text(json.dumps(state, indent=2)) + state_file.write_text(json.dumps(state, indent=2), encoding="utf-8") def save_config(data: dict): @@ -73,6 +73,6 @@ def save_config(data: dict): config_file = _config_dir() / "config.yaml" existing = {} if config_file.exists(): - existing = yaml.safe_load(config_file.read_text()) or {} + existing = yaml.safe_load(config_file.read_text(encoding="utf-8")) or {} existing.update(data) - config_file.write_text(yaml.dump(existing, default_flow_style=False)) + config_file.write_text(yaml.dump(existing, default_flow_style=False), encoding="utf-8") diff --git a/cli/main.py 
b/cli/main.py index d61a1ff..67f59ac 100644 --- a/cli/main.py +++ b/cli/main.py @@ -3,11 +3,27 @@ Primary interface for AI agents. Install: uv tool install agnes-the-ai-analyst """ +import sys from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _pkg_version import typer +# Force UTF-8 on Windows stdout/stderr at import time. The default Windows +# console codepage (cp1250 on cs-CZ, cp1252 on en-US, …) cannot encode the +# Braille spinner glyphs Rich uses for `agnes pull` progress, nor the +# em-dash / accented chars that show up in skill markdown via +# `agnes skills list`. Writing either one raises UnicodeEncodeError in the +# middle of a command. `reconfigure` is a TextIOWrapper method; replacement +# streams (pytest capture, pipes wrapped by other tooling) may not have +# it — swallow the AttributeError there. +if sys.platform == "win32": + try: + sys.stdout.reconfigure(encoding="utf-8", errors="replace") + sys.stderr.reconfigure(encoding="utf-8", errors="replace") + except (AttributeError, OSError): + pass + from cli.commands.auth import auth_app from cli.commands.init import init_app from cli.commands.pull import pull_app diff --git a/cli/snapshot_meta.py b/cli/snapshot_meta.py index 79b2c19..96ca26d 100644 --- a/cli/snapshot_meta.py +++ b/cli/snapshot_meta.py @@ -39,7 +39,7 @@ def _meta_path(snap_dir: Path, name: str) -> Path: def write_meta(snap_dir: Path, meta: SnapshotMeta) -> None: snap_dir.mkdir(parents=True, exist_ok=True) - with _meta_path(snap_dir, meta.name).open("w") as f: + with _meta_path(snap_dir, meta.name).open("w", encoding="utf-8") as f: json.dump(asdict(meta), f, indent=2) @@ -47,7 +47,7 @@ def read_meta(snap_dir: Path, name: str) -> Optional[SnapshotMeta]: p = _meta_path(snap_dir, name) if not p.exists(): return None - data = json.loads(p.read_text()) + data = json.loads(p.read_text(encoding="utf-8")) return SnapshotMeta(**data) @@ -57,7 +57,7 @@ def list_snapshots(snap_dir: Path) -> list[SnapshotMeta]: 
out = [] for meta_file in snap_dir.glob("*.meta.json"): try: - data = json.loads(meta_file.read_text()) + data = json.loads(meta_file.read_text(encoding="utf-8")) out.append(SnapshotMeta(**data)) except (json.JSONDecodeError, TypeError): continue @@ -85,7 +85,7 @@ def snapshot_lock(snap_dir: Path): if _fcntl is None: raise RuntimeError( "snapshot_lock requires POSIX fcntl — Windows is not supported. " - "Run `da` from a Mac or Linux machine, or use a WSL shell." + "Run `agnes` from a Mac or Linux machine, or use a WSL shell." ) snap_dir.mkdir(parents=True, exist_ok=True) lock_file = snap_dir / ".lock" diff --git a/cli/update_check.py b/cli/update_check.py index d062008..d278218 100644 --- a/cli/update_check.py +++ b/cli/update_check.py @@ -88,7 +88,7 @@ def _read_cache() -> Optional[dict]: if not p.exists(): return None try: - return json.loads(p.read_text()) + return json.loads(p.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None @@ -97,7 +97,7 @@ def _write_cache(entry: dict) -> None: p = _cache_path() try: p.parent.mkdir(parents=True, exist_ok=True) - p.write_text(json.dumps(entry)) + p.write_text(json.dumps(entry), encoding="utf-8") except OSError: pass # best-effort — cache failure must not break the flow