diff --git a/CHANGELOG.md b/CHANGELOG.md index 20f8e8b..73fd64c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,10 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C - **Asana connector reverted from hosted Remote MCP back to PAT + raw REST against `app.asana.com/api/1.0`.** The MCP path (introduced in commit `adee8ea`, 2026-05-11) used ~5× the tokens per call because Claude Code reads the entire MCP response envelope; the PAT + REST path lets the agent read only the fields it needs from a flat JSON response. The new Asana prompt stores the PAT in the OS keychain under `agnes-asana-pat`, verifies against `/users/me` before writing, and prints the unified `✅`/`❌` line. Re-running setup on an instance still holding the leftover MCP registration detects it and asks the user to run `claude mcp remove asana` first so the two surfaces don't compete. - **Atlassian connector instructs picking the longest API-token expiry (today: "1 year").** The Atlassian Cloud token-create dropdown defaults to a short-lived expiry; the prompt now tells Claude to direct the user to choose the longest option in the "Expires" dropdown. There's no public query-parameter hook on `id.atlassian.com/manage-profile/security/api-tokens` to pre-select the expiry (verified — `?expiry=1y` returns identical HTML); the prompt acknowledges that limitation so a future contributor doesn't re-investigate. +### Removed + +- **BREAKING: `agnes query --register-bq` CLI flag removed.** The flag ran the `RemoteQueryEngine` in-process on the caller's machine and required local BigQuery credentials (`BIGQUERY_PROJECT` + ADC) that analysts don't have. Calling it from an analyst workspace surfaced as a confusing `not_configured` error chain ("Could not load static instance.yaml" + "BigQuery project not configured"), and an agent following CLAUDE.md guidance for hybrid queries would land in exactly that trap. The underlying engine was originally designed server-side ("Step 28: Remote query architecture", commit `d180b201`); the CLI port (`d605e7d9`) silently assumed parity. Analysts now have two paths for combining local and remote data: `agnes snapshot create` a filtered slice of the remote table and join it locally, or run the join server-side via `agnes query --remote`. Admins keep an unchanged server-side path via `POST /api/query/hybrid` (`app/api/query_hybrid.py`). Removed: `--register-bq` flag, `register_bq` field in `--stdin` JSON, `_query_hybrid()` in `cli/commands/query.py`. CLAUDE.md "Hybrid Queries" section rewritten; `cli/skills/agnes-data-querying.md` and `docs/DATA_SOURCES.md` updated to drop the flag. + ## [0.53.1] — 2026-05-12 Follow-up to 0.53.0 closing #266 — `/admin/tables` Edit modal on BQ diff --git a/CLAUDE.md b/CLAUDE.md index f567f26..a4fcc5b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -210,7 +210,6 @@ Tables in `agnes catalog` have a `query_mode`: 5 GiB scan cap (configurable in /admin/server-config). Direct `bq."".""` paths are registry-gated — unregistered paths return 403 `bq_path_not_registered`. - 3. **`agnes query --register-bq`** for hybrid joins (rarely needed). ### `agnes snapshot create` workflow (preferred for remote tables) @@ -391,19 +390,17 @@ analysts whose first attempt failed and need to retry by hand). ## Hybrid Queries (BigQuery + Local) -For tables too large to sync locally, use hybrid queries that JOIN local data with on-demand BigQuery results: +Server-side only. Admins can POST `{sql, register_bq: {alias: bq_sql}}` to +`/api/query/hybrid` (see `app/api/query_hybrid.py`), which runs the BQ +sub-queries server-side (where BQ credentials live) and joins the result +against the server's local parquet views in a single DuckDB session. -```bash -agnes query --sql "SELECT o.*, t.views FROM orders o JOIN traffic t ON o.date = t.date" \ - --register-bq "traffic=SELECT date, SUM(views) as views FROM dataset.web WHERE date > '2026-01-01' GROUP BY 1" -``` - -The `--register-bq` flag executes a BigQuery subquery, loads the result into memory, and makes it available as a DuckDB view for the final SQL. Multiple `--register-bq` flags can be used for multiple BQ sources. - -For complex SQL, use stdin mode: -```bash -echo '{"register_bq": {"traffic": "SELECT ..."}, "sql": "SELECT ..."}' | agnes query --stdin -``` +There is no analyst-facing CLI flag for this — analysts who need to combine +a local table with a remote one should `agnes snapshot create` a filtered +subset of the remote table and `agnes query` the join locally, or run the +join server-side via `agnes query --remote`. The earlier `agnes query +--register-bq` flag ran in-process on the caller's machine and required +local BigQuery credentials that analysts don't have; it was removed. ## Extensibility diff --git a/cli/commands/query.py b/cli/commands/query.py index 8c06bd5..3a455aa 100644 --- a/cli/commands/query.py +++ b/cli/commands/query.py @@ -5,7 +5,7 @@ import os import re import sys from pathlib import Path -from typing import List, Optional +from typing import Optional import typer @@ -16,11 +16,6 @@ def query_command( remote: bool = typer.Option(False, "--remote", help="Execute on server instead of locally"), fmt: str = typer.Option("table", "--format", "-f", help="Output format: table, json, csv"), limit: int = typer.Option(1000, "--limit", help="Max rows to return"), - register_bq: Optional[List[str]] = typer.Option( - None, - "--register-bq", - help="Register a BigQuery result as a DuckDB view. Format: alias=BQ_SQL. Can be repeated.", - ), stdin: bool = typer.Option(False, "--stdin", help="Read SQL from stdin as JSON {\"sql\": \"...\"}"), ): """Execute SQL query against DuckDB.""" @@ -42,10 +37,6 @@ def query_command( try: payload = json.loads(raw) resolved_sql = payload["sql"] - # Extract register_bq from stdin JSON - stdin_bq = payload.get("register_bq", {}) - if stdin_bq and isinstance(stdin_bq, dict): - register_bq = [f"{k}={v}" for k, v in stdin_bq.items()] except (json.JSONDecodeError, KeyError) as exc: typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True) raise typer.Exit(1) @@ -54,9 +45,7 @@ def query_command( else: resolved_sql = sql - if register_bq: - _query_hybrid(resolved_sql, fmt, limit, register_bq) - elif remote: + if remote: _query_remote(resolved_sql, fmt, limit) else: _query_local(resolved_sql, fmt, limit) @@ -132,77 +121,6 @@ def _query_remote(sql: str, fmt: str, limit: int): typer.echo(f"(truncated at {limit} rows)", err=True) -def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]): - """Run a hybrid query: register BigQuery results as DuckDB views, then execute locally.""" - import duckdb - from src.remote_query import RemoteQueryEngine, RemoteQueryError, load_config - - local_dir = Path(os.environ.get("AGNES_LOCAL_DIR", ".")) - db_path = local_dir / "user" / "duckdb" / "analytics.duckdb" - if not db_path.exists(): - typer.echo("Local DuckDB not found. Run: agnes pull", err=True) - raise typer.Exit(1) - - conn = duckdb.connect(str(db_path), read_only=True) - try: - config = load_config() - engine_kwargs = {k: v for k, v in config.items() if k in ( - "max_bq_registration_rows", "max_memory_mb", "max_result_rows", "timeout_seconds" - )} - # CLI --limit flag overrides config max_result_rows - engine_kwargs["max_result_rows"] = limit - engine = RemoteQueryEngine(conn, **engine_kwargs) - - for spec in register_bq_specs: - if "=" not in spec: - typer.echo( - f"Error: --register-bq spec must be 'alias=BQ_SQL', got: {spec!r}", - err=True, - ) - raise typer.Exit(1) - alias, bq_sql = spec.split("=", 1) - alias = alias.strip() - bq_sql = bq_sql.strip() - try: - info = engine.register_bq(alias, bq_sql) - typer.echo( - f"Registered BQ alias '{alias}': {info['rows']:,} rows, " - f"{info['memory_mb']:.1f} MiB", - err=True, - ) - except RemoteQueryError as exc: - # Use the shared renderer so typed BqAccessError details - # (carried via RemoteQueryError.details) surface as a - # multi-line block with the operator-facing hint. - from cli.error_render import render_error - synthetic = {"detail": { - "kind": exc.error_type, - "alias": alias, - "message": str(exc), - **(exc.details or {}), - }} - typer.echo(render_error(400, synthetic), err=True) - raise typer.Exit(1) - - try: - result = engine.execute(sql) - except RemoteQueryError as exc: - from cli.error_render import render_error - synthetic = {"detail": { - "kind": exc.error_type, - "message": str(exc), - **(exc.details or {}), - }} - typer.echo(render_error(400, synthetic), err=True) - raise typer.Exit(1) - - _output(result["columns"], result["rows"], fmt) - if result.get("truncated"): - typer.echo(f"(truncated at {result['row_count']} rows)", err=True) - finally: - conn.close() - - def _output(columns: list, rows: list, fmt: str): if fmt == "json": output = [dict(zip(columns, row)) for row in rows] diff --git a/cli/error_render.py b/cli/error_render.py index 3153917..6399478 100644 --- a/cli/error_render.py +++ b/cli/error_render.py @@ -1,8 +1,7 @@ """Shared CLI renderer for HTTP error responses. -Three CLI paths surface BigQuery / guardrail / RBAC typed errors today: +Two CLI paths surface BigQuery / guardrail / RBAC typed errors today: - ``agnes query --remote`` (POST /api/query) -- ``agnes query --register-bq`` (RemoteQueryEngine wrapping BqAccessError) - ``agnes snapshot create`` / ``agnes schema`` etc. (cli.v2_client wrappers around v2 endpoints) All three previously flattened the structured ``detail`` JSON to a diff --git a/cli/skills/agnes-data-querying.md b/cli/skills/agnes-data-querying.md index 01dd546..09dae98 100644 --- a/cli/skills/agnes-data-querying.md +++ b/cli/skills/agnes-data-querying.md @@ -30,8 +30,7 @@ Tables in `agnes catalog` have a `query_mode`: For **remote tables**, you MUST either: 1. `agnes snapshot create` a filtered subset → query the local snapshot (preferred), OR -2. `agnes query --remote` for one-shot server-side execution, OR -3. `agnes query --register-bq` for hybrid joins (rare; see docs) +2. `agnes query --remote` for one-shot server-side execution ## The `agnes snapshot create` workflow (preferred for remote tables) diff --git a/cli/skills/connectors.md b/cli/skills/connectors.md index 1f86076..7f7ac9f 100644 --- a/cli/skills/connectors.md +++ b/cli/skills/connectors.md @@ -59,7 +59,7 @@ The `_meta` table must have columns: | Latency under 100 ms, table fits on disk | `materialized` | Local parquet, no BQ roundtrip | | Table too large for analyst's disk, occasional ad-hoc query | `remote` | DuckDB BQ extension, no download | | Table too large for disk AND analyst hits it constantly | `materialized` with aggregation/filter | Scheduled COPY of a slice | -| One-off subquery joined with local data | (no registry row) | Use `agnes query --register-bq …` for ad-hoc | +| One-off subquery joined with local data | (no registry row) | `agnes snapshot create` a filtered slice and join locally, or run the join server-side via `agnes query --remote` | Cost: `materialized` runs once per `sync_schedule` regardless of how many analysts query it; `remote` runs once per analyst-query. The break-even is roughly query frequency × bytes scanned vs. one COPY × bytes scanned. diff --git a/docs/DATA_SOURCES.md b/docs/DATA_SOURCES.md index 0f487db..a81047f 100644 --- a/docs/DATA_SOURCES.md +++ b/docs/DATA_SOURCES.md @@ -143,12 +143,17 @@ Not supported in M1. The register endpoint rejects any `source_table` containing ### Hybrid Queries -For queries that JOIN local data with BigQuery results: +Server-side only. Admins can POST `{sql, register_bq: {alias: bq_sql}}` to +`/api/query/hybrid` (`app/api/query_hybrid.py`); the BigQuery sub-queries +run server-side, where BQ credentials live, and the join runs against the +server's local parquet views in a single DuckDB session. -```bash -agnes query --sql "SELECT o.*, t.views FROM orders o JOIN traffic t ON o.date = t.date" \ - --register-bq "traffic=SELECT date, SUM(views) as views FROM dataset.web GROUP BY 1" -``` +Analysts who need to combine a local table with a remote one should +`agnes snapshot create` a filtered slice of the remote table and join it +locally, or run the join server-side via `agnes query --remote`. The +earlier `agnes query --register-bq` flag (which ran in-process on the +caller's machine) was removed because it required local BigQuery +credentials that analysts don't have. ## Jira Connector diff --git a/tests/test_cli.py b/tests/test_cli.py index 3dce55d..a4a288c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -270,17 +270,6 @@ class TestAdminCommands: assert result.exit_code == 1 -class TestQueryHybrid: - def test_register_bq_flag_help(self): - result = runner.invoke(app, ["query", "--help"]) - assert result.exit_code == 0 - # Rich/Typer may insert ANSI escape codes within option names, - # so check for the parts separately - assert "register" in result.output - assert "bq" in result.output - assert "BigQuery" in result.output - - class TestCatalogMetrics: def test_catalog_metrics_help(self): result = runner.invoke(app, ["catalog", "--help"]) diff --git a/tests/test_cli_query_render.py b/tests/test_cli_query_render.py index cbe483a..10868c5 100644 --- a/tests/test_cli_query_render.py +++ b/tests/test_cli_query_render.py @@ -1,13 +1,15 @@ """CLI commands route BQ-typed errors through the shared renderer. -Three CLI paths surface BQ errors today: +Two CLI paths surface BQ errors today: - `agnes query --remote` (cli/commands/query.py:_query_remote → /api/query) -- `agnes query --register-bq` (cli/commands/query.py:_query_hybrid via - RemoteQueryError, which wraps server-side BqAccessError) - `agnes snapshot create` / `agnes schema` / etc. (cli/v2_client.V2ClientError → v2 endpoints) After the refactor they all call cli.error_render.render_error so analyst output is consistent and structured. Closes part of #160 §4.7.3. + +The third assertion below covers `RemoteQueryError.details`, which is now +exercised only via the server-side `/api/query/hybrid` endpoint +(`app/api/query_hybrid.py`) — the client CLI no longer wraps it. """ from __future__ import annotations @@ -53,8 +55,9 @@ def test_v2_client_error_drops_truncation_for_dicts(): def test_remote_query_error_carries_details(): - """`RemoteQueryError` already has a `details` field. Verify the type's - surface so cli/commands/query.py:_query_hybrid can rely on it.""" + """`RemoteQueryError` carries a structured `details` field. Verify the + type's surface so `app/api/query_hybrid.py` (the only remaining caller) + can rely on it.""" from src.remote_query import RemoteQueryError err = RemoteQueryError(