From 8233c3e3f97f081b60806917876f724230d88737 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 21:22:19 +0200 Subject: [PATCH] chore(docs): replace stale `da` verbs and vendor-specific install paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweep operator runbooks (docs/QUICKSTART, docs/HEADLESS_USAGE, docs/architecture, docs/sample-data, docs/agent-workspace-prompt, docs/metrics/metrics.yml, dev_docs/server, dev_docs/disaster-recovery), the corporate-memory service README, the jira connector README + backfill scripts, the deploy skill, and test docstrings. Replaces `da sync` → `agnes pull`, `da analyst setup` → `agnes init`, `da metrics ...` → `agnes catalog --metrics` / `agnes admin metrics ...`, `da fetch` → `agnes snapshot create`, plus the matching docker-compose admin invocations. Vendor-specific `/opt/data-analyst/` install paths in jira backfill / consistency scripts and operator docs are replaced with the placeholder `<install-dir>` and a new `AGNES_ENV_FILE` env-var override that lets a deployment inject its actual install path without a code change. Aligns with the OSS vendor-agnostic policy in CLAUDE.md. CHANGELOG `### Internal` entry summarizes the audit and reaffirms the intentional stale-marker tuples (`_LEGACY_STRINGS`, `_OUR_COMMAND_MARKERS`) that must keep referencing `da sync` / `da fetch` / etc. for hook upgrade and override-detection logic. 
--- CHANGELOG.md | 1 + cli/skills/deploy.md | 4 +- connectors/jira/README.md | 81 +++++-------------- connectors/jira/scripts/backfill.py | 9 ++- .../jira/scripts/backfill_remote_links.py | 7 +- connectors/jira/scripts/backfill_sla.py | 5 +- connectors/jira/scripts/consistency_check.py | 14 +++- dev_docs/disaster-recovery.md | 6 +- dev_docs/server.md | 19 ++--- docs/HEADLESS_USAGE.md | 4 +- docs/QUICKSTART.md | 1 - docs/agent-workspace-prompt.md | 10 +-- docs/architecture.md | 4 +- docs/metrics/metrics.yml | 2 +- docs/sample-data.md | 7 +- services/corporate_memory/README.md | 10 +-- tests/test_admin_unregister_cleanup.py | 4 +- tests/test_cli.py | 2 +- tests/test_cli_admin_metrics.py | 2 +- tests/test_cli_catalog_metrics.py | 2 +- tests/test_keboola_materialized_e2e.py | 4 +- .../test_query_materialized_error_message.py | 2 +- tests/test_setup_hooks_template.py | 2 +- tests/test_sync_trigger_materialized.py | 2 +- 24 files changed, 89 insertions(+), 115 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aeff32e..91c2c91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n - `tests/test_reader_smoke_matrix.py` — load-bearing parametrized test: every reader CLI command runs on a freshly-bootstrapped zero-grants workspace without a Python traceback. - `tests/test_clean_install_integration.py` — end-to-end happy-path tests (minimal grants, zero grants, force preserves CLAUDE.local.md, readers in pre-init dir). - `docs/RELEASE_CHECKLIST.md` — manual clean-install protocol mandated for any PR touching the bootstrap path. 
+- Audited and replaced stale `da` verbs left over from prior merges in admin UI text, audit-log messages, code comments, operator runbooks, analyst-facing skill docs, and test docstrings (welcome template renderer/API tests now assert exact emitted markers — `agnes init` for analyst flow, `agnes auth` for admin flow — with explicit absence checks on legacy verbs). Vendor-specific `/opt/data-analyst/` install paths in jira backfill/consistency scripts and operator docs replaced with `<install-dir>/` and an `AGNES_ENV_FILE` env-var override. Intentional stale-marker tuples (`_LEGACY_STRINGS` in `app/api/claude_md.py`, `_OUR_COMMAND_MARKERS` in `cli/lib/hooks.py`) and tests that seed legacy hook content (`tests/test_lib_hooks.py`, `tests/test_legacy_strings_scan.py`) are preserved by design. ## [0.33.0] — 2026-05-04 diff --git a/cli/skills/deploy.md b/cli/skills/deploy.md index 70426c0..ea915d4 100644 --- a/cli/skills/deploy.md +++ b/cli/skills/deploy.md @@ -19,8 +19,8 @@ ssh user@your-server-ip ### 2. 
Clone the repository ```bash -git clone https://github.com/keboola/agnes-the-ai-analyst.git /opt/data-analyst -cd /opt/data-analyst +git clone https://github.com/keboola/agnes-the-ai-analyst.git +cd git checkout main ``` diff --git a/connectors/jira/README.md b/connectors/jira/README.md index 759661d..2417df9 100644 --- a/connectors/jira/README.md +++ b/connectors/jira/README.md @@ -257,7 +257,7 @@ T+Xsec Analyst: Query with DuckDB - sees latest data ### Server Environment Variables -In `/opt/data-analyst/.env`: +In `/.env` (typically the directory you run `docker compose` from): ```bash # Jira webhook integration @@ -364,7 +364,7 @@ Response: ```bash # Webapp logs (webhook processing) -tail -f /opt/data-analyst/logs/webapp-error.log | grep -i jira +docker compose logs app --tail 200 | grep -i jira # Recent webhook events ls -lt /data/src_data/raw/jira/webhook_events/ | head -20 @@ -535,67 +535,26 @@ WHERE first_response_elapsed_millis IS NOT NULL ## Analyst Sync Configuration -Jira data is an **optional dataset** - not synced by default to save bandwidth. - -**Enable Jira sync:** -```bash -# Edit local config (created on first sync_data.sh run) -nano ~/.config/data-analyst/sync.yaml - -# Change: -datasets: - jira: true # Enable parquet data (~50MB) - jira_attachments: false # Keep false unless you need actual files -``` - -**Then sync:** -```bash -bash server/scripts/sync_data.sh -``` +Whether an analyst sees Jira tables locally is decided server-side: an admin +must register the Jira tables and grant the analyst's group access via +`resource_grants(resource_type='table')`. Once granted, the manifest +advertises the tables and `agnes pull` downloads the parquets to the +analyst's workspace on the next session. 
DuckDB views for Jira tables are created automatically if data exists: -- `jira_issues` - main issues table -- `jira_comments` - issue comments -- `jira_attachments` - attachment metadata (filenames, sizes, URLs) -- `jira_changelog` - field change history -- `jira_issuelinks` - links between issues (blocks, duplicates, relates to) -- `jira_remote_links` - external links (Confluence, Slack, etc.) +- `jira_issues` — main issues table +- `jira_comments` — issue comments +- `jira_attachments` — attachment metadata (filenames, sizes, URLs) +- `jira_changelog` — field change history +- `jira_issuelinks` — links between issues (blocks, duplicates, relates to) +- `jira_remote_links` — external links (Confluence, Slack, etc.) ## Attachment Access -Attachments (images, logs, PDFs) are stored separately from parquet data. - -### Option 1: Download per-ticket (recommended) - -Download attachments for a specific ticket to local temp folder: - -```bash -# Download all attachments for one ticket -rsync -avz data-analyst:server/jira_attachments/SUPPORT-1234/ /tmp/SUPPORT-1234/ - -# View locally -ls /tmp/SUPPORT-1234/ -open /tmp/SUPPORT-1234/screenshot.png # macOS -``` - -This is fast (only downloads files for one ticket) and keeps your local machine clean. - -### Option 2: Sync attachments locally (for heavy analysis) - -If you need frequent access to attachments, enable full sync: - -```yaml -# ~/.config/data-analyst/sync.yaml -datasets: - jira: true - jira_attachments: true # Syncs ~500MB+ of files -``` - -Then `sync_data.sh` will rsync attachments to `./server/jira_attachments/`. - -### Finding attachment path from parquet - -The `jira_attachments` table has a `local_path` column with the server path: +Attachments (images, logs, PDFs) are stored on the server alongside parquet +data and are **not** distributed via `agnes pull` (the manifest only +advertises parquet tables). 
The `jira_attachments` table has a `local_path` +column with the server-side filesystem path: ```sql SELECT @@ -613,7 +572,11 @@ issue_key | filename | local_path SUPPORT-1234 | screenshot.png | /data/src_data/raw/jira/attachments/SUPPORT-1234/... | 45678 ``` -To access locally (if synced): replace `/data/src_data/raw/jira/attachments/` with `./server/jira_attachments/`. +To pull the actual file to a workstation, operators with SSH access to the +host can `scp` / `rsync` from the path above. Public OSS does not ship a +client-side attachment-fetch primitive — wire one up per deployment if +attachment access is required (e.g. a thin admin endpoint that streams the +file with the same RBAC gate as the parquet table). ## Future Improvements diff --git a/connectors/jira/scripts/backfill.py b/connectors/jira/scripts/backfill.py index 1241f62..c655807 100755 --- a/connectors/jira/scripts/backfill.py +++ b/connectors/jira/scripts/backfill.py @@ -6,7 +6,7 @@ Downloads all issues from Jira using JQL search with pagination. Reuses the webapp's JiraService for consistent data handling. Usage: - # On server (uses /opt/data-analyst/.env): + # On server (loads .env from /.env or the current directory): python -m connectors.jira.scripts.backfill # With custom settings: @@ -58,12 +58,15 @@ class Config: @classmethod def from_env(cls) -> "Config": """Load configuration from environment variables.""" - # Try to load .env file from common locations + # Try to load .env file from common locations. + # Customer-specific install paths (e.g. /opt//.env) can be + # injected via the AGNES_ENV_FILE env var without editing this list. 
env_paths = [ - Path("/opt/data-analyst/.env"), + Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None, Path.cwd() / ".env", Path(__file__).parent.parent / ".env", ] + env_paths = [p for p in env_paths if p is not None] for env_path in env_paths: if env_path.exists(): load_dotenv(env_path) diff --git a/connectors/jira/scripts/backfill_remote_links.py b/connectors/jira/scripts/backfill_remote_links.py index 9ecb1a3..211a757 100644 --- a/connectors/jira/scripts/backfill_remote_links.py +++ b/connectors/jira/scripts/backfill_remote_links.py @@ -7,7 +7,7 @@ and embeds them into existing issue JSON files. This enables the Parquet transform to extract remote_links table data. Usage: - # On server (uses /opt/data-analyst/.env): + # On server (loads .env from /.env or the current directory): python -m connectors.jira.scripts.backfill_remote_links # With parallel workers: @@ -44,11 +44,14 @@ logger = logging.getLogger(__name__) def load_config() -> dict: """Load configuration from environment variables.""" + # Customer-specific install paths (e.g. /opt//.env) can be + # injected via the AGNES_ENV_FILE env var without editing this list. env_paths = [ - Path("/opt/data-analyst/.env"), + Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None, Path.cwd() / ".env", Path(__file__).parent.parent / ".env", ] + env_paths = [p for p in env_paths if p is not None] for env_path in env_paths: if env_path.exists(): load_dotenv(env_path) diff --git a/connectors/jira/scripts/backfill_sla.py b/connectors/jira/scripts/backfill_sla.py index afe1519..3b579b9 100644 --- a/connectors/jira/scripts/backfill_sla.py +++ b/connectors/jira/scripts/backfill_sla.py @@ -57,11 +57,14 @@ logger = logging.getLogger(__name__) def load_config() -> dict: """Load configuration from environment variables.""" + # Customer-specific install paths (e.g. /opt//.env) can be + # injected via the AGNES_ENV_FILE env var without editing this list. 
env_paths = [ - Path("/opt/data-analyst/.env"), + Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None, Path.cwd() / ".env", Path(__file__).parent.parent / ".env", ] + env_paths = [p for p in env_paths if p is not None] for env_path in env_paths: if env_path.exists(): load_dotenv(env_path) diff --git a/connectors/jira/scripts/consistency_check.py b/connectors/jira/scripts/consistency_check.py index 1fe5a3a..3dc37c9 100644 --- a/connectors/jira/scripts/consistency_check.py +++ b/connectors/jira/scripts/consistency_check.py @@ -72,12 +72,15 @@ class Config: @classmethod def from_env(cls) -> "Config": """Load configuration from environment variables.""" - # Try to load .env file from common locations + # Try to load .env file from common locations. + # Customer-specific install paths (e.g. /opt//.env) can be + # injected via the AGNES_ENV_FILE env var without editing this list. env_paths = [ - Path("/opt/data-analyst/.env"), + Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None, Path.cwd() / ".env", Path(__file__).parent.parent / ".env", ] + env_paths = [p for p in env_paths if p is not None] for env_path in env_paths: if env_path.exists(): load_dotenv(env_path) @@ -92,8 +95,11 @@ class Config: raw_dir = Path(os.environ.get("JIRA_DATA_DIR", "/data/src_data/raw/jira")) parquet_dir = Path(os.environ.get("JIRA_PARQUET_DIR", "/data/src_data/parquet/jira")) - repo_dir = Path(os.environ.get("REPO_DIR", "/opt/data-analyst/repo")) - venv_python = Path(os.environ.get("VENV_PYTHON", "/opt/data-analyst/.venv/bin/python")) + # REPO_DIR / VENV_PYTHON have no sensible OSS default — operators + # must export them when running this script outside an editable + # checkout. 
+ repo_dir = Path(os.environ.get("REPO_DIR", str(Path(__file__).resolve().parents[3]))) + venv_python = Path(os.environ.get("VENV_PYTHON", sys.executable)) return cls( jira_domain=os.environ["JIRA_DOMAIN"], diff --git a/dev_docs/disaster-recovery.md b/dev_docs/disaster-recovery.md index ce58c90..ff859f0 100644 --- a/dev_docs/disaster-recovery.md +++ b/dev_docs/disaster-recovery.md @@ -87,8 +87,6 @@ docker compose up -d # Trigger a full sync from the data source curl -X POST http://localhost:8000/api/sync/trigger -# Or via CLI: -docker compose exec app da sync ``` DuckDB extract files and parquet will be repopulated from Keboola / BigQuery. @@ -123,8 +121,8 @@ not regenerated — user accounts and table definitions are not recreated by syn 4. **Clone repo and create .env**: ```bash - git clone git@github.com:your-org/ai-data-analyst.git /opt/data-analyst - cd /opt/data-analyst + git clone git@github.com:keboola/agnes-the-ai-analyst.git + cd cp config/.env.template .env # Fill in secrets from GitHub Secrets / 1Password ``` diff --git a/dev_docs/server.md b/dev_docs/server.md index 0a2a776..36a1106 100644 --- a/dev_docs/server.md +++ b/dev_docs/server.md @@ -88,11 +88,8 @@ the database is unavailable. # Via API curl -X POST http://localhost:8000/api/sync/trigger -# Via CLI inside the container -docker compose exec app da sync - # Sync a single table -docker compose exec app da sync --table table_name +curl -X POST "http://localhost:8000/api/sync/trigger?table=table_name" ``` ### Check sync status @@ -123,16 +120,16 @@ any destructive operation. 
```bash # List registered tables -docker compose exec app da admin tables list +docker compose exec app agnes admin list-tables # Register a new table -docker compose exec app da admin tables add +docker compose exec app agnes admin register-table # User management -docker compose exec app da admin users list +docker compose exec app agnes admin list-users # Query data directly -docker compose exec app da query "SELECT * FROM my_table LIMIT 10" +docker compose exec app agnes query "SELECT * FROM my_table LIMIT 10" ``` ## Application Deployment @@ -143,7 +140,7 @@ Application is deployed via Docker image. The recommended workflow: 2. CI builds and pushes a new image 3. On the server, pull and restart: ```bash - cd /opt/data-analyst + cd docker compose pull docker compose up -d ``` @@ -154,7 +151,7 @@ To pin a specific image version, set the tag in `docker-compose.yml` before depl ```bash # Edit .env (never commit this file) -nano /opt/data-analyst/.env +nano /.env # Restart app to apply changes docker compose restart app @@ -297,7 +294,7 @@ most lock issues. docker compose logs app | grep -i "sync\|error\|exception" # Verify data source credentials in .env -docker compose exec app da admin tables list +docker compose exec app agnes admin list-tables ``` ### Out of disk space diff --git a/docs/HEADLESS_USAGE.md b/docs/HEADLESS_USAGE.md index 2270f17..aa0086d 100644 --- a/docs/HEADLESS_USAGE.md +++ b/docs/HEADLESS_USAGE.md @@ -31,8 +31,8 @@ agnes query "SELECT 1" AGNES_TOKEN: ${{ secrets.AGNES_TOKEN }} AGNES_SERVER: https://agnes.example.com run: | - pip install data-analyst - da sync --all + uv tool install "$AGNES_SERVER/cli/wheel/agnes.whl" + agnes pull ``` ## Revoke diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index baa3dcc..ce86163 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -48,7 +48,6 @@ 7. 
Trigger a data sync: ```bash curl -X POST http://localhost:8000/api/sync/trigger - # Or: da sync ``` ## Docker Deployment diff --git a/docs/agent-workspace-prompt.md b/docs/agent-workspace-prompt.md index 2488295..8c8e1ca 100644 --- a/docs/agent-workspace-prompt.md +++ b/docs/agent-workspace-prompt.md @@ -1,23 +1,23 @@ # Agent Workspace Prompt The agent workspace prompt is the `CLAUDE.md` file written to each analyst's -workspace by `da analyst setup`. It gives Claude Code context about the +workspace by `agnes init`. It gives Claude Code context about the connected instance: available tables (RBAC-filtered), business metrics, installed plugins, and operational rules for the analyst. ## When is CLAUDE.md written? -`da analyst setup` fetches `GET /api/welcome` and writes the rendered markdown +`agnes init` fetches `GET /api/welcome` and writes the rendered markdown to `/CLAUDE.md` on every run (including `--force` re-initialisation). To skip writing CLAUDE.md: ```bash -da analyst setup --server-url https://agnes.example.com --no-claude-md +agnes init --server-url https://agnes.example.com --no-claude-md ``` **Analysts who ran setup while CLAUDE.md generation was temporarily absent** will -have their file written on the next `da analyst setup` run. Any existing +have their file written on the next `agnes init` run. Any existing `CLAUDE.md` is overwritten with the current server template. The companion `CLAUDE.local.md` (at `.claude/CLAUDE.local.md`) is **never** @@ -110,5 +110,5 @@ PUT validation time, so the admin is notified immediately. Click **Reset to default** in the admin UI, or call `DELETE /api/admin/workspace-prompt-template`. The next analyst who runs -`da analyst setup` will receive the rich default template from +`agnes init` will receive the rich default template from `config/claude_md_template.txt`. 
diff --git a/docs/architecture.md b/docs/architecture.md index 943931d..f9f5bfa 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -13,7 +13,7 @@ ai-data-analyst/ │ ├── auth/ Auth providers (JWT, Google OAuth, email magic link, password) │ └── web/ HTML dashboard routes ├── services/ Standalone background services (scheduler, telegram_bot, ws_gateway, …) -├── cli/ CLI tool (da sync, agnes query, agnes admin) +├── cli/ CLI tool (agnes pull, agnes query, agnes admin) ├── scripts/ Utility and migration scripts ├── config/ Instance configuration templates ├── tests/ Test suite @@ -185,7 +185,7 @@ POST /api/sync/trigger (admin) - Runs admin-registered SQL through the DuckDB BigQuery extension via `BqAccess.duckdb_session()` and writes the result to `/data/extracts/bigquery/data/.parquet` atomically (`.parquet.tmp` → `os.replace`). - Triggered by `_run_materialized_pass` in `app/api/sync.py` between custom-connectors and orchestrator rebuild on every `/api/sync/trigger`. Per-table `sync_schedule` honored via `is_table_due()`. - Cost guardrail: BQ dry-run via `app.api.v2_scan._bq_dry_run_bytes` (single source of truth for cost-estimate logic). `data_source.bigquery.max_bytes_per_materialize` (default 10 GiB; `0` disables). Fail-open when dry-run errors (DuckDB three-part syntax the native BQ client can't parse) — log warning + proceed. -- Distribution: result parquet rides the same manifest + `da sync` flow as Keboola tables. Per-user RBAC unchanged (`resource_grants(group, ResourceType.TABLE, table_id)`). +- Distribution: result parquet rides the same manifest + `agnes pull` flow as Keboola tables. Per-user RBAC unchanged (`resource_grants(group, ResourceType.TABLE, table_id)`). ### Jira — Real-Time Push diff --git a/docs/metrics/metrics.yml b/docs/metrics/metrics.yml index 6478e94..a33c086 100644 --- a/docs/metrics/metrics.yml +++ b/docs/metrics/metrics.yml @@ -1,5 +1,5 @@ version: "2.0" -description: "Business metrics starter pack. 
Import with: da metrics import docs/metrics/" +description: "Business metrics starter pack. Import with: agnes admin metrics import docs/metrics/" categories: - name: revenue folder: revenue/ diff --git a/docs/sample-data.md b/docs/sample-data.md index 1670649..16a07c7 100644 --- a/docs/sample-data.md +++ b/docs/sample-data.md @@ -169,12 +169,13 @@ diff -r run1 run2 # no differences To use sample data on a deployed server (instead of connecting a data adapter): ```bash -# On the server -cd /opt/data-analyst/repo +# On the server, from the install directory containing your repo checkout +# and Python venv (paths vary per deployment): +cd /repo # Generate Parquet files directly using project's ParquetManager # (snappy compression, proper column types, metadata embedding) -/opt/data-analyst/.venv/bin/python scripts/generate_sample_data.py \ +/.venv/bin/python scripts/generate_sample_data.py \ --size m --format parquet --output /data/src_data/parquet --seed 42 # Set correct permissions diff --git a/services/corporate_memory/README.md b/services/corporate_memory/README.md index ca4d5f4..366e2ed 100644 --- a/services/corporate_memory/README.md +++ b/services/corporate_memory/README.md @@ -78,7 +78,7 @@ Corporate Memory solves this by making institutional knowledge: └──────────┬───────────┘ │ ┌──────────▼───────────┐ - │ da sync │ + │ agnes pull │ │ │ │ .claude/rules/ │ │ km_.md │ ← one per mandatory item @@ -238,7 +238,7 @@ The highest-ranked facts enter the agent's context first. 
Mandatory items bypass ### Claude Code integration -`da sync` writes the bundle as files in `.claude/rules/`: +`agnes pull` writes the bundle as files in `.claude/rules/`: ``` .claude/rules/ @@ -368,7 +368,7 @@ agnes-the-ai-analyst/ ├── src/repositories/knowledge.py ← DuckDB CRUD (no SQL in API layer) ├── src/db.py ← Schema: knowledge_items + 4 supporting tables │ -└── cli/commands/sync.py ← da sync step 7: fetch bundle → write km_*.md +└── cli/commands/pull.py ← agnes pull step 7: fetch bundle → write km_*.md ``` --- @@ -401,7 +401,7 @@ An analyst working on sensitive M&A data marks their items as personal. The note | | Corporate Memory | Static `CLAUDE.md` | Vector RAG | Fine-tuning | |---|---|---|---|---| -| **Update latency** | Next `da sync` (~minutes) | Manual edit + redeploy | Near-realtime | Days to weeks | +| **Update latency** | Next `agnes pull` (~minutes) | Manual edit + redeploy | Near-realtime | Days to weeks | | **Governance** | Approve / reject / audit | None | None | Training data curation | | **Confidence scoring** | Yes (source + decay) | No | Similarity score only | Baked into weights | | **Contradiction detection** | Yes (auto, per domain) | No | No | No (invisible) | @@ -454,7 +454,7 @@ Scans `/data/user_sessions/*.jsonl`, extracts knowledge from unprocessed session Corporate Memory is wired into Agnes' sync pipeline automatically: ``` -da sync +agnes pull step 1–6: download tables, rebuild DuckDB views step 7: fetch /api/memory/bundle → write .claude/rules/km_*.md ``` diff --git a/tests/test_admin_unregister_cleanup.py b/tests/test_admin_unregister_cleanup.py index 4b5ffd4..b845bc3 100644 --- a/tests/test_admin_unregister_cleanup.py +++ b/tests/test_admin_unregister_cleanup.py @@ -1,6 +1,6 @@ """DELETE /api/admin/registry/{id} for materialized rows must remove the materialized parquet file too — otherwise sync_state still has the row, -the manifest still serves it, and `da sync` keeps trying to download +the manifest still serves it, and 
`agnes pull` keeps trying to download data for a table that no longer has a registry entry. The orchestrator's rebuild path additionally skips parquets that lack a matching table_registry row, so a transient race (or operator-deleted parquet) @@ -184,7 +184,7 @@ def test_delete_remote_bq_row_does_not_touch_data_dir( def test_delete_clears_sync_state_for_materialized_row(seeded_app, keboola_instance): """DELETE must also clear the sync_state row so the manifest stops - advertising the dropped table to `da sync`.""" + advertising the dropped table to `agnes pull`.""" c = seeded_app["client"] token = seeded_app["admin_token"] diff --git a/tests/test_cli.py b/tests/test_cli.py index f263a51..3dce55d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -285,7 +285,7 @@ class TestCatalogMetrics: def test_catalog_metrics_help(self): result = runner.invoke(app, ["catalog", "--help"]) assert result.exit_code == 0 - # `agnes catalog --metrics` replaces the old `da metrics list/show`. + # `agnes catalog --metrics` lists business-metric definitions. 
assert "metrics" in result.output.lower() def test_admin_metrics_help(self): diff --git a/tests/test_cli_admin_metrics.py b/tests/test_cli_admin_metrics.py index 8f77168..4f31b5f 100644 --- a/tests/test_cli_admin_metrics.py +++ b/tests/test_cli_admin_metrics.py @@ -1,4 +1,4 @@ -"""Tests for `agnes admin metrics {import,export,validate}` (lifted from `da metrics`).""" +"""Tests for `agnes admin metrics {import,export,validate}`.""" from typer.testing import CliRunner diff --git a/tests/test_cli_catalog_metrics.py b/tests/test_cli_catalog_metrics.py index 329cd45..835bfac 100644 --- a/tests/test_cli_catalog_metrics.py +++ b/tests/test_cli_catalog_metrics.py @@ -1,4 +1,4 @@ -"""Tests for `agnes catalog --metrics` (folded from `da metrics list/show`).""" +"""Tests for `agnes catalog --metrics`.""" from typer.testing import CliRunner diff --git a/tests/test_keboola_materialized_e2e.py b/tests/test_keboola_materialized_e2e.py index 90982b6..aee2dde 100644 --- a/tests/test_keboola_materialized_e2e.py +++ b/tests/test_keboola_materialized_e2e.py @@ -1,5 +1,5 @@ """End-to-end: register a Keboola materialized row -> trigger sync -> -parquet appears -> manifest serves it -> CLI da sync would download it. +parquet appears -> manifest serves it -> CLI agnes pull would download it. Skipped unless KBC_TEST_URL + KBC_TEST_TOKEN + KBC_TEST_BUCKET + KBC_TEST_TABLE are present. 
@@ -68,4 +68,4 @@ def test_register_trigger_manifest_path(seeded_app, monkeypatch, tmp_path): assert smoke is not None assert smoke["source_type"] == "keboola" assert smoke["query_mode"] == "local" # materialized parquets surface as local - assert smoke["md5"] # has a hash for da sync delta detection + assert smoke["md5"] # has a hash for agnes pull delta detection diff --git a/tests/test_query_materialized_error_message.py b/tests/test_query_materialized_error_message.py index b56eab0..e9e6cef 100644 --- a/tests/test_query_materialized_error_message.py +++ b/tests/test_query_materialized_error_message.py @@ -23,7 +23,7 @@ def _auth(token: str) -> dict: def test_query_materialized_id_not_in_views_returns_helpful_message(seeded_app): """An admin querying a materialized id that isn't yet materialized in the local analytics.duckdb gets a 400 whose detail names the - query_mode and points at `da sync` / direct-BQ-query.""" + query_mode and points at `agnes pull` / direct-BQ-query.""" from src.db import get_system_db sys_conn = get_system_db() try: diff --git a/tests/test_setup_hooks_template.py b/tests/test_setup_hooks_template.py index becfbbe..c684630 100644 --- a/tests/test_setup_hooks_template.py +++ b/tests/test_setup_hooks_template.py @@ -21,7 +21,7 @@ def test_template_has_session_end_upload(): ends = cfg.get("hooks", {}).get("SessionEnd", []) cmds = [h["command"] for entry in ends for h in entry.get("hooks", [])] assert any("agnes push" in c for c in cmds), ( - f"Expected `da sync --upload-only` in SessionEnd, got {cmds}" + f"Expected `agnes push` in SessionEnd, got {cmds}" ) diff --git a/tests/test_sync_trigger_materialized.py b/tests/test_sync_trigger_materialized.py index 7667a5b..7564163 100644 --- a/tests/test_sync_trigger_materialized.py +++ b/tests/test_sync_trigger_materialized.py @@ -156,7 +156,7 @@ def test_materialized_pass_collects_errors_per_row(system_db, stub_bq, tmp_path) def test_materialized_pass_records_parquet_hash(system_db, stub_bq, 
tmp_path): """sync_state.hash must be the MD5 of the parquet file — otherwise the - manifest reports an empty hash and every da sync re-downloads.""" + manifest reports an empty hash and every agnes pull re-downloads.""" repo = TableRegistryRepository(system_db) repo.register( id="hashed", name="hashed",