From 8233c3e3f97f081b60806917876f724230d88737 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Mon, 4 May 2026 21:22:19 +0200
Subject: [PATCH] chore(docs): replace stale `da` verbs and vendor-specific
 install paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sweep operator runbooks (docs/QUICKSTART, docs/HEADLESS_USAGE,
docs/architecture, docs/sample-data, docs/agent-workspace-prompt,
docs/metrics/metrics.yml, dev_docs/server, dev_docs/disaster-recovery),
the corporate-memory service README, the jira connector README + backfill
scripts, the deploy skill, and test docstrings. Replaces `da sync` →
`agnes pull`, `da analyst setup` → `agnes init`, `da metrics ...` →
`agnes catalog --metrics` / `agnes admin metrics ...`, `da fetch` →
`agnes snapshot create`, plus the matching docker-compose admin
invocations.

Vendor-specific `/opt/data-analyst/` install paths in jira backfill /
consistency scripts and operator docs are replaced with the
placeholder `<install-dir>` and a new `AGNES_ENV_FILE` env-var override
that lets a deployment inject its actual install path without a code
change. Aligns with the OSS vendor-agnostic policy in CLAUDE.md.

CHANGELOG `### Internal` entry summarizes the audit and reaffirms the
intentional stale-marker tuples (`_LEGACY_STRINGS`, `_OUR_COMMAND_MARKERS`)
that must keep referencing `da sync` / `da fetch` / etc. for hook upgrade
and override-detection logic.
---
 CHANGELOG.md                                  |  1 +
 cli/skills/deploy.md                          |  4 +-
 connectors/jira/README.md                     | 81 +++++--------------
 connectors/jira/scripts/backfill.py           |  9 ++-
 .../jira/scripts/backfill_remote_links.py     |  7 +-
 connectors/jira/scripts/backfill_sla.py       |  5 +-
 connectors/jira/scripts/consistency_check.py  | 14 +++-
 dev_docs/disaster-recovery.md                 |  6 +-
 dev_docs/server.md                            | 19 ++---
 docs/HEADLESS_USAGE.md                        |  4 +-
 docs/QUICKSTART.md                            |  1 -
 docs/agent-workspace-prompt.md                | 10 +--
 docs/architecture.md                          |  4 +-
 docs/metrics/metrics.yml                      |  2 +-
 docs/sample-data.md                           |  7 +-
 services/corporate_memory/README.md           | 10 +--
 tests/test_admin_unregister_cleanup.py        |  4 +-
 tests/test_cli.py                             |  2 +-
 tests/test_cli_admin_metrics.py               |  2 +-
 tests/test_cli_catalog_metrics.py             |  2 +-
 tests/test_keboola_materialized_e2e.py        |  4 +-
 .../test_query_materialized_error_message.py  |  2 +-
 tests/test_setup_hooks_template.py            |  2 +-
 tests/test_sync_trigger_materialized.py       |  2 +-
 24 files changed, 89 insertions(+), 115 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index aeff32e..91c2c91 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -55,6 +55,7 @@ End-to-end clean-analyst-bootstrap rewrite. The web `/setup?role=analyst` page n
 - `tests/test_reader_smoke_matrix.py` — load-bearing parametrized test: every reader CLI command runs on a freshly-bootstrapped zero-grants workspace without a Python traceback.
 - `tests/test_clean_install_integration.py` — end-to-end happy-path tests (minimal grants, zero grants, force preserves CLAUDE.local.md, readers in pre-init dir).
 - `docs/RELEASE_CHECKLIST.md` — manual clean-install protocol mandated for any PR touching the bootstrap path.
+- Audited and replaced stale `da` verbs left over from prior merges in admin UI text, audit-log messages, code comments, operator runbooks, analyst-facing skill docs, and test docstrings (welcome template renderer/API tests now assert exact emitted markers — `agnes init` for analyst flow, `agnes auth` for admin flow — with explicit absence checks on legacy verbs). Vendor-specific `/opt/data-analyst/` install paths in jira backfill/consistency scripts and operator docs replaced with `<install-dir>/` and an `AGNES_ENV_FILE` env-var override. Intentional stale-marker tuples (`_LEGACY_STRINGS` in `app/api/claude_md.py`, `_OUR_COMMAND_MARKERS` in `cli/lib/hooks.py`) and tests that seed legacy hook content (`tests/test_lib_hooks.py`, `tests/test_legacy_strings_scan.py`) are preserved by design.
 
 ## [0.33.0] — 2026-05-04
 
diff --git a/cli/skills/deploy.md b/cli/skills/deploy.md
index 70426c0..ea915d4 100644
--- a/cli/skills/deploy.md
+++ b/cli/skills/deploy.md
@@ -19,8 +19,8 @@ ssh user@your-server-ip
 ### 2. Clone the repository
 
 ```bash
-git clone https://github.com/keboola/agnes-the-ai-analyst.git /opt/data-analyst
-cd /opt/data-analyst
+git clone https://github.com/keboola/agnes-the-ai-analyst.git <install-dir>
+cd <install-dir>
 git checkout main
 ```
 
diff --git a/connectors/jira/README.md b/connectors/jira/README.md
index 759661d..2417df9 100644
--- a/connectors/jira/README.md
+++ b/connectors/jira/README.md
@@ -257,7 +257,7 @@ T+Xsec   Analyst: Query with DuckDB - sees latest data
 
 ### Server Environment Variables
 
-In `/opt/data-analyst/.env`:
+In `<install-dir>/.env`, where `<install-dir>` is typically the directory you run `docker compose` from:
 
 ```bash
 # Jira webhook integration
@@ -364,7 +364,7 @@ Response:
 
 ```bash
 # Webapp logs (webhook processing)
-tail -f /opt/data-analyst/logs/webapp-error.log | grep -i jira
+docker compose logs app --tail 200 | grep -i jira
 
 # Recent webhook events
 ls -lt /data/src_data/raw/jira/webhook_events/ | head -20
@@ -535,67 +535,26 @@ WHERE first_response_elapsed_millis IS NOT NULL
 
 ## Analyst Sync Configuration
 
-Jira data is an **optional dataset** - not synced by default to save bandwidth.
-
-**Enable Jira sync:**
-```bash
-# Edit local config (created on first sync_data.sh run)
-nano ~/.config/data-analyst/sync.yaml
-
-# Change:
-datasets:
-  jira: true              # Enable parquet data (~50MB)
-  jira_attachments: false # Keep false unless you need actual files
-```
-
-**Then sync:**
-```bash
-bash server/scripts/sync_data.sh
-```
+Whether an analyst sees Jira tables locally is decided server-side: an admin
+must register the Jira tables and grant the analyst's group access via
+`resource_grants(resource_type='table')`. Once granted, the manifest
+advertises the tables and `agnes pull` downloads the parquets to the
+analyst's workspace on the next session.
 
 DuckDB views for Jira tables are created automatically if data exists:
-- `jira_issues` - main issues table
-- `jira_comments` - issue comments
-- `jira_attachments` - attachment metadata (filenames, sizes, URLs)
-- `jira_changelog` - field change history
-- `jira_issuelinks` - links between issues (blocks, duplicates, relates to)
-- `jira_remote_links` - external links (Confluence, Slack, etc.)
+- `jira_issues` — main issues table
+- `jira_comments` — issue comments
+- `jira_attachments` — attachment metadata (filenames, sizes, URLs)
+- `jira_changelog` — field change history
+- `jira_issuelinks` — links between issues (blocks, duplicates, relates to)
+- `jira_remote_links` — external links (Confluence, Slack, etc.)
 
 ## Attachment Access
 
-Attachments (images, logs, PDFs) are stored separately from parquet data.
-
-### Option 1: Download per-ticket (recommended)
-
-Download attachments for a specific ticket to local temp folder:
-
-```bash
-# Download all attachments for one ticket
-rsync -avz data-analyst:server/jira_attachments/SUPPORT-1234/ /tmp/SUPPORT-1234/
-
-# View locally
-ls /tmp/SUPPORT-1234/
-open /tmp/SUPPORT-1234/screenshot.png  # macOS
-```
-
-This is fast (only downloads files for one ticket) and keeps your local machine clean.
-
-### Option 2: Sync attachments locally (for heavy analysis)
-
-If you need frequent access to attachments, enable full sync:
-
-```yaml
-# ~/.config/data-analyst/sync.yaml
-datasets:
-  jira: true
-  jira_attachments: true   # Syncs ~500MB+ of files
-```
-
-Then `sync_data.sh` will rsync attachments to `./server/jira_attachments/`.
-
-### Finding attachment path from parquet
-
-The `jira_attachments` table has a `local_path` column with the server path:
+Attachments (images, logs, PDFs) are stored on the server alongside parquet
+data and are **not** distributed via `agnes pull` (the manifest only
+advertises parquet tables). The `jira_attachments` table has a `local_path`
+column with the server-side filesystem path:
 
 ```sql
 SELECT
@@ -613,7 +572,11 @@ issue_key     | filename        | local_path
 SUPPORT-1234  | screenshot.png  | /data/src_data/raw/jira/attachments/SUPPORT-1234/... | 45678
 ```
 
-To access locally (if synced): replace `/data/src_data/raw/jira/attachments/` with `./server/jira_attachments/`.
+To pull the actual file to a workstation, operators with SSH access to the
+host can `scp` / `rsync` from the path above. The open-source distribution
+does not ship a client-side attachment-fetch primitive — wire one up per
+deployment if attachment access is required (e.g. a thin admin endpoint that
+streams the file with the same RBAC gate as the parquet table).
 
 ## Future Improvements
 
diff --git a/connectors/jira/scripts/backfill.py b/connectors/jira/scripts/backfill.py
index 1241f62..c655807 100755
--- a/connectors/jira/scripts/backfill.py
+++ b/connectors/jira/scripts/backfill.py
@@ -6,7 +6,7 @@ Downloads all issues from Jira using JQL search with pagination.
 Reuses the webapp's JiraService for consistent data handling.
 
 Usage:
-    # On server (uses /opt/data-analyst/.env):
+    # On server (loads .env from $AGNES_ENV_FILE when set, or from the current directory):
     python -m connectors.jira.scripts.backfill
 
     # With custom settings:
@@ -58,12 +58,15 @@ class Config:
     @classmethod
     def from_env(cls) -> "Config":
         """Load configuration from environment variables."""
-        # Try to load .env file from common locations
+        # Try to load .env file from common locations.
+        # Customer-specific install paths (e.g. /opt/<deployment>/.env) can be
+        # injected via the AGNES_ENV_FILE env var without editing this list.
         env_paths = [
-            Path("/opt/data-analyst/.env"),
+            Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None,
             Path.cwd() / ".env",
             Path(__file__).parent.parent / ".env",
         ]
+        env_paths = [p for p in env_paths if p is not None]
         for env_path in env_paths:
             if env_path.exists():
                 load_dotenv(env_path)
diff --git a/connectors/jira/scripts/backfill_remote_links.py b/connectors/jira/scripts/backfill_remote_links.py
index 9ecb1a3..211a757 100644
--- a/connectors/jira/scripts/backfill_remote_links.py
+++ b/connectors/jira/scripts/backfill_remote_links.py
@@ -7,7 +7,7 @@ and embeds them into existing issue JSON files. This enables the
 Parquet transform to extract remote_links table data.
 
 Usage:
-    # On server (uses /opt/data-analyst/.env):
+    # On server (loads .env from $AGNES_ENV_FILE when set, or from the current directory):
     python -m connectors.jira.scripts.backfill_remote_links
 
     # With parallel workers:
@@ -44,11 +44,14 @@ logger = logging.getLogger(__name__)
 
 def load_config() -> dict:
     """Load configuration from environment variables."""
+    # Customer-specific install paths (e.g. /opt/<deployment>/.env) can be
+    # injected via the AGNES_ENV_FILE env var without editing this list.
     env_paths = [
-        Path("/opt/data-analyst/.env"),
+        Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None,
         Path.cwd() / ".env",
         Path(__file__).parent.parent / ".env",
     ]
+    env_paths = [p for p in env_paths if p is not None]
     for env_path in env_paths:
         if env_path.exists():
             load_dotenv(env_path)
diff --git a/connectors/jira/scripts/backfill_sla.py b/connectors/jira/scripts/backfill_sla.py
index afe1519..3b579b9 100644
--- a/connectors/jira/scripts/backfill_sla.py
+++ b/connectors/jira/scripts/backfill_sla.py
@@ -57,11 +57,14 @@ logger = logging.getLogger(__name__)
 
 def load_config() -> dict:
     """Load configuration from environment variables."""
+    # Customer-specific install paths (e.g. /opt/<deployment>/.env) can be
+    # injected via the AGNES_ENV_FILE env var without editing this list.
     env_paths = [
-        Path("/opt/data-analyst/.env"),
+        Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None,
         Path.cwd() / ".env",
         Path(__file__).parent.parent / ".env",
     ]
+    env_paths = [p for p in env_paths if p is not None]
     for env_path in env_paths:
         if env_path.exists():
             load_dotenv(env_path)
diff --git a/connectors/jira/scripts/consistency_check.py b/connectors/jira/scripts/consistency_check.py
index 1fe5a3a..3dc37c9 100644
--- a/connectors/jira/scripts/consistency_check.py
+++ b/connectors/jira/scripts/consistency_check.py
@@ -72,12 +72,15 @@ class Config:
     @classmethod
     def from_env(cls) -> "Config":
         """Load configuration from environment variables."""
-        # Try to load .env file from common locations
+        # Try to load .env file from common locations.
+        # Customer-specific install paths (e.g. /opt/<deployment>/.env) can be
+        # injected via the AGNES_ENV_FILE env var without editing this list.
         env_paths = [
-            Path("/opt/data-analyst/.env"),
+            Path(os.environ["AGNES_ENV_FILE"]) if os.environ.get("AGNES_ENV_FILE") else None,
             Path.cwd() / ".env",
             Path(__file__).parent.parent / ".env",
         ]
+        env_paths = [p for p in env_paths if p is not None]
         for env_path in env_paths:
             if env_path.exists():
                 load_dotenv(env_path)
@@ -92,8 +95,11 @@ class Config:
 
         raw_dir = Path(os.environ.get("JIRA_DATA_DIR", "/data/src_data/raw/jira"))
         parquet_dir = Path(os.environ.get("JIRA_PARQUET_DIR", "/data/src_data/parquet/jira"))
-        repo_dir = Path(os.environ.get("REPO_DIR", "/opt/data-analyst/repo"))
-        venv_python = Path(os.environ.get("VENV_PYTHON", "/opt/data-analyst/.venv/bin/python"))
+        # REPO_DIR / VENV_PYTHON default to the checkout containing this
+        # file and the interpreter running it — operators must export them
+        # when running this script outside an editable checkout.
+        repo_dir = Path(os.environ.get("REPO_DIR", str(Path(__file__).resolve().parents[3])))
+        venv_python = Path(os.environ.get("VENV_PYTHON", sys.executable))
 
         return cls(
             jira_domain=os.environ["JIRA_DOMAIN"],
diff --git a/dev_docs/disaster-recovery.md b/dev_docs/disaster-recovery.md
index ce58c90..ff859f0 100644
--- a/dev_docs/disaster-recovery.md
+++ b/dev_docs/disaster-recovery.md
@@ -87,8 +87,6 @@ docker compose up -d
 
 # Trigger a full sync from the data source
 curl -X POST http://localhost:8000/api/sync/trigger
-# Or via CLI:
-docker compose exec app da sync
 ```
 
 DuckDB extract files and parquet will be repopulated from Keboola / BigQuery.
@@ -123,8 +121,8 @@ not regenerated — user accounts and table definitions are not recreated by syn
 
 4. **Clone repo and create .env**:
    ```bash
-   git clone git@github.com:your-org/ai-data-analyst.git /opt/data-analyst
-   cd /opt/data-analyst
+   git clone git@github.com:keboola/agnes-the-ai-analyst.git <install-dir>
+   cd <install-dir>
    cp config/.env.template .env
    # Fill in secrets from GitHub Secrets / 1Password
    ```
diff --git a/dev_docs/server.md b/dev_docs/server.md
index 0a2a776..36a1106 100644
--- a/dev_docs/server.md
+++ b/dev_docs/server.md
@@ -88,11 +88,8 @@ the database is unavailable.
 # Via API
 curl -X POST http://localhost:8000/api/sync/trigger
 
-# Via CLI inside the container
-docker compose exec app da sync
-
 # Sync a single table
-docker compose exec app da sync --table table_name
+curl -X POST "http://localhost:8000/api/sync/trigger?table=table_name"
 ```
 
 ### Check sync status
@@ -123,16 +120,16 @@ any destructive operation.
 
 ```bash
 # List registered tables
-docker compose exec app da admin tables list
+docker compose exec app agnes admin list-tables
 
 # Register a new table
-docker compose exec app da admin tables add
+docker compose exec app agnes admin register-table
 
 # User management
-docker compose exec app da admin users list
+docker compose exec app agnes admin list-users
 
 # Query data directly
-docker compose exec app da query "SELECT * FROM my_table LIMIT 10"
+docker compose exec app agnes query "SELECT * FROM my_table LIMIT 10"
 ```
 
 ## Application Deployment
@@ -143,7 +140,7 @@ Application is deployed via Docker image. The recommended workflow:
 2. CI builds and pushes a new image
 3. On the server, pull and restart:
    ```bash
-   cd /opt/data-analyst
+   cd <install-dir>
    docker compose pull
    docker compose up -d
    ```
@@ -154,7 +151,7 @@ To pin a specific image version, set the tag in `docker-compose.yml` before depl
 
 ```bash
 # Edit .env (never commit this file)
-nano /opt/data-analyst/.env
+nano <install-dir>/.env
 
 # Restart app to apply changes
 docker compose restart app
@@ -297,7 +294,7 @@ most lock issues.
 docker compose logs app | grep -i "sync\|error\|exception"
 
 # Verify data source credentials in .env
-docker compose exec app da admin tables list
+docker compose exec app agnes admin list-tables
 ```
 
 ### Out of disk space
diff --git a/docs/HEADLESS_USAGE.md b/docs/HEADLESS_USAGE.md
index 2270f17..aa0086d 100644
--- a/docs/HEADLESS_USAGE.md
+++ b/docs/HEADLESS_USAGE.md
@@ -31,8 +31,8 @@ agnes query "SELECT 1"
     AGNES_TOKEN: ${{ secrets.AGNES_TOKEN }}
     AGNES_SERVER: https://agnes.example.com
   run: |
-    pip install data-analyst
-    da sync --all
+    uv tool install "$AGNES_SERVER/cli/wheel/agnes.whl"
+    agnes pull
 ```
 
 ## Revoke
diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md
index baa3dcc..ce86163 100644
--- a/docs/QUICKSTART.md
+++ b/docs/QUICKSTART.md
@@ -48,7 +48,6 @@
 7. Trigger a data sync:
    ```bash
    curl -X POST http://localhost:8000/api/sync/trigger
-   # Or: da sync
    ```
 
 ## Docker Deployment
diff --git a/docs/agent-workspace-prompt.md b/docs/agent-workspace-prompt.md
index 2488295..8c8e1ca 100644
--- a/docs/agent-workspace-prompt.md
+++ b/docs/agent-workspace-prompt.md
@@ -1,23 +1,23 @@
 # Agent Workspace Prompt
 
 The agent workspace prompt is the `CLAUDE.md` file written to each analyst's
-workspace by `da analyst setup`. It gives Claude Code context about the
+workspace by `agnes init`. It gives Claude Code context about the
 connected instance: available tables (RBAC-filtered), business metrics, installed
 plugins, and operational rules for the analyst.
 
 ## When is CLAUDE.md written?
 
-`da analyst setup` fetches `GET /api/welcome` and writes the rendered markdown
+`agnes init` fetches `GET /api/welcome` and writes the rendered markdown
 to `<workspace>/CLAUDE.md` on every run (including `--force` re-initialisation).
 
 To skip writing CLAUDE.md:
 
 ```bash
-da analyst setup --server-url https://agnes.example.com --no-claude-md
+agnes init --server-url https://agnes.example.com --no-claude-md
 ```
 
 **Analysts who ran setup while CLAUDE.md generation was temporarily absent** will
-have their file written on the next `da analyst setup` run. Any existing
+have their file written on the next `agnes init` run. Any existing
 `CLAUDE.md` is overwritten with the current server template.
 
 The companion `CLAUDE.local.md` (at `.claude/CLAUDE.local.md`) is **never**
@@ -110,5 +110,5 @@ PUT validation time, so the admin is notified immediately.
 
 Click **Reset to default** in the admin UI, or call
 `DELETE /api/admin/workspace-prompt-template`. The next analyst who runs
-`da analyst setup` will receive the rich default template from
+`agnes init` will receive the rich default template from
 `config/claude_md_template.txt`.
diff --git a/docs/architecture.md b/docs/architecture.md
index 943931d..f9f5bfa 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -13,7 +13,7 @@ ai-data-analyst/
 │   ├── auth/             Auth providers (JWT, Google OAuth, email magic link, password)
 │   └── web/              HTML dashboard routes
 ├── services/             Standalone background services (scheduler, telegram_bot, ws_gateway, …)
-├── cli/                  CLI tool (da sync, agnes query, agnes admin)
+├── cli/                  CLI tool (agnes pull, agnes query, agnes admin)
 ├── scripts/              Utility and migration scripts
 ├── config/               Instance configuration templates
 ├── tests/                Test suite
@@ -185,7 +185,7 @@ POST /api/sync/trigger (admin)
 - Runs admin-registered SQL through the DuckDB BigQuery extension via `BqAccess.duckdb_session()` and writes the result to `/data/extracts/bigquery/data/<id>.parquet` atomically (`<id>.parquet.tmp` → `os.replace`).
 - Triggered by `_run_materialized_pass` in `app/api/sync.py` between custom-connectors and orchestrator rebuild on every `/api/sync/trigger`. Per-table `sync_schedule` honored via `is_table_due()`.
 - Cost guardrail: BQ dry-run via `app.api.v2_scan._bq_dry_run_bytes` (single source of truth for cost-estimate logic). `data_source.bigquery.max_bytes_per_materialize` (default 10 GiB; `0` disables). Fail-open when dry-run errors (DuckDB three-part syntax the native BQ client can't parse) — log warning + proceed.
-- Distribution: result parquet rides the same manifest + `da sync` flow as Keboola tables. Per-user RBAC unchanged (`resource_grants(group, ResourceType.TABLE, table_id)`).
+- Distribution: result parquet rides the same manifest + `agnes pull` flow as Keboola tables. Per-user RBAC unchanged (`resource_grants(group, ResourceType.TABLE, table_id)`).
 
 ### Jira — Real-Time Push
 
diff --git a/docs/metrics/metrics.yml b/docs/metrics/metrics.yml
index 6478e94..a33c086 100644
--- a/docs/metrics/metrics.yml
+++ b/docs/metrics/metrics.yml
@@ -1,5 +1,5 @@
 version: "2.0"
-description: "Business metrics starter pack. Import with: da metrics import docs/metrics/"
+description: "Business metrics starter pack. Import with: agnes admin metrics import docs/metrics/"
 categories:
   - name: revenue
     folder: revenue/
diff --git a/docs/sample-data.md b/docs/sample-data.md
index 1670649..16a07c7 100644
--- a/docs/sample-data.md
+++ b/docs/sample-data.md
@@ -169,12 +169,13 @@ diff -r run1 run2  # no differences
 To use sample data on a deployed server (instead of connecting a data adapter):
 
 ```bash
-# On the server
-cd /opt/data-analyst/repo
+# On the server, from the install directory containing your repo checkout
+# and Python venv (paths vary per deployment):
+cd <install-dir>/repo
 
 # Generate Parquet files directly using project's ParquetManager
 # (snappy compression, proper column types, metadata embedding)
-/opt/data-analyst/.venv/bin/python scripts/generate_sample_data.py \
+<install-dir>/.venv/bin/python scripts/generate_sample_data.py \
     --size m --format parquet --output /data/src_data/parquet --seed 42
 
 # Set correct permissions
diff --git a/services/corporate_memory/README.md b/services/corporate_memory/README.md
index ca4d5f4..366e2ed 100644
--- a/services/corporate_memory/README.md
+++ b/services/corporate_memory/README.md
@@ -78,7 +78,7 @@ Corporate Memory solves this by making institutional knowledge:
            └──────────┬───────────┘
                       │
            ┌──────────▼───────────┐
-           │    da sync           │
+           │    agnes pull        │
            │                      │
            │  .claude/rules/      │
            │    km_<id>.md        │  ← one per mandatory item
@@ -238,7 +238,7 @@ The highest-ranked facts enter the agent's context first. Mandatory items bypass
 
 ### Claude Code integration
 
-`da sync` writes the bundle as files in `.claude/rules/`:
+`agnes pull` writes the bundle as files in `.claude/rules/`:
 
 ```
 .claude/rules/
@@ -368,7 +368,7 @@ agnes-the-ai-analyst/
 ├── src/repositories/knowledge.py   ← DuckDB CRUD (no SQL in API layer)
 ├── src/db.py                       ← Schema: knowledge_items + 4 supporting tables
 │
-└── cli/commands/sync.py            ← da sync step 7: fetch bundle → write km_*.md
+└── cli/commands/pull.py            ← agnes pull step 7: fetch bundle → write km_*.md
 ```
 
 ---
@@ -401,7 +401,7 @@ An analyst working on sensitive M&A data marks their items as personal. The note
 
 | | Corporate Memory | Static `CLAUDE.md` | Vector RAG | Fine-tuning |
 |---|---|---|---|---|
-| **Update latency** | Next `da sync` (~minutes) | Manual edit + redeploy | Near-realtime | Days to weeks |
+| **Update latency** | Next `agnes pull` (~minutes) | Manual edit + redeploy | Near-realtime | Days to weeks |
 | **Governance** | Approve / reject / audit | None | None | Training data curation |
 | **Confidence scoring** | Yes (source + decay) | No | Similarity score only | Baked into weights |
 | **Contradiction detection** | Yes (auto, per domain) | No | No | No (invisible) |
@@ -454,7 +454,7 @@ Scans `/data/user_sessions/*.jsonl`, extracts knowledge from unprocessed session
 Corporate Memory is wired into Agnes' sync pipeline automatically:
 
 ```
-da sync
+agnes pull
   step 1–6: download tables, rebuild DuckDB views
   step 7: fetch /api/memory/bundle → write .claude/rules/km_*.md
 ```
diff --git a/tests/test_admin_unregister_cleanup.py b/tests/test_admin_unregister_cleanup.py
index 4b5ffd4..b845bc3 100644
--- a/tests/test_admin_unregister_cleanup.py
+++ b/tests/test_admin_unregister_cleanup.py
@@ -1,6 +1,6 @@
 """DELETE /api/admin/registry/{id} for materialized rows must remove the
 materialized parquet file too — otherwise sync_state still has the row,
-the manifest still serves it, and `da sync` keeps trying to download
+the manifest still serves it, and `agnes pull` keeps trying to download
 data for a table that no longer has a registry entry. The orchestrator's
 rebuild path additionally skips parquets that lack a matching
 table_registry row, so a transient race (or operator-deleted parquet)
@@ -184,7 +184,7 @@ def test_delete_remote_bq_row_does_not_touch_data_dir(
 
 def test_delete_clears_sync_state_for_materialized_row(seeded_app, keboola_instance):
     """DELETE must also clear the sync_state row so the manifest stops
-    advertising the dropped table to `da sync`."""
+    advertising the dropped table to `agnes pull`."""
     c = seeded_app["client"]
     token = seeded_app["admin_token"]
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index f263a51..3dce55d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -285,7 +285,7 @@ class TestCatalogMetrics:
     def test_catalog_metrics_help(self):
         result = runner.invoke(app, ["catalog", "--help"])
         assert result.exit_code == 0
-        # `agnes catalog --metrics` replaces the old `da metrics list/show`.
+        # `agnes catalog --metrics` lists business-metric definitions.
         assert "metrics" in result.output.lower()
 
     def test_admin_metrics_help(self):
diff --git a/tests/test_cli_admin_metrics.py b/tests/test_cli_admin_metrics.py
index 8f77168..4f31b5f 100644
--- a/tests/test_cli_admin_metrics.py
+++ b/tests/test_cli_admin_metrics.py
@@ -1,4 +1,4 @@
-"""Tests for `agnes admin metrics {import,export,validate}` (lifted from `da metrics`)."""
+"""Tests for `agnes admin metrics {import,export,validate}`."""
 
 from typer.testing import CliRunner
 
diff --git a/tests/test_cli_catalog_metrics.py b/tests/test_cli_catalog_metrics.py
index 329cd45..835bfac 100644
--- a/tests/test_cli_catalog_metrics.py
+++ b/tests/test_cli_catalog_metrics.py
@@ -1,4 +1,4 @@
-"""Tests for `agnes catalog --metrics` (folded from `da metrics list/show`)."""
+"""Tests for `agnes catalog --metrics`."""
 
 from typer.testing import CliRunner
 
diff --git a/tests/test_keboola_materialized_e2e.py b/tests/test_keboola_materialized_e2e.py
index 90982b6..aee2dde 100644
--- a/tests/test_keboola_materialized_e2e.py
+++ b/tests/test_keboola_materialized_e2e.py
@@ -1,5 +1,5 @@
 """End-to-end: register a Keboola materialized row -> trigger sync ->
-parquet appears -> manifest serves it -> CLI da sync would download it.
+parquet appears -> manifest serves it -> CLI agnes pull would download it.
 
 Skipped unless KBC_TEST_URL + KBC_TEST_TOKEN + KBC_TEST_BUCKET +
 KBC_TEST_TABLE are present.
@@ -68,4 +68,4 @@ def test_register_trigger_manifest_path(seeded_app, monkeypatch, tmp_path):
     assert smoke is not None
     assert smoke["source_type"] == "keboola"
     assert smoke["query_mode"] == "local"  # materialized parquets surface as local
-    assert smoke["md5"]  # has a hash for da sync delta detection
+    assert smoke["md5"]  # has a hash for agnes pull delta detection
diff --git a/tests/test_query_materialized_error_message.py b/tests/test_query_materialized_error_message.py
index b56eab0..e9e6cef 100644
--- a/tests/test_query_materialized_error_message.py
+++ b/tests/test_query_materialized_error_message.py
@@ -23,7 +23,7 @@ def _auth(token: str) -> dict:
 def test_query_materialized_id_not_in_views_returns_helpful_message(seeded_app):
     """An admin querying a materialized id that isn't yet materialized in
     the local analytics.duckdb gets a 400 whose detail names the
-    query_mode and points at `da sync` / direct-BQ-query."""
+    query_mode and points at `agnes pull` / direct-BQ-query."""
     from src.db import get_system_db
     sys_conn = get_system_db()
     try:
diff --git a/tests/test_setup_hooks_template.py b/tests/test_setup_hooks_template.py
index becfbbe..c684630 100644
--- a/tests/test_setup_hooks_template.py
+++ b/tests/test_setup_hooks_template.py
@@ -21,7 +21,7 @@ def test_template_has_session_end_upload():
     ends = cfg.get("hooks", {}).get("SessionEnd", [])
     cmds = [h["command"] for entry in ends for h in entry.get("hooks", [])]
     assert any("agnes push" in c for c in cmds), (
-        f"Expected `da sync --upload-only` in SessionEnd, got {cmds}"
+        f"Expected `agnes push` in SessionEnd, got {cmds}"
     )
 
 
diff --git a/tests/test_sync_trigger_materialized.py b/tests/test_sync_trigger_materialized.py
index 7667a5b..7564163 100644
--- a/tests/test_sync_trigger_materialized.py
+++ b/tests/test_sync_trigger_materialized.py
@@ -156,7 +156,7 @@ def test_materialized_pass_collects_errors_per_row(system_db, stub_bq, tmp_path)
 
 def test_materialized_pass_records_parquet_hash(system_db, stub_bq, tmp_path):
     """sync_state.hash must be the MD5 of the parquet file — otherwise the
-    manifest reports an empty hash and every da sync re-downloads."""
+    manifest reports an empty hash and every agnes pull re-downloads."""
     repo = TableRegistryRepository(system_db)
     repo.register(
         id="hashed", name="hashed",