diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 3fab1da..1848607 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -15,7 +15,5 @@ services: image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable} ws-gateway: image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable} - corporate-memory: - image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable} - session-collector: - image: ghcr.io/keboola/agnes-the-ai-analyst:${AGNES_TAG:-stable} + # corporate-memory and session-collector were dropped in #176 — + # the scheduler container drives them via /api/admin/run-* endpoints. diff --git a/docker-compose.yml b/docker-compose.yml index 40e7bc4..4240d28 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -88,33 +88,15 @@ services: - full restart: unless-stopped - corporate-memory: - build: . - command: python -m services.corporate_memory - volumes: - - data:/data - env_file: .env - environment: - - DATA_DIR=/data - depends_on: - - app - profiles: - - full - restart: unless-stopped - - session-collector: - build: . - command: python -m services.session_collector - volumes: - - data:/data - env_file: .env - environment: - - DATA_DIR=/data - depends_on: - - app - profiles: - - full - restart: unless-stopped + # NOTE: corporate-memory + session-collector previously ran here as + # tight `restart: unless-stopped` boot loops behind `profiles: [full]`. + # As of #176 the scheduler container drives both through admin HTTP + # endpoints (/api/admin/run-corporate-memory, + # /api/admin/run-session-collector). The verification-detector job + # was never in compose; it now ships the same way. The app remains + # the sole writer to system.duckdb. Operators previously running + # COMPOSE_PROFILES=full need to drop those service stanzas from any + # custom Compose overrides. # TLS reverse proxy. Corporate-CA certs mounted from /data/state/certs # (managed by scripts/ops/agnes-tls-rotate.sh on the VM). 
For local diff --git a/docs/architecture.md b/docs/architecture.md index f9f5bfa..27fa604 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -55,8 +55,8 @@ system.duckdb analytics.duckdb ``` **Deployment:** Docker Compose. The `app` service runs Uvicorn. The `scheduler` sidecar triggers -sync jobs via the app's REST API. Optional `full` profile adds telegram-bot, ws-gateway, -corporate-memory, session-collector. +sync jobs and the LLM pipeline (corporate-memory, verification-detector, session-collector) via +the app's REST API on offset cadences. Optional `full` profile adds telegram-bot and ws-gateway. ```bash docker compose up # app + scheduler @@ -330,12 +330,12 @@ Docker Compose service. | Service | Profile | Schedule / Mode | Description | |---------|---------|-----------------|-------------| -| `scheduler` | default | Always-on; polls every N seconds | Lightweight sidecar that triggers jobs via the app's REST API (`POST /api/sync/trigger` every 15 min, `GET /api/health` every 5 min). Auth via `SCHEDULER_API_TOKEN` or auto-fetch from `/auth/token`. | +| `scheduler` | default | Always-on; polls every N seconds | Lightweight sidecar that triggers jobs via the app's REST API: `POST /api/sync/trigger` every 15 min, `GET /api/health` every 5 min, `POST /api/admin/run-session-collector` every 10 min, `POST /api/admin/run-verification-detector` every 15 min, `POST /api/admin/run-corporate-memory` every 17 min, `POST /api/marketplaces/sync-all` daily 03:00. Auth via `SCHEDULER_API_TOKEN` or auto-fetch from `/auth/token`. | | `telegram_bot` | `full` | Always-on (long-poll) | Telegram bot: polling + HTTP dispatch, `/status` command, notification script execution. | | `ws_gateway` | `full` | Always-on | WebSocket gateway (TCP 8765) + HTTP dispatch socket. JWT auth. Per-user connection limit (5). Heartbeat ping/pong. 
| -| `corporate_memory` | `full` | Periodic (every 30 min) | Scans `CLAUDE.local.md` files, extracts knowledge via LLM (Claude Haiku), writes to `knowledge_items` in system.duckdb. Inline contradiction detection runs after each new item: one batched Haiku structured-output call returns judgments + structured resolution suggestions for every same-domain candidate (no SQL keyword pre-filter — see [ADR Decision 4](ADR-corporate-memory-v1.md)). | -| `verification_detector` | `full` (run via `corporate_memory`) | On each `corporate_memory` tick | Scans unprocessed analyst session JSONLs, extracts corrections / confirmations / unprompted definitions via Haiku structured outputs. Confidence is computed in code from `(source_type, detection_type)` — never trusted from the LLM. Each verification persists a `verification_evidence` row carrying `user_quote` + `detection_type` ([ADR Decision 3](ADR-corporate-memory-v1.md)). | -| `session_collector` | `full` | Periodic (every 6 h) | Copies Claude Code `.jsonl` session transcripts to central storage. | +| `corporate_memory` | (driven by scheduler) | Every 17 min | Scans `CLAUDE.local.md` files, extracts knowledge via LLM (Claude Haiku), writes to `knowledge_items` in system.duckdb. Inline contradiction detection runs after each new item: one batched Haiku structured-output call returns judgments + structured resolution suggestions for every same-domain candidate (no SQL keyword pre-filter — see [ADR Decision 4](ADR-corporate-memory-v1.md)). Driven by scheduler-v2 since #176. | +| `verification_detector` | (driven by scheduler) | Every 15 min | Scans unprocessed analyst session JSONLs, extracts corrections / confirmations / unprompted definitions via Haiku structured outputs. Confidence is computed in code from `(source_type, detection_type)` — never trusted from the LLM. Each verification persists a `verification_evidence` row carrying `user_quote` + `detection_type` ([ADR Decision 3](ADR-corporate-memory-v1.md)). 
Driven by scheduler-v2 since #176. |
+| `session_collector` | (driven by scheduler) | Every 10 min | Copies Claude Code `.jsonl` session transcripts to central storage. Driven by scheduler-v2 since #176. |
 
 Files NOT to modify: `services/ws_gateway/` (stable WebSocket infrastructure).
 
diff --git a/tests/test_docker_compose.py b/tests/test_docker_compose.py
new file mode 100644
index 0000000..e2eca94
--- /dev/null
+++ b/tests/test_docker_compose.py
@@ -0,0 +1,81 @@
+"""Static contract tests for docker-compose.yml.
+
+The corporate-memory and session-collector side-car services were dropped
+in #176 — the scheduler container now drives them through HTTP. These
+tests pin that contract so the services can't quietly come back.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+
+@pytest.fixture(scope="module")
+def compose() -> dict:
+    """Parse the repo-root docker-compose.yml once per test module."""
+    root = Path(__file__).resolve().parent.parent
+    # Explicit encoding so parsing does not depend on the locale default.
+    text = (root / "docker-compose.yml").read_text(encoding="utf-8")
+    return yaml.safe_load(text)
+
+
+class TestComposeServicesRemoved:
+    """The two side-car services must not exist in docker-compose.yml."""
+
+    def test_corporate_memory_service_removed(self, compose):
+        assert "corporate-memory" not in compose["services"], (
+            "corporate-memory was dropped in #176 — scheduler drives it via HTTP. "
+            "Do not re-add the service stanza."
+        )
+
+    def test_session_collector_service_removed(self, compose):
+        assert "session-collector" not in compose["services"], (
+            "session-collector was dropped in #176 — scheduler drives it via HTTP. "
+            "Do not re-add the service stanza."
+        )
+
+
+class TestComposeSchedulerWires:
+    """The scheduler service must remain — it's the sole driver now."""
+
+    def test_scheduler_service_present(self, compose):
+        assert "scheduler" in compose["services"]
+        command = compose["services"]["scheduler"].get("command")
+        # Compose accepts `command` as a string or a list of arguments;
+        # compare on the joined form so either spelling satisfies the contract.
+        if isinstance(command, list):
+            command = " ".join(command)
+        assert command == "python -m services.scheduler"
+
+    def test_app_service_present(self, compose):
+        assert "app" in compose["services"]
+
+
+class TestComposeNoBootLoopProfile:
+    """Only app + scheduler may start on a default `docker compose up`.
+
+    The previous corporate-memory and session-collector stanzas were tight
+    `restart: unless-stopped` boot loops; anything else that starts
+    unconditionally should be a deliberate change that updates this test.
+    """
+
+    def test_only_scheduler_is_unconditional_long_running(self, compose):
+        # Services with no (or an empty) `profiles:` list run on a default
+        # `docker compose up`.
+        always_running = {
+            name
+            for name, svc in compose["services"].items()
+            if not svc.get("profiles")
+        }
+        for boot_loop_offender in ("corporate-memory", "session-collector"):
+            assert boot_loop_offender not in always_running
+        # Expected always-running set on a default deploy: app + scheduler.
+        # extract is one-shot so it has profiles=[extract]; caddy/telegram-bot/
+        # ws-gateway are all behind profiles too. Pin the exact set: the
+        # two-name check above cannot catch a newly added unconditional service.
+        assert always_running == {"app", "scheduler"}, (
+            f"unexpected default-profile services: {sorted(always_running)}"
+        )