From 78cd243e65402ac2542be3b0c527343a7ef82b02 Mon Sep 17 00:00:00 2001 From: Vojtech <119944107+cvrysanek@users.noreply.github.com> Date: Fri, 15 May 2026 23:21:14 +0400 Subject: [PATCH] fix(store): promote-on-approve looks up version_no by submission_id (live agnes-development bug) (#330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(store): promote-on-approve looks up version_no by submission_id Live bug observed on agnes-development: an entity had 5+ version_history rows sharing the same `hash` (user re-uploaded byte-identical bundles as v2/v4/v6 of the same skill — the LLM and inline checks happily approved each one). The runner's promote-on-approve path looked up the submission's version_no by hash: for entry in entity.version_history: if entry["hash"] == sub_hash: target = int(entry["n"]); break The loop matched the FIRST hash collision — always v1, n=1. With current=1, the forward-only `target > current` guard then skipped the promote, leaving the entity stuck at v1 even though the new submission's status flipped to `approved`. UI kept showing v1 as "current". Fix: look up by submission_id via the existing `_version_no_for_submission` helper (already used by retry / rescan / download paths). Same lookup applied in `admin_override_store_submission` which had the identical hash-match loop. Test: TestPromoteLookupByByteIdenticalBundles uploads v1 + a byte-identical v2, drives the LLM with mock-approve, asserts entity.version_no advances to 2. * fix: bundle #329 reviewer-Important follow-ups + post-merge polish Bundled with Vojtech's commit ahead of this (the promote-on-approve `version_no` lookup-by-submission_id fix) since #330 is the next release-cut PR and the four #329 follow-ups would otherwise need a standalone release-cut PR — prohibited by docs/RELEASING.md § "Release-cut belongs to the PR". Fixed: - src/usage_ask.py — SCHEMA_DIGEST + SYSTEM_PROMPT referenced the dropped `usage_plugin_daily` table. The admin `POST /api/admin/telemetry/ask` endpoint ships SYSTEM_PROMPT to the LLM, so any model-emitted SQL against `usage_plugin_daily` would fail with a DuckDB binder error post-#329 merge. Updated to describe the new v48 rollups (`usage_marketplace_item_daily` / `_window`) and rule 5 of the prompt to point at them. Internal: - CHANGELOG.md [0.54.20] section restored to its canonical content from the v0.54.20 git tag. The #329 self-merge carried 226 lines of author's pre-rebase bullets that ended up mis-attributed; the published v0.54.20 GitHub Release (FTS BM25 + batch bar) now matches the CHANGELOG section verbatim. Also fills in [Unreleased] with this PR's bullets (Fixed + Internal). - tests/conftest.py — dropped the unused `conn_with_usage_schema_and_attribution` fixture that INSERTed into the now-removed `usage_attribution_*` tables. Zero callers today, but a tripwire — the first future test to request it would have failed with a binder error. - app/web/templates/marketplace.html — replaced a customer-specific token (`groupon-marketplace`) in the Most Popular sort-tiebreaker comment with a generic `-marketplace` placeholder per CLAUDE.md § Vendor-agnostic OSS. Also scrubbed an `agnes-development` reference in app/api/admin.py and src/store_guardrails/runner.py (cherry-picked from Vojtech's commit) on the same hygiene rule. * release: 0.54.22 — flea-market promote-by-submission_id fix + #329 reviewer follow-ups --------- Co-authored-by: ZdenekSrotyr --- CHANGELOG.md | 268 +++++----------------------- app/api/admin.py | 21 ++- app/web/templates/marketplace.html | 4 +- pyproject.toml | 2 +- src/store_guardrails/runner.py | 31 ++-- src/usage_ask.py | 27 ++- tests/conftest.py | 32 ---- tests/test_store_entity_versions.py | 100 +++++++++++ 8 files changed, 195 insertions(+), 290 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c00480..8575a70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,47 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.54.22] — 2026-05-15 + +### Fixed +- **Flea-market — promote-on-approve + admin-override now look up + the submission's `version_no` in `version_history` by + `submission_id`, not by `hash`.** Hash-based lookup broke whenever + the user uploaded byte-identical bundles across versions (e.g. + same content as v2 and v4): the loop matched the FIRST history + entry with that hash — always v1 — so `target_version_no` landed + at 1, the forward-only `target > current` guard skipped the + promote, and the entity stayed stuck at v1 even though the new + submission was `status='approved'`. UI kept showing v1 as + "current". Both `runner.run_llm_review` (background auto-approve) + and `admin_override_store_submission` now reuse the existing + `_version_no_for_submission` helper. Closes the live development- + deployment case where an entity had 5+ identical-hash history + rows. +- **Admin "ask telemetry" feature** (`POST /api/admin/telemetry/ask`) + no longer emits SQL against the dropped `usage_plugin_daily` + table. `src/usage_ask.py` `SCHEMA_DIGEST` and `SYSTEM_PROMPT` + bumped to describe the v48 rollups (`usage_marketplace_item_daily` + / `_window`) and rule 5 of the prompt updated. Pre-fix, the LLM + would happily emit `SELECT … FROM usage_plugin_daily` per the + stale prompt and the DuckDB binder would reject it. + +### Internal +- **CHANGELOG `[0.54.20]` section restored to its canonical content + from the `v0.54.20` git tag.** The #329 self-merge had carried 226 + lines of author's pre-rebase bullets that ended up mis-attributed + to `[0.54.20]`; the published v0.54.20 GitHub Release (FTS BM25 + + batch bar) now matches the CHANGELOG section verbatim. +- `tests/conftest.py` — dropped the unused + `conn_with_usage_schema_and_attribution` fixture that seeded into + the now-removed `usage_attribution_*` tables. Zero callers today + but a tripwire — the first future test to request it would have + failed with a DuckDB binder error. +- `app/web/templates/marketplace.html` — replaced a customer- + specific token (`groupon-marketplace`) in the Most Popular sort- + tiebreaker comment with a generic `-marketplace` + placeholder per `CLAUDE.md § Vendor-agnostic OSS`. + ## [0.54.21] — 2026-05-15 ### Added @@ -131,233 +172,6 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C / Reject stay scoped to Review per #129's scope decision (status flips belong with the per-row actions or the keyboard workflow). Closes #129. -- Marketplace telemetry rollup tables (schema v48): - - `usage_marketplace_item_daily` — per-day fact (count, distinct_users, - error_count) keyed by (day, source, type, parent_plugin, name). - - `usage_marketplace_item_window` — sliding-window snapshot with true - cross-window distinct user counts; `period_label='last_7d'` - refreshed every UsageProcessor tick, `period_label='last_30d'` - refreshed hourly. -- `InnerDetailResponse.telemetry` — 30-day invocation + distinct-user - metrics surfaced on curated inner skill / agent detail pages - (`/marketplace/curated///skill/` and `…/agent/`). -- `scripts/backfill_marketplace_rollup.py` — one-shot script to populate - the new rollup tables from historic `usage_events` after a v46 deploy. - -### Changed -- **BREAKING (operator-facing)**: flea-market guardrail pipeline now - fail-CLOSED on misconfig. `get_guardrails_enabled()` previously - conflated operator intent (`guardrails.enabled` YAML) with provider - readiness (`ANTHROPIC_API_KEY` env) — when intent was True but the - env var was missing, the pipeline silently auto-fell-back to disabled - and every upload landed `approved` without an LLM review. Split into - `get_guardrails_enabled()` (intent only) and a new - `get_guardrails_llm_provider_ready()` (env only). Three-state matrix: - `enabled=false → auto-approve` (unchanged), `enabled=true + ready → - normal hold-for-review` (unchanged), `enabled=true + not-ready → - submissions sit at `pending_llm`, no auto-approval` (new — was - silent auto-approval). Admin **Retry review** action on - `/admin/store/submissions/` now covers `pending_llm` too (was - `review_error` + `blocked_llm`). Boot-time `WARNING` banner surfaces - the misconfig in `app/main.py`. Operators who relied on the - auto-fallback for local-dev no-LLM setups must now explicitly set - `guardrails.enabled: false` in `instance.yaml` — same outcome, - explicit intent. -- Flea-market admin **Override** action on - `/admin/store/submissions/` now covers `pending_llm` submissions - too (was `blocked_inline` + `blocked_llm` + `review_error`). Closes - a UX gap created by the new fail-CLOSED behavior above: under - enabled-but-not-ready, a known-good submission would otherwise sit - indefinitely until the admin set credentials AND clicked **Retry - review**. Override already routes through `entity.version_history` - to resolve the correct version dir (and is now forward-only on - promote — see the Fixed bullet below), so it stays safe for v2+ - deferred-promotion submissions. -- The `/corporate-memory` domain filter dropdown now offers the full - `VALID_DOMAINS` enum (finance / engineering / product / data / - operations / infrastructure), not just domains that already have ≥1 - item — the old behavior collapsed to a single option on fresh stores. -- **BREAKING (UI):** the Curated Memory admin review queue moved from - `/corporate-memory/admin` to `/admin/corporate-memory` — no redirect. - It is now reached from the Admin nav dropdown ("Memory Review"). -- `/corporate-memory` (Curated Memory) is no longer admin-only — the - route runs on `get_current_user`, matching the already-open - `/api/memory/*` endpoints. The pending-review banner stays admin-only - (suppressed server-side for non-admins). -- Both Curated Memory pages now render the shared `_page_hero.html` - header band instead of bespoke per-page chrome, matching catalog / - me-activity / admin pages. -- **BREAKING:** `MarketplaceItem.unique_users_30d` renamed to - `distinct_users_30d` in the `/api/marketplace/items` response. The new - value is a true distinct count across the 30-day window (from the - `usage_marketplace_item_window` snapshot), not the old sum-of-daily - proxy that over-counted active multi-day users. -- `usage_events.source` is now populated per-event by `MarketplaceItemLookup` - (live join against `marketplace_plugins` + `store_entities`). Previously - it sat at `'builtin'` for every row because the v42 `AttributionLookup` - matched skill/command names without the plugin prefix that Claude Code - actually writes — `usage_events.source = 'curated'` / `'flea'` / - `'builtin'` becomes meaningful for the first time. -- `usage_events.ref_id` semantics — now stores the plain plugin name - (curated) instead of `/`; flea entities - store `NULL` (no parent plugin). Admin telemetry endpoints - (`/api/admin/telemetry/{summary,query,facets,export}`) keep their - `GROUP BY ref_id` / `source` shapes — bucket values shift to the - new semantics. -- The marketplace browse card no longer renders the invocation chip - (`🔥 N uses · ↑ X%`) pending UX finalisation. `invocations_30d`, - `distinct_users_30d`, and `trend_pct` remain in the API response for - the upcoming Most-Used / Trending sections and the detail pages. -- `USAGE_PROCESSOR_VERSION` bumped 4 → 5 so the session-pipeline - reprocess loop re-attributes historic events to the new - source/ref_id semantics on the next tick. - -### Removed -- **BREAKING:** four schema-v42 telemetry tables (v48 migration): - - `usage_attribution_skills`, `usage_attribution_agents`, and - `usage_attribution_commands` — replaced by live prefix-split lookup - against `marketplace_plugins` + `store_entities`. Verified empty or - derivable; no unique data lost. - - `usage_plugin_daily` — replaced by `usage_marketplace_item_daily` - + `_window`. Verified empty in production-shape data (the v42 rollup - INSERT was gated on `source IN ('curated','flea')` but the broken - attribution layer always produced `'builtin'`). -- `src/repositories/usage_attribution.py`, `src/usage_attribution_helpers.py`, - `scripts/backfill_usage_attribution.py`, and their test fixtures - (`tests/test_usage_attribution.py`, - `tests/test_backfill_usage_attribution.py`) — no callers remain. - -### Fixed -- **Unauthenticated browser requests to `GET /api/initial-workspace.zip` now redirect to `/login?next=/api/initial-workspace.zip` instead of returning a raw JSON 401** (#315). This is the one `/api/*` endpoint that's designed to be hit directly from a browser bookmark (the analyst clean-install zip), so it intentionally opts out of the global `_API_PATH_PREFIXES` "never redirect /api/*" contract in `app/main.py`. CLI / curl / other API clients (any `Accept` without `text/html` — including the `*/*` default) keep getting the 401 they can handle. -- Flea-market LLM security review failed with `LLMFormatError: Response - truncated (max_tokens) for schema store_guardrails_review` when the - reviewer emitted many findings or content-quality issues. Raised the - output budget (2500 → 6000 tokens) and added a one-shot - retry-with-doubled-budget in the Anthropic provider (up to 4× initial) - so the verdict survives an occasional verbose response instead of - pinning the submission in `review_error`. (We initially added - `maxItems=20` to the schema's `findings` and `content_quality.issues` - arrays, but Anthropic's structured-output validator rejects `maxItems` - on array types — removed.) -- Flea-market entity detail page surfaces the latest submission's - failure verdict even when a previously-approved version is still - serving (deferred-promotion path). The owner / admin used to see no - banner at all when a v2+ edit landed in `review_error`, - `blocked_llm`, or `blocked_inline` because the `_quarantine_banner` - partial gated on `entity.visibility_status != 'approved'`. The - banner now renders for those failure statuses too, with copy that - acknowledges the prior version is still live ("Latest edit failed - review — previously approved version (vN) keeps serving …"). -- Flea-market `/api/store/bundle.zip` export now filters by - `visibility_status='approved'` for non-admin non-owner callers. - Previously an authenticated user could call the bundle endpoint - and pull pending / blocked / hidden v1 bytes — bypassing the - publish gate the same way `_enforce_visibility` already prevents - on detail pages + install. Owners still see their own non-approved - entries (matches the browse-listing `include_owner_id` affordance); - admins still see everything. (Critical — surfaced by adversarial - review.) -- Flea-market PUT (edit) + restore endpoints now serialize concurrent - writes against the same `entity_id` via a per-entity asyncio lock. - Two concurrent PUTs could previously both pass the - `latest_for_entity` pending gate, both bake into - `versions/v/plugin/`, and both append a `version_history` - entry. The lock closes the window for single-process deployments; - multi-worker deployments still have a residual window (tracked in - the follow-up issue). (High — surfaced by adversarial review.) -- Flea-market `StoreSubmissionsRepository.update_status` is now - compare-and-swap on terminal statuses (`approved`, `overridden`, - `blocked_inline`). A late background-task LLM verdict racing with - an admin override or with a more recent terminal verdict can no - longer silently clobber the row. Callers that legitimately need to - overwrite a terminal state pass `allow_terminal_overwrite=True` - explicitly. Returns a boolean indicating whether the write landed. - `runner.run_llm_review` now honors the bool on both its `approved` - and `blocked_llm` branches: a CAS no-op skips the downstream - cascade (visibility flip, version promote, the verdict-specific - audit entry that would otherwise contradict the row) and logs a - single `store.submission.bg_verdict_skipped` audit row instead, - so an operator reviewing the queue sees dropped verdicts - explicitly rather than via row-vs-audit contradiction. - (High — surfaced by adversarial review.) -- Flea-market admin **Retry review** and **Rescan** now review the - STAGED version's bundle, not the live `plugin/` directory. For a - v2+ edit held at `pending_llm` / `blocked_llm` / `review_error`, - live still holds the prior approved version. Reviewing live would - produce a verdict against the WRONG bytes; the runner's hash-match - promotion would then advance the entity to staged bytes that were - never actually reviewed. Now resolves the staged - `versions/v/plugin/` from the submission's `version_history` - entry. (Critical — surfaced by adversarial review.) -- Flea-market admin **Override** is now forward-only on promotion. - Previously `target_version_no != current` would happily demote the - live bundle when admin overrode a stale v2 submission while v3 was - already approved + live. Changed to `target > current` so override - flips status + visibility on the row regardless, but on-disk - promotion only fires for newer versions. The same `> current` - guard is now applied defensively in `runner.run_llm_review` so a - late LLM verdict can't demote past a more recent approval either. - (High — surfaced by adversarial review.) -- Flea-market admin **Override** on a v2+ edit/restore submission now - promotes the entity to the overridden version + swaps the on-disk - live bundle. Pre-fix the override only flipped - `visibility_status='approved'` and `submission.status='overridden'`, - leaving `entity.version_no` at the prior approved version — so - installers (and the marketplace UI) kept serving the OLD bytes the - admin just intended to replace. Mirrors the auto-approval branch in - `runner.run_llm_review`: look up the submission's version in - `version_history`, `promote_version` + `_swap_live_to_version` when - it differs from current. Initial-v1 overrides unchanged (no - promotion needed). -- Flea-market Restore button + endpoint no longer allow restoring a - version that was never approved. The versions card hid the gate - entirely (showed Restore on any non-current row), and the backend - blocked only while the latest submission was `pending_*` — so a - `blocked_llm` / `review_error` version could be restored anyway. - Added `submission_status` decoration on `version_history` via a new - `StoreEntitiesRepository.get_with_version_approvals` helper, gated - the UI button on `submission_status in ('approved', None)` - (`None` is the legacy v1 seed, back-compat-treated as approved), - rendered status pills for blocked / errored / pending rows, and - added a 400 `version_not_approved` guard in - `POST /api/store/entities/{id}/versions/{n}/restore`. -- `/me/activity` hero subtitle showed literal `` tags - around the user's email instead of rendering them bold. The subtitle - was built by `~`-concatenating a `Markup` operand (`user.email | e`) - with HTML string literals, which made Jinja2's `markup_join` escape - the literal tags too. Switched to `{% set %}…{% endset %}` block - capture so the literal `` stays HTML while the email is still - autoescaped. - -### Internal -- `StoreEntitiesRepository.get_with_version_approvals` now defensively - copies each `version_history` entry before annotating with - `submission_status`. `self.get()` re-parses JSON each call today so - this is belt-and-suspenders, but it protects any future caching layer - from leaking the annotated key into a subsequent plain `get()` call. -- `corporate_memory_admin.html` renamed to `admin_corporate_memory.html` - to match the `admin_*` template convention; dropped dead `.ck-topbar` / - `.page-header` inline CSS (the latter collided with the design-system - `.page-header--hero` selector). -- `usage_tool_daily` flagged as candidate for removal. The table is - still populated by `rebuild_rollups` for backwards compatibility with - the `usage_ask` schema digest, but has no product-UI consumer after - the v46 marketplace telemetry refactor. Will be evaluated for drop in - the next iteration. -- CI test suite sharded for speed. The `test` job in `.github/workflows/ci.yml` is now a `test-shard` matrix — 4 parallel jobs via `pytest-split`, balanced by a committed `.test_durations` file — aggregated into a single `test` status check so branch protection needs no change. The duplicate full-suite `test` job in `release.yml` is removed (it re-ran the same ~10 min suite a second time on every push to main/feature branches); `release.yml` is now image-build only, with the advisory ruff/mypy steps moved to a lean `lint` job in `ci.yml`. Net: ~10 min → ~3 min wall-clock per push, and the suite runs once instead of twice. Adds `pytest-split` to the `dev` extra. -- CI/release workflow polish (the still-salvageable subset of the - abandoned PR #139, after #311 obsoleted the test-job refactor): - `rollback.yml` extracts the `release.yml` smoke-test rollback into a - reusable + manually dispatchable workflow, with a warning guard on - non-`stable-*` `workflow_dispatch` inputs. `prune-dev-tags.yml` adds - weekly housekeeping (Sundays 04:00 UTC) of legacy CalVer git tags + - GHCR images outside a `KEEP_MONTHS` retention window; floating - aliases are git-tagless and never matched. `lint-workflows.yml` runs - `actionlint` on `.github/workflows/**` + `scripts/ops/**.sh` changes - (non-blocking initially). The superseded `deploy.yml` stub is removed. - Excludes #139's rejected pieces (Release Drafter, setuptools_scm, - run-number tag scheme, main-only release triggers, deletion of - `cli-wheel-clean-install`). ### Internal - **Schema v47** — adds DuckDB `fts` BM25 index over diff --git a/app/api/admin.py b/app/api/admin.py index fa8a7e2..8e8e75c 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -3754,15 +3754,18 @@ async def admin_override_store_submission( # just no-ops and we skip promotion harmlessly. entity_row = ents_repo.get(entity_id) or {} promoted_to: Optional[int] = None - sub_hash = sub.get("version") - target_version_no: Optional[int] = None - for entry in (entity_row.get("version_history") or []): - if entry.get("hash") == sub_hash: - try: - target_version_no = int(entry.get("n")) - except (TypeError, ValueError): - target_version_no = None - break + # Look up THIS submission's version entry by submission_id, NOT + # by hash. Hash-based lookup breaks when the user re-uploads + # byte-identical bundles (e.g. v2 same content as v1): the loop + # picks the FIRST history entry with that hash (always v1, n=1), + # so target_version_no lands at 1 instead of the actual new + # entry's n. The forward-only `target > current` guard then + # skips the promote, leaving the entity stuck at v1. Surfaced + # live on a development deployment. + from app.api.store import _version_no_for_submission + target_version_no: Optional[int] = _version_no_for_submission( + entity_row, submission_id, + ) # Forward-only: refuse to promote backwards. An admin overriding a # stale v2 submission when v3 is already approved + live must NOT # demote the live bundle back to v2's bytes. Override flips the diff --git a/app/web/templates/marketplace.html b/app/web/templates/marketplace.html index bfe8a36..3be1b7b 100644 --- a/app/web/templates/marketplace.html +++ b/app/web/templates/marketplace.html @@ -1051,8 +1051,8 @@ async function loadMostPopular() { // 4. invocations_7d DESC ← recent volume // 5. name ASC ← deterministic textual order // 6. marketplace_slug ASC ← splits duplicate plugin names - // across multiple marketplaces (e.g. `grpn` in `groupon- - // marketplace` vs. test seeds) + // across multiple marketplaces (e.g. a `foo` plugin shipped + // both by `-marketplace` and by curated test seeds) const r = await fetchOne(state.tab, { sort: 'most_used', page: 1, page_size: 24 }); const cmp = (a, b) => { const cmpN = (x, y) => (y || 0) - (x || 0); diff --git a/pyproject.toml b/pyproject.toml index 9da91b7..b920242 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.54.21" +version = "0.54.22" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" diff --git a/src/store_guardrails/runner.py b/src/store_guardrails/runner.py index 6e55e49..6ec7faf 100644 --- a/src/store_guardrails/runner.py +++ b/src/store_guardrails/runner.py @@ -283,18 +283,25 @@ def run_llm_review( # serve-able state. Archive / hidden-by-admin paths # leave alone. if current_visibility == "approved": - # v46: attribution lookup is live — the next - # UsageProcessor tick preloads the approved entity by name. - sub_row = subs_repo.get(submission_id) or {} - sub_hash = sub_row.get("version") - target_version_no = None - for entry in (ent_row.get("version_history") or []): - if entry.get("hash") == sub_hash: - try: - target_version_no = int(entry.get("n")) - except (TypeError, ValueError): - target_version_no = None - break + # v48 (#329): attribution lookup is live — the next + # UsageProcessor tick preloads the approved entity by + # name against marketplace_plugins / store_entities; + # no cached attribution rows to refresh on promote. + # Look up THIS submission's version entry by + # submission_id, NOT by hash. Hash-based lookup + # breaks when a user re-uploads byte-identical + # bundles (e.g. v2 same content as v1): the loop + # picks the FIRST history entry with that hash + # (always v1, n=1), so `target_version_no` lands at + # 1 instead of the actual new entry's n. The + # forward-only `target > current` guard then skips + # the promote, leaving the entity stuck at v1. + # Surfaced live on a development deployment with + # an entity that had 5+ identical-hash history rows. + from app.api.store import _version_no_for_submission + target_version_no = _version_no_for_submission( + ent_row, submission_id, + ) # Forward-only promotion. A late verdict landing for # an older submission must NOT demote the live bundle # past a version that was approved more recently. diff --git a/src/usage_ask.py b/src/usage_ask.py index c30a4be..4f5113e 100644 --- a/src/usage_ask.py +++ b/src/usage_ask.py @@ -66,14 +66,27 @@ TABLE usage_tool_daily distinct_sessions INTEGER PRIMARY KEY (day, tool_name, source) -TABLE usage_plugin_daily +TABLE usage_marketplace_item_daily day DATE NOT NULL - source VARCHAR NOT NULL - ref_id VARCHAR NOT NULL - invocations INTEGER + source VARCHAR NOT NULL -- 'curated' | 'flea' | 'builtin' + type VARCHAR NOT NULL -- 'plugin' | 'skill' | 'agent' | 'command' + parent_plugin VARCHAR NOT NULL -- '' for top-level plugins; '' for inner items + name VARCHAR NOT NULL + count INTEGER distinct_users INTEGER - distinct_sessions INTEGER - PRIMARY KEY (day, source, ref_id) + error_count INTEGER + PRIMARY KEY (day, source, type, parent_plugin, name) + +TABLE usage_marketplace_item_window + period_label VARCHAR NOT NULL -- 'last_7d' | 'last_30d' + source VARCHAR NOT NULL + type VARCHAR NOT NULL + parent_plugin VARCHAR NOT NULL + name VARCHAR NOT NULL + invocations INTEGER + distinct_users INTEGER -- TRUE distinct across the window (not summed from daily) + refreshed_at TIMESTAMP + PRIMARY KEY (period_label, source, type, parent_plugin, name) """ SYSTEM_PROMPT = """You translate natural-language questions into DuckDB SELECT statements over a telemetry schema. @@ -83,7 +96,7 @@ Rules: 2. SELECT-only — never INSERT, UPDATE, DELETE, DROP, CREATE, ALTER, ATTACH, COPY, PRAGMA, or any side-effect statement. 3. No semicolons except optionally one at the end. 4. No CTE that contains a write. -5. Prefer rollup tables (`usage_tool_daily`, `usage_plugin_daily`) for date-range aggregations; use `usage_events` for forensic detail. +5. Prefer rollup tables for date-range aggregations: `usage_tool_daily` (per-tool), `usage_marketplace_item_daily` (per marketplace item — plugin / skill / agent / command keyed by source + type + parent_plugin + name), and `usage_marketplace_item_window` (true-distinct counts across `last_7d` / `last_30d` snapshots). Use `usage_events` for forensic detail. The pre-v48 `usage_plugin_daily` and `usage_attribution_*` tables are gone — do not reference them. 6. Use DuckDB-flavor SQL: `CURRENT_DATE`, `INTERVAL 7 DAY`, `DATE_TRUNC('week', ...)`, `EPOCH`, etc. 7. Limit large result sets — default `LIMIT 100` unless the question asks for ALL rows. diff --git a/tests/conftest.py b/tests/conftest.py index bb9b24d..bec28ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -380,38 +380,6 @@ from tests.fixtures.analyst_bootstrap import ( # noqa: E402,F401 ) -@pytest.fixture -def conn_with_usage_schema_and_attribution(tmp_path, monkeypatch): - """DuckDB conn at v41 with attribution rows seeded for fixture skills/agents/commands.""" - monkeypatch.setenv("DATA_DIR", str(tmp_path)) - import src.db as db_module - db_module._system_db_conn = None - db_module._system_db_path = None - c = db_module.get_system_db() - # Seed attribution rows the fixtures reference - c.execute( - "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)" - " VALUES ('curated', 'mp/plug', 'my-skill')" - ) - c.execute( - "INSERT OR IGNORE INTO usage_attribution_skills (source, ref_id, skill_name)" - " VALUES ('flea', 'entity-1', 'flea-skill')" - ) - c.execute( - "INSERT OR IGNORE INTO usage_attribution_agents (source, ref_id, agent_name)" - " VALUES ('curated', 'mp/plug', 'my-agent')" - ) - c.execute( - "INSERT OR IGNORE INTO usage_attribution_commands (source, ref_id, command_name)" - " VALUES ('curated', 'mp/plug', 'compound:debug')" - ) - yield c - try: - c.close() - except Exception: - pass - - @pytest.fixture def bq_instance(monkeypatch): """Force instance.yaml to look like a BigQuery deployment for the diff --git a/tests/test_store_entity_versions.py b/tests/test_store_entity_versions.py index 33123ec..0bb5c16 100644 --- a/tests/test_store_entity_versions.py +++ b/tests/test_store_entity_versions.py @@ -1706,3 +1706,103 @@ class TestAtomicPromote: f"DB must NOT advance when live swap can't happen; got " f"version_no={ent_after['version_no']}" ) + + +class TestPromoteLookupByByteIdenticalBundles: + """Live-issue regression observed on a development deployment: an + entity had multiple version_history rows sharing the same `hash` + (user re-uploaded byte-identical bundles as v2/v4/v6). The runner's + promote-on-approve path looked up the submission's version_no + in version_history BY HASH and broke on the FIRST match — always + v1. With v1's n=1 and current=1, the forward-only + `target > current` guard skipped the promote, so the passing + LLM verdict never advanced the entity. UI kept showing v1 as + 'current' even though the new submission's status was 'approved'. + + Fix: look up by `submission_id` via `_version_no_for_submission`.""" + + def test_byte_identical_v2_promotes_to_current( + self, web_client, monkeypatch, + ): + from pathlib import Path + from app.utils import get_store_dir + from src.repositories.store_submissions import StoreSubmissionsRepository + from src.store_guardrails.runner import run_llm_review + + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-fake-for-identical-test") + owner_id, owner_cookies = _create_user(web_client, "identical@x.com") + # Same body for v1 + v2 → byte-identical zip → same hash. + identical_body = ( + "Identical body line that is intentionally long enough to " + "clear the content threshold for skill bodies. " * 4 + ) + v1_zip = _make_skill_zip("identical", body=identical_body) + r = web_client.post( + "/api/store/entities", + files={"file": ("v1.zip", v1_zip, "application/zip")}, + data={"type": "skill", "description": _OK_DESC}, + cookies=owner_cookies, + ) + assert r.status_code == 201, r.text + eid = r.json()["id"] + + conn = get_system_db() + ent_v1 = StoreEntitiesRepository(conn).get(eid) + conn.close() + v1_hash = ent_v1["version"] + assert ent_v1["version_no"] == 1 + + # Flip guardrails ON for v2. Mock LLM to approve. + monkeypatch.setattr( + "app.api.store.get_guardrails_enabled", lambda: True, + ) + monkeypatch.setattr( + "app.api.store.get_guardrails_llm_provider_ready", lambda: True, + ) + + def mock_approve(*a, **kw): + return { + "risk_level": "safe", "summary": "ok", + "findings": [], "template_placeholders_found": 0, + "reviewed_by_model": "mock", "error": None, + "content_quality": {"verdict": "pass", "issues": []}, + } + monkeypatch.setattr( + "src.store_guardrails.llm_review.review_bundle", mock_approve, + ) + + # PUT v2 with IDENTICAL bytes. + v2_zip = _make_skill_zip("identical", body=identical_body) + r = web_client.put( + f"/api/store/entities/{eid}", + files={"file": ("v2.zip", v2_zip, "application/zip")}, + cookies=owner_cookies, + ) + assert r.status_code == 200, r.text + + conn = get_system_db() + v2_sub_id = StoreSubmissionsRepository(conn).latest_for_entity(eid)["id"] + conn.close() + run_llm_review( + v2_sub_id, + plugin_dir=Path(get_store_dir()) / eid / "versions" / "v2" / "plugin", + conn_factory=get_system_db, + api_key_loader=lambda: "sk", model_loader=lambda: "mock", + ) + + conn = get_system_db() + ent_after = StoreEntitiesRepository(conn).get(eid) + v2_sub = StoreSubmissionsRepository(conn).get(v2_sub_id) + conn.close() + # Pre-fix the runner would have matched v1's history entry + # first (same hash), target_version_no=1, `1 > 1` False, no + # promote → entity stuck at v1. + assert ent_after["version_no"] == 2, ( + f"v2 must promote even when its hash matches v1's; got " + f"version_no={ent_after['version_no']}. Lookup-by-hash " + f"would have stuck the entity at v1." + ) + assert v2_sub["status"] == "approved" + assert ent_after["version"] == v1_hash, ( + "hash unchanged (bundle is byte-identical) but version_no DID move" + )