From 4501c9c3ddae2cda581ac6440fee9800655ed04c Mon Sep 17 00:00:00 2001 From: Vojtech <119944107+cvrysanek@users.noreply.github.com> Date: Thu, 14 May 2026 10:02:44 +0400 Subject: [PATCH] =?UTF-8?q?fix(store-guardrails):=20post-#290=20review=20f?= =?UTF-8?q?ollow-up=20=E2=80=94=20purge=20tuple,=20filter=20chip,=20stale?= =?UTF-8?q?=20docs,=20lazy=20bundle=5Fmeta,=20logger.exception=20(#295)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses post-merge review findings on #290: - Admin Rescan is the only post-v30 producer of status='blocked_inline'. Re-add it to admin queue 'Needs review' filter chip and to TERMINAL_BLOCKED_STATUSES in the bundle-purge job so rescan-produced rows surface in the default operator view and bundles get TTL-swept instead of lingering indefinitely. - Update three doc-drift sites still referring to the pre-#290 spam counter scope (counted blocked_inline). The counter now narrows to blocked_llm + review_error; fix the comment in app/api/store.py, the docstring in get_guardrails_blocked_quota_per_day(), and the operator-facing hint rendered on /admin/server-config. - Add positive test for _reject_inline_or_continue validation branch (code='validation_failed', checks payload shape, no-DB-write contract). Locks the frontend wizard's detail.checks contract. - Tighten test_quota_disabled_with_zero — assert (200, 201) explicitly instead of !=429 so a 500 regression no longer passes. - _reject_inline_or_continue takes plugin_dir and lazy-computes bundle_meta only on the security branch. Validation rejects no longer pay for a SHA256 walk on the bundle. - Surface store.upload.security_blocked audit-log write failures via logger.exception instead of swallowing — that audit row is the only forensic trace by design. --- CHANGELOG.md | 10 +++ app/api/admin.py | 9 ++- app/api/store.py | 45 +++++++++---- app/instance_config.py | 12 ++-- .../templates/admin_store_submissions.html | 14 ++-- src/store_guardrails/purge.py | 13 ++-- tests/test_admin_store_submissions.py | 64 ++++++++++++++++++- 7 files changed, 134 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35f7222..761da94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +### Fixed +- **Store guardrails — post-#290 follow-up.** Admin Rescan still writes `status='blocked_inline'` (the only post-v30 producer of that status). Re-add `blocked_inline` to the admin queue's "Needs review" filter chip and to `TERMINAL_BLOCKED_STATUSES` in the bundle-purge job, so a rescan-produced row surfaces in the default operator view and its bundle gets swept by the TTL purge instead of lingering on disk indefinitely. Documents the rescan-only asymmetry inline (chip + purge tuple + new code comments). +- Stale doc strings referring to the pre-#290 `blocked_inline` quota counter on `app/api/store.py` spam-quota comment, `app/instance_config.py::get_guardrails_blocked_quota_per_day` docstring, and the operator-facing hint in `/admin/server-config` (`blocked_quota_per_day`). All three now correctly describe the narrowed `blocked_llm + review_error` counter that #290 actually shipped. + +### Internal +- Tightened `test_quota_disabled_with_zero` assertion from `r.status_code != 429` to `r.status_code in (200, 201)` so a 500 regression no longer slips through as quota-disabled. +- New positive test `test_inline_validation_returns_validation_failed_code` covering the `_reject_inline_or_continue` validation branch end-to-end (response code + checks payload shape + no-DB-write contract). Locks the frontend wizard's `detail.checks.{manifest,content,quality}` contract. +- `_reject_inline_or_continue` now takes `plugin_dir` and lazy-computes `bundle_meta` only on the security branch; the validation branch (the common case for honest submitters) no longer pays for a SHA256 walk over the bundle on every reject. +- Surface failures to write the `store.upload.security_blocked` audit row via `logger.exception` instead of silently swallowing — that audit row is the only forensic trace of an inline-tier security finding, and a swallowed DB error would have left no record at all. + ## [0.54.9] — 2026-05-13 ### Added diff --git a/app/api/admin.py b/app/api/admin.py index f4b1040..3345a12 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -772,9 +772,12 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = { "kind": "int", "default": 50, "hint": ( - "Per-submitter cap on `blocked_inline` rows in the " - "trailing 24h. Bounds the worst case where a bot loops " - "on malformed ZIPs. 0 disables the quota. Default 50." + "Per-submitter cap on `blocked_llm` + `review_error` " + "rows in the trailing 24h. Bounds the worst case where " + "a bot loops on bundles that survive inline checks but " + "trip the async LLM reviewer. Inline failures are " + "hard-rejected upstream (no row, not counted). 0 " + "disables the quota. Default 50." ), }, "blocked_bundle_ttl_days": { diff --git a/app/api/store.py b/app/api/store.py index af565ff..3dffc51 100644 --- a/app/api/store.py +++ b/app/api/store.py @@ -238,7 +238,7 @@ def _reject_inline_or_continue( conn: duckdb.DuckDBPyConnection, user: dict, inline: InlineResult, - bundle_meta: Any, + plugin_dir: Path, cleanup_paths: List[Path], type_: str, name: str, @@ -292,6 +292,12 @@ def _reject_inline_or_continue( ) if inline.static_security.get("status") != "pass": + # Lazy-compute bundle_meta only on the security branch. The + # validation branch returned above without needing the hash, + # so callers don't pay for compute_bundle_meta when manifest / + # content checks fail (the common case for honest submitters). + from src.store_guardrails.bundle_meta import compute_bundle_meta + bundle_meta = compute_bundle_meta(plugin_dir) findings = inline.static_security.get("findings") or [] try: AuditRepository(conn).log( @@ -311,7 +317,16 @@ def _reject_inline_or_continue( result="blocked", ) except Exception: - pass + # The security_blocked audit row is the ONLY forensic + # trace of this attempt (no DB submission row by design), + # so a swallowed failure here loses the signal entirely. + # Surface it in logs even though we keep raising the 422 + # so the submitter still sees the same response. + logger.exception( + "Failed to write store.upload.security_blocked " + "audit_log entry (context=%s sha256=%s)", + context, bundle_meta.sha256, + ) for p in cleanup_paths: shutil.rmtree(p, ignore_errors=True) raise HTTPException( @@ -1157,8 +1172,10 @@ async def create_entity( # queue with noise. Cap rejected uploads per submitter per 24h; # operator can disable by setting the knob to 0. # - # #9 widening: counts blocked_inline + blocked_llm + review_error so - # a submitter triggering only LLM-blocked verdicts is bounded too. + # Counter narrows to blocked_llm + review_error — inline failures + # are hard-rejected upstream and never create rows. HTTP-level + # slowapi limits + the `store.upload.security_blocked` audit trail + # cover the inline-tier abuse path. # # Race note (#5, deferred): two parallel uploads from the same # submitter can both pass the SELECT before either INSERT — the cap @@ -1259,8 +1276,6 @@ async def create_entity( # hard-rejected — no entity row, no submission row, no bundle on # disk. Quarantine + admin rescan apply ONLY to the async LLM # path (see runner.run_llm_review). See docs/STORE_GUARDRAILS.md. - from src.store_guardrails.bundle_meta import compute_bundle_meta - bundle_meta = compute_bundle_meta(plugin_dir) inline = run_inline_checks( plugin_dir, type_=type, description=final_description, ) @@ -1268,12 +1283,18 @@ async def create_entity( conn=conn, user=user, inline=inline, - bundle_meta=bundle_meta, + plugin_dir=plugin_dir, cleanup_paths=[_entity_dir(entity_id)], type_=type, name=final_name, context="create", ) + # Compute meta after the reject gate so honest submitters whose + # bundles fail validation never pay for the SHA256 walk; this + # path only fires once we know the bundle is going to be + # persisted (as a submission row). + from src.store_guardrails.bundle_meta import compute_bundle_meta + bundle_meta = compute_bundle_meta(plugin_dir) subs_repo = StoreSubmissionsRepository(conn) guardrails_on = get_guardrails_enabled() @@ -1509,8 +1530,6 @@ async def update_entity( description=description if description is not None else entity.get("description"), ) - from src.store_guardrails.bundle_meta import compute_bundle_meta - staging_meta = compute_bundle_meta(staging_plugin) # Hard-reject on inline failure. _reject_inline_or_continue # cleans the staged version dir (no live state to roll back — # the live ``plugin/`` tree was never touched) and raises 422 @@ -1520,7 +1539,7 @@ async def update_entity( conn=conn, user=user, inline=inline_after_update, - bundle_meta=staging_meta, + plugin_dir=staging_plugin, cleanup_paths=[version_root], type_=entity["type"], name=entity["name"], @@ -1843,8 +1862,6 @@ async def restore_version( type_=entity["type"], description=entity.get("description"), ) - from src.store_guardrails.bundle_meta import compute_bundle_meta - target_meta = compute_bundle_meta(target_plugin) # Hard-reject on inline failure. Wipe the staged version dir # entirely — live tree untouched, no entity/submission row to # create. The submitter sees the structured 422 and either fixes @@ -1853,7 +1870,7 @@ async def restore_version( conn=conn, user=user, inline=inline, - bundle_meta=target_meta, + plugin_dir=target_plugin, cleanup_paths=[target_root], type_=entity["type"], name=entity["name"], @@ -1864,6 +1881,8 @@ async def restore_version( # the PUT edit path: existing installers keep getting the prior # approved version through the LLM review window. Promotion (live # swap + version_no/version/file_size bump) waits on LLM approval. + from src.store_guardrails.bundle_meta import compute_bundle_meta + target_meta = compute_bundle_meta(target_plugin) guardrails_on = get_guardrails_enabled() subs_repo = StoreSubmissionsRepository(conn) sub_id = subs_repo.create( diff --git a/app/instance_config.py b/app/instance_config.py index 84aa9b8..207523e 100644 --- a/app/instance_config.py +++ b/app/instance_config.py @@ -432,12 +432,16 @@ def get_guardrails_review_model() -> str: def get_guardrails_blocked_quota_per_day() -> int: - """Per-submitter cap on `blocked_inline` rows in the trailing 24h. + """Per-submitter cap on `blocked_llm` + `review_error` rows in the + trailing 24h. Defaults to 50. Set to 0 in instance.yaml to disable the quota - entirely (useful for trusted single-tenant deployments). Bounds the - worst case where a bot loops on malformed ZIPs and fills disk + - the admin queue with noise. + entirely (useful for trusted single-tenant deployments). Bounds + the worst case where a bot loops on bundles that pass inline + checks but trip the async LLM reviewer. Inline failures are + hard-rejected upstream (no row created) and not counted here; + HTTP-level rate limiting + the + ``store.upload.security_blocked`` audit trail cover that path. """ val = get_value("guardrails", "blocked_quota_per_day", default=50) try: diff --git a/app/web/templates/admin_store_submissions.html b/app/web/templates/admin_store_submissions.html index ce79b9b..dc11c65 100644 --- a/app/web/templates/admin_store_submissions.html +++ b/app/web/templates/admin_store_submissions.html @@ -225,11 +225,15 @@ All {% set _pending = "pending_llm,pending_inline" %} Pending - {# 'blocked_inline' deliberately omitted — inline failures are now - hard-rejected upstream (no submission row created). Legacy - blocked_inline rows from instances that ran the v30 contract - remain reachable via the 'All' tab. #} - {% set _needs = "blocked_llm,review_error" %} + {# Inline failures on the upload path are hard-rejected and create + no rows — the only producer of `blocked_inline` post-v30 is the + admin Rescan flow, which re-runs inline checks against an + already-quarantined bundle. Include it here so an admin who + triggers a rescan sees the result in their default queue + instead of having to scroll to 'All'. Legacy `blocked_inline` + rows from pre-v30 instances also surface here, which is fine — + same actionable state. #} + {% set _needs = "blocked_inline,blocked_llm,review_error" %} Needs review Approved Overridden diff --git a/src/store_guardrails/purge.py b/src/store_guardrails/purge.py index 23bc263..7e95243 100644 --- a/src/store_guardrails/purge.py +++ b/src/store_guardrails/purge.py @@ -31,12 +31,15 @@ logger = logging.getLogger(__name__) # `approved` (live entity, never purge), `overridden` (admin already # decided to publish), and `pending_*` (still in review). # -# `blocked_inline` was removed when inline failures became hard-rejects -# (no submission row, no bundle persisted). Legacy `blocked_inline` rows -# from pre-cutover instances stay on disk indefinitely; reachable via -# audit_log lookups but no longer swept here. Re-add to the tuple if a -# one-shot legacy-cleanup pass is wanted. +# Inline-tier failures on the upload path are hard-rejected and never +# create rows here. The only writer of `blocked_inline` post-v30 is +# `admin_rescan_store_submission` — an admin-initiated rescan that +# re-fails inline produces a `blocked_inline` row pointing at the +# already-quarantined bundle. Sweeping these here matches operator +# expectation: an admin Rescan should not cause a previously-purged +# bundle to outlive its TTL just because the verdict changed. TERMINAL_BLOCKED_STATUSES = ( + "blocked_inline", "blocked_llm", "review_error", ) diff --git a/tests/test_admin_store_submissions.py b/tests/test_admin_store_submissions.py index 5be7413..98cb713 100644 --- a/tests/test_admin_store_submissions.py +++ b/tests/test_admin_store_submissions.py @@ -269,6 +269,63 @@ class TestAdminListing: assert params.get("bundle_sha256") assert params.get("submitter_email") == "spammer@x.com" + def test_inline_validation_returns_validation_failed_code(self, web_client): + """A bundle that survives pre-bake but fails ``content_check`` + (description too short) goes through ``_reject_inline_or_continue`` + and is rejected with the new two-tier response: 422, + ``detail.code == 'validation_failed'``, populated + ``detail.checks`` shape, NO submission row, NO entity row, + and NO audit_log entry (validation-tier failures are + operator-fixable, not forensically interesting). + + Distinct from ``test_validation_failure_creates_no_audit_trail`` + below which exercises the pre-bake ``zip_missing_skill_md`` path + — that one fails before inline checks ever run. + """ + from src.repositories.audit import AuditRepository + from src.repositories.store_submissions import StoreSubmissionsRepository + + # Valid skill layout — pre-bake parses frontmatter, layout + # check passes. But description is < 60 chars → content_check + # fires inside run_inline_checks → _reject_inline_or_continue + # returns validation_failed. + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr( + "tiny/SKILL.md", + "---\nname: tiny\ndescription: too short\n---\n\n" + + ("Body text. " * 50), + ) + short_desc_zip = buf.getvalue() + + user_id, user_cookies = _create_user(web_client, "shortdesc@x.com") + r = web_client.post( + "/api/store/entities", + files={"file": ("s.zip", short_desc_zip, "application/zip")}, + data={"type": "skill"}, cookies=user_cookies, + ) + assert r.status_code == 422, r.text + detail = r.json()["detail"] + assert detail["code"] == "validation_failed", detail + # Frontend wizard (humanizeError in store_upload.html) reads + # detail.checks.{manifest,content,quality} — lock the shape. + assert set(detail["checks"].keys()) == {"manifest", "content", "quality"} + assert detail["checks"]["content"]["status"] != "pass" + + # Validation-tier failures must not produce DB rows or audit entries. + conn = get_system_db() + items, _total = StoreSubmissionsRepository(conn).list_for_admin( + submitter_id=user_id, + ) + assert items == [] + rows, _cursor = AuditRepository(conn).query( + user_id=user_id, action_prefix="store.upload.", limit=10, + ) + conn.close() + assert rows == [], ( + "validation-tier rejection must not write audit_log entries" + ) + def test_validation_failure_creates_no_audit_trail(self, web_client): """A bundle that fails manifest validation (missing SKILL.md) is a fixable user error — no submission row, no entity row, and @@ -897,9 +954,10 @@ class TestQuota: ) # Clean upload — passes inline guardrails. With ANTHROPIC_API_KEY # absent in tests the guardrail pipeline auto-disables so the - # entity lands at ``approved`` (201). Anything other than 429 is - # the quota-disabled outcome we care about. - assert r.status_code != 429, r.text + # entity lands at ``approved`` (201). Assert the success codes + # explicitly so a 500 from an unrelated regression doesn't + # masquerade as quota-disabled. + assert r.status_code in (200, 201), r.text def test_quota_counter_includes_blocked_llm_and_review_error(self, web_client): """The counter narrows to ``blocked_llm`` + ``review_error`` —