diff --git a/CHANGELOG.md b/CHANGELOG.md index 956c1e4..61e7143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,15 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C gates the Edit/Delete buttons on `is_owner OR is_admin`. Pre-fix, an admin could delete via the API but saw no Edit/Delete affordance in the UI, and could not update non-owned entities at all. +- **Scratch directory leak on ZIP validation failure** (`app/api/store.py`, + Devin Review) — `create_entity` and `update_entity` created the `scratch` + temp dir inside one `try/finally` block but cleaned it up in a separate + one. When `_safe_zip_extract` raised `HTTPException` (zip-slip, + uncompressed-too-large) or `BadZipFile` was caught and re-raised, the + exception exited the first scope and the cleanup `finally` was never + reached. Each failed upload leaked a temp dir. Fixed by collapsing + scratch creation + cleanup into a single outer `try/finally` covering + both extraction and the metadata/bake work. - **Cross-owner suffix collision** (`app/api/store.py:create_entity`) — `sanitize_username` is many-to-one (`alice.smith` and `alice_smith` both → `alice-smith`). Two such users uploading entities with the same diff --git a/app/api/store.py b/app/api/store.py index 78cded4..aacae2b 100644 --- a/app/api/store.py +++ b/app/api/store.py @@ -815,20 +815,26 @@ async def create_entity( video_url = _validate_video_url(video_url) - # Stream + extract ZIP into a scratch dir. + # Stream + extract ZIP into a scratch dir. Both the temp-file (`tmp`) + # AND the scratch dir need cleanup on every exit path, including + # validation HTTPExceptions raised inside _safe_zip_extract + # (zip_unsafe_path, zip_too_large_uncompressed) and the BadZipFile→422 + # conversion. Pre-fix the scratch was created in one try/finally and + # cleaned up in a SEPARATE one — when extraction raised, control + # exited the first scope and the second never ran, leaking the dir. + # Single try/finally fixes both. tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip") + tmp.close() + scratch = Path(tempfile.mkdtemp(prefix="agnes_store_")) try: - tmp.close() - scratch = Path(tempfile.mkdtemp(prefix="agnes_store_")) try: with zipfile.ZipFile(tmp.name, "r") as zf: _safe_zip_extract(zf, scratch) except zipfile.BadZipFile: raise HTTPException(status_code=422, detail="zip_invalid") - finally: - Path(tmp.name).unlink(missing_ok=True) + finally: + Path(tmp.name).unlink(missing_ok=True) - try: meta = _validate_and_extract_metadata(type, scratch) final_name = (name or meta.get("name") or "").strip() if not final_name: @@ -925,19 +931,21 @@ async def update_entity( new_version: Optional[str] = None new_size: Optional[int] = None if file is not None: + # Same single-try/finally invariant as create_entity — see the comment + # there. ZIP-validation HTTPExceptions raised inside _safe_zip_extract + # were leaking the scratch dir before this restructure. tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip") + tmp.close() + scratch = Path(tempfile.mkdtemp(prefix="agnes_store_")) try: - tmp.close() - scratch = Path(tempfile.mkdtemp(prefix="agnes_store_")) try: with zipfile.ZipFile(tmp.name, "r") as zf: _safe_zip_extract(zf, scratch) except zipfile.BadZipFile: raise HTTPException(status_code=422, detail="zip_invalid") - finally: - Path(tmp.name).unlink(missing_ok=True) + finally: + Path(tmp.name).unlink(missing_ok=True) - try: _validate_and_extract_metadata(entity["type"], scratch) suffixed = suffixed_name(entity["name"], entity["owner_username"]) new_size = _bake_plugin_tree( diff --git a/tests/test_store_api.py b/tests/test_store_api.py index 3a19efe..5ee720b 100644 --- a/tests/test_store_api.py +++ b/tests/test_store_api.py @@ -588,6 +588,40 @@ class TestStoreSecurityFixes: assert r2.status_code == 409, r2.text assert r2.json()["detail"] == "conflict_global_suffix" + def test_scratch_dir_cleaned_up_after_failed_extraction(self, web_client, monkeypatch): + """Devin: ZIP-validation failure inside _safe_zip_extract was leaving + the ``agnes_store_*`` scratch dir on disk because scratch creation + and cleanup lived in different try/finally scopes. After the fix + both share one outer try/finally; assert the dir really is gone. + """ + import tempfile as _tempfile + from pathlib import Path as _Path + + # A ZIP whose only member traverses out of the destination — + # _safe_zip_extract raises 422 zip_unsafe_path before it touches + # extractall. That's the simplest trigger that exits via + # HTTPException without doing anything to scratch. + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("../escape.txt", "boom") + bad_zip = buf.getvalue() + + tmp_root = _Path(_tempfile.gettempdir()) + before = {p.name for p in tmp_root.glob("agnes_store_*")} + + _, cookies = _create_user(web_client, "leak@x.com") + r = web_client.post( + "/api/store/entities", + files={"file": ("bad.zip", bad_zip, "application/zip")}, + data={"type": "skill"}, cookies=cookies, + ) + assert r.status_code == 422, r.text + assert r.json()["detail"] == "zip_unsafe_path" + + after = {p.name for p in tmp_root.glob("agnes_store_*")} + leaked = after - before + assert not leaked, f"scratch dir leaked: {leaked}" + def test_distinct_suffixes_pass(self, web_client): """F5 — uploads that yield distinct suffixed names must pass. (Avoid regressing into rejecting all distinct uploads.)"""