fix(store): scratch dir leak on ZIP validation failure (Devin Review)
create_entity + update_entity created the `scratch` temp dir inside one try/finally but cleaned it up in a separate one. Validation HTTPExceptions raised by _safe_zip_extract (zip_unsafe_path, zip_too_large_uncompressed) or the BadZipFile→422 conversion exited the first scope, and the second finally was never entered → temp dir leaked on every failed upload. Devin flagged this on the F2 commit. The leak pre-existed (zip_unsafe_path was the original vector); F2 added zip_too_large_uncompressed to the same broken cleanup path. Fixed by collapsing scratch creation + cleanup into one outer try/finally that covers both extraction AND metadata/bake; the inner try/except/finally still handles BadZipFile→422 + tmp file cleanup. Same restructure in update_entity. Regression test `test_scratch_dir_cleaned_up_after_failed_extraction` triggers a zip_unsafe_path 422 and asserts tmp/agnes_store_* contains no leaked dirs.
This commit is contained in:
parent
78cad8b235
commit
f0d091f721
3 changed files with 62 additions and 11 deletions
|
|
@ -82,6 +82,15 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
|||
gates the Edit/Delete buttons on `is_owner OR is_admin`. Pre-fix, an
|
||||
admin could delete via the API but saw no Edit/Delete affordance in the
|
||||
UI, and could not update non-owned entities at all.
|
||||
- **Scratch directory leak on ZIP validation failure** (`app/api/store.py`,
|
||||
Devin Review) — `create_entity` and `update_entity` created the `scratch`
|
||||
temp dir inside one `try/finally` block but cleaned it up in a separate
|
||||
one. When `_safe_zip_extract` raised `HTTPException` (zip-slip,
|
||||
uncompressed-too-large) or `BadZipFile` was caught and re-raised, the
|
||||
exception exited the first scope and the cleanup `finally` was never
|
||||
reached. Each failed upload leaked a temp dir. Fixed by collapsing
|
||||
scratch creation + cleanup into a single outer `try/finally` covering
|
||||
both extraction and the metadata/bake work.
|
||||
- **Cross-owner suffix collision** (`app/api/store.py:create_entity`) —
|
||||
`sanitize_username` is many-to-one (`alice.smith` and `alice_smith`
|
||||
both → `alice-smith`). Two such users uploading entities with the same
|
||||
|
|
|
|||
|
|
@ -815,11 +815,18 @@ async def create_entity(
|
|||
|
||||
video_url = _validate_video_url(video_url)
|
||||
|
||||
# Stream + extract ZIP into a scratch dir.
|
||||
# Stream + extract ZIP into a scratch dir. Both the temp-file (`tmp`)
|
||||
# AND the scratch dir need cleanup on every exit path, including
|
||||
# validation HTTPExceptions raised inside _safe_zip_extract
|
||||
# (zip_unsafe_path, zip_too_large_uncompressed) and the BadZipFile→422
|
||||
# conversion. Pre-fix the scratch was created in one try/finally and
|
||||
# cleaned up in a SEPARATE one — when extraction raised, control
|
||||
# exited the first scope and the second never ran, leaking the dir.
|
||||
# Single try/finally fixes both.
|
||||
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
||||
try:
|
||||
tmp.close()
|
||||
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
||||
try:
|
||||
try:
|
||||
with zipfile.ZipFile(tmp.name, "r") as zf:
|
||||
_safe_zip_extract(zf, scratch)
|
||||
|
|
@ -828,7 +835,6 @@ async def create_entity(
|
|||
finally:
|
||||
Path(tmp.name).unlink(missing_ok=True)
|
||||
|
||||
try:
|
||||
meta = _validate_and_extract_metadata(type, scratch)
|
||||
final_name = (name or meta.get("name") or "").strip()
|
||||
if not final_name:
|
||||
|
|
@ -925,10 +931,13 @@ async def update_entity(
|
|||
new_version: Optional[str] = None
|
||||
new_size: Optional[int] = None
|
||||
if file is not None:
|
||||
# Same single-try/finally invariant as create_entity — see the comment
|
||||
# there. ZIP-validation HTTPExceptions raised inside _safe_zip_extract
|
||||
# were leaking the scratch dir before this restructure.
|
||||
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
||||
try:
|
||||
tmp.close()
|
||||
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
||||
try:
|
||||
try:
|
||||
with zipfile.ZipFile(tmp.name, "r") as zf:
|
||||
_safe_zip_extract(zf, scratch)
|
||||
|
|
@ -937,7 +946,6 @@ async def update_entity(
|
|||
finally:
|
||||
Path(tmp.name).unlink(missing_ok=True)
|
||||
|
||||
try:
|
||||
_validate_and_extract_metadata(entity["type"], scratch)
|
||||
suffixed = suffixed_name(entity["name"], entity["owner_username"])
|
||||
new_size = _bake_plugin_tree(
|
||||
|
|
|
|||
|
|
@ -588,6 +588,40 @@ class TestStoreSecurityFixes:
|
|||
assert r2.status_code == 409, r2.text
|
||||
assert r2.json()["detail"] == "conflict_global_suffix"
|
||||
|
||||
def test_scratch_dir_cleaned_up_after_failed_extraction(self, web_client, monkeypatch):
|
||||
"""Devin: ZIP-validation failure inside _safe_zip_extract was leaving
|
||||
the ``agnes_store_*`` scratch dir on disk because scratch creation
|
||||
and cleanup lived in different try/finally scopes. After the fix
|
||||
both share one outer try/finally; assert the dir really is gone.
|
||||
"""
|
||||
import tempfile as _tempfile
|
||||
from pathlib import Path as _Path
|
||||
|
||||
# A ZIP whose only member traverses out of the destination —
|
||||
# _safe_zip_extract raises 422 zip_unsafe_path before it touches
|
||||
# extractall. That's the simplest trigger that exits via
|
||||
# HTTPException without doing anything to scratch.
|
||||
buf = io.BytesIO()
|
||||
with zipfile.ZipFile(buf, "w") as zf:
|
||||
zf.writestr("../escape.txt", "boom")
|
||||
bad_zip = buf.getvalue()
|
||||
|
||||
tmp_root = _Path(_tempfile.gettempdir())
|
||||
before = {p.name for p in tmp_root.glob("agnes_store_*")}
|
||||
|
||||
_, cookies = _create_user(web_client, "leak@x.com")
|
||||
r = web_client.post(
|
||||
"/api/store/entities",
|
||||
files={"file": ("bad.zip", bad_zip, "application/zip")},
|
||||
data={"type": "skill"}, cookies=cookies,
|
||||
)
|
||||
assert r.status_code == 422, r.text
|
||||
assert r.json()["detail"] == "zip_unsafe_path"
|
||||
|
||||
after = {p.name for p in tmp_root.glob("agnes_store_*")}
|
||||
leaked = after - before
|
||||
assert not leaked, f"scratch dir leaked: {leaked}"
|
||||
|
||||
def test_distinct_suffixes_pass(self, web_client):
|
||||
"""F5 — uploads that yield distinct suffixed names must pass. (Avoid
|
||||
regressing into rejecting all distinct uploads.)"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue