fix(store): scratch dir leak on ZIP validation failure (Devin Review)
create_entity + update_entity created the `scratch` temp dir inside one try/finally but cleaned it up in a separate one. Validation HTTPExceptions raised by _safe_zip_extract (zip_unsafe_path, zip_too_large_uncompressed) or the BadZipFile→422 conversion exited the first scope, and the second finally was never entered → temp dir leaked on every failed upload. Devin flagged this on the F2 commit. The leak pre-existed (zip_unsafe_path was the original vector); F2 added zip_too_large_uncompressed to the same broken cleanup path. Fixed by collapsing scratch creation + cleanup into one outer try/finally that covers both extraction AND metadata/bake; the inner try/except/finally still handles BadZipFile→422 + tmp file cleanup. Same restructure in update_entity. Regression test `test_scratch_dir_cleaned_up_after_failed_extraction` triggers a zip_unsafe_path 422 and asserts tmp/agnes_store_* contains no leaked dirs.
This commit is contained in:
parent
78cad8b235
commit
f0d091f721
3 changed files with 62 additions and 11 deletions
|
|
@ -82,6 +82,15 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
||||||
gates the Edit/Delete buttons on `is_owner OR is_admin`. Pre-fix, an
|
gates the Edit/Delete buttons on `is_owner OR is_admin`. Pre-fix, an
|
||||||
admin could delete via the API but saw no Edit/Delete affordance in the
|
admin could delete via the API but saw no Edit/Delete affordance in the
|
||||||
UI, and could not update non-owned entities at all.
|
UI, and could not update non-owned entities at all.
|
||||||
|
- **Scratch directory leak on ZIP validation failure** (`app/api/store.py`,
|
||||||
|
Devin Review) — `create_entity` and `update_entity` created the `scratch`
|
||||||
|
temp dir inside one `try/finally` block but cleaned it up in a separate
|
||||||
|
one. When `_safe_zip_extract` raised `HTTPException` (zip-slip,
|
||||||
|
uncompressed-too-large) or `BadZipFile` was caught and re-raised, the
|
||||||
|
exception exited the first scope and the cleanup `finally` was never
|
||||||
|
reached. Each failed upload leaked a temp dir. Fixed by collapsing
|
||||||
|
scratch creation + cleanup into a single outer `try/finally` covering
|
||||||
|
both extraction and the metadata/bake work.
|
||||||
- **Cross-owner suffix collision** (`app/api/store.py:create_entity`) —
|
- **Cross-owner suffix collision** (`app/api/store.py:create_entity`) —
|
||||||
`sanitize_username` is many-to-one (`alice.smith` and `alice_smith`
|
`sanitize_username` is many-to-one (`alice.smith` and `alice_smith`
|
||||||
both → `alice-smith`). Two such users uploading entities with the same
|
both → `alice-smith`). Two such users uploading entities with the same
|
||||||
|
|
|
||||||
|
|
@ -815,20 +815,26 @@ async def create_entity(
|
||||||
|
|
||||||
video_url = _validate_video_url(video_url)
|
video_url = _validate_video_url(video_url)
|
||||||
|
|
||||||
# Stream + extract ZIP into a scratch dir.
|
# Stream + extract ZIP into a scratch dir. Both the temp-file (`tmp`)
|
||||||
|
# AND the scratch dir need cleanup on every exit path, including
|
||||||
|
# validation HTTPExceptions raised inside _safe_zip_extract
|
||||||
|
# (zip_unsafe_path, zip_too_large_uncompressed) and the BadZipFile→422
|
||||||
|
# conversion. Pre-fix the scratch was created in one try/finally and
|
||||||
|
# cleaned up in a SEPARATE one — when extraction raised, control
|
||||||
|
# exited the first scope and the second never ran, leaking the dir.
|
||||||
|
# Single try/finally fixes both.
|
||||||
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
||||||
|
tmp.close()
|
||||||
|
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
||||||
try:
|
try:
|
||||||
tmp.close()
|
|
||||||
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
|
||||||
try:
|
try:
|
||||||
with zipfile.ZipFile(tmp.name, "r") as zf:
|
with zipfile.ZipFile(tmp.name, "r") as zf:
|
||||||
_safe_zip_extract(zf, scratch)
|
_safe_zip_extract(zf, scratch)
|
||||||
except zipfile.BadZipFile:
|
except zipfile.BadZipFile:
|
||||||
raise HTTPException(status_code=422, detail="zip_invalid")
|
raise HTTPException(status_code=422, detail="zip_invalid")
|
||||||
finally:
|
finally:
|
||||||
Path(tmp.name).unlink(missing_ok=True)
|
Path(tmp.name).unlink(missing_ok=True)
|
||||||
|
|
||||||
try:
|
|
||||||
meta = _validate_and_extract_metadata(type, scratch)
|
meta = _validate_and_extract_metadata(type, scratch)
|
||||||
final_name = (name or meta.get("name") or "").strip()
|
final_name = (name or meta.get("name") or "").strip()
|
||||||
if not final_name:
|
if not final_name:
|
||||||
|
|
@ -925,19 +931,21 @@ async def update_entity(
|
||||||
new_version: Optional[str] = None
|
new_version: Optional[str] = None
|
||||||
new_size: Optional[int] = None
|
new_size: Optional[int] = None
|
||||||
if file is not None:
|
if file is not None:
|
||||||
|
# Same single-try/finally invariant as create_entity — see the comment
|
||||||
|
# there. ZIP-validation HTTPExceptions raised inside _safe_zip_extract
|
||||||
|
# were leaking the scratch dir before this restructure.
|
||||||
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
tmp, size = await _stream_to_temp(file, MAX_ZIP_SIZE, suffix=".zip")
|
||||||
|
tmp.close()
|
||||||
|
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
||||||
try:
|
try:
|
||||||
tmp.close()
|
|
||||||
scratch = Path(tempfile.mkdtemp(prefix="agnes_store_"))
|
|
||||||
try:
|
try:
|
||||||
with zipfile.ZipFile(tmp.name, "r") as zf:
|
with zipfile.ZipFile(tmp.name, "r") as zf:
|
||||||
_safe_zip_extract(zf, scratch)
|
_safe_zip_extract(zf, scratch)
|
||||||
except zipfile.BadZipFile:
|
except zipfile.BadZipFile:
|
||||||
raise HTTPException(status_code=422, detail="zip_invalid")
|
raise HTTPException(status_code=422, detail="zip_invalid")
|
||||||
finally:
|
finally:
|
||||||
Path(tmp.name).unlink(missing_ok=True)
|
Path(tmp.name).unlink(missing_ok=True)
|
||||||
|
|
||||||
try:
|
|
||||||
_validate_and_extract_metadata(entity["type"], scratch)
|
_validate_and_extract_metadata(entity["type"], scratch)
|
||||||
suffixed = suffixed_name(entity["name"], entity["owner_username"])
|
suffixed = suffixed_name(entity["name"], entity["owner_username"])
|
||||||
new_size = _bake_plugin_tree(
|
new_size = _bake_plugin_tree(
|
||||||
|
|
|
||||||
|
|
@ -588,6 +588,40 @@ class TestStoreSecurityFixes:
|
||||||
assert r2.status_code == 409, r2.text
|
assert r2.status_code == 409, r2.text
|
||||||
assert r2.json()["detail"] == "conflict_global_suffix"
|
assert r2.json()["detail"] == "conflict_global_suffix"
|
||||||
|
|
||||||
|
def test_scratch_dir_cleaned_up_after_failed_extraction(self, web_client, monkeypatch):
|
||||||
|
"""Devin: ZIP-validation failure inside _safe_zip_extract was leaving
|
||||||
|
the ``agnes_store_*`` scratch dir on disk because scratch creation
|
||||||
|
and cleanup lived in different try/finally scopes. After the fix
|
||||||
|
both share one outer try/finally; assert the dir really is gone.
|
||||||
|
"""
|
||||||
|
import tempfile as _tempfile
|
||||||
|
from pathlib import Path as _Path
|
||||||
|
|
||||||
|
# A ZIP whose only member traverses out of the destination —
|
||||||
|
# _safe_zip_extract raises 422 zip_unsafe_path before it touches
|
||||||
|
# extractall. That's the simplest trigger that exits via
|
||||||
|
# HTTPException without doing anything to scratch.
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with zipfile.ZipFile(buf, "w") as zf:
|
||||||
|
zf.writestr("../escape.txt", "boom")
|
||||||
|
bad_zip = buf.getvalue()
|
||||||
|
|
||||||
|
tmp_root = _Path(_tempfile.gettempdir())
|
||||||
|
before = {p.name for p in tmp_root.glob("agnes_store_*")}
|
||||||
|
|
||||||
|
_, cookies = _create_user(web_client, "leak@x.com")
|
||||||
|
r = web_client.post(
|
||||||
|
"/api/store/entities",
|
||||||
|
files={"file": ("bad.zip", bad_zip, "application/zip")},
|
||||||
|
data={"type": "skill"}, cookies=cookies,
|
||||||
|
)
|
||||||
|
assert r.status_code == 422, r.text
|
||||||
|
assert r.json()["detail"] == "zip_unsafe_path"
|
||||||
|
|
||||||
|
after = {p.name for p in tmp_root.glob("agnes_store_*")}
|
||||||
|
leaked = after - before
|
||||||
|
assert not leaked, f"scratch dir leaked: {leaked}"
|
||||||
|
|
||||||
def test_distinct_suffixes_pass(self, web_client):
|
def test_distinct_suffixes_pass(self, web_client):
|
||||||
"""F5 — uploads that yield distinct suffixed names must pass. (Avoid
|
"""F5 — uploads that yield distinct suffixed names must pass. (Avoid
|
||||||
regressing into rejecting all distinct uploads.)"""
|
regressing into rejecting all distinct uploads.)"""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue