fix(session-collector): argv-free run() helper, drop SystemExit footgun (Devin Review on #179)

run_session_collector called collector.main() which did argparse.parse_args()
on uvicorn's sys.argv (['app.main:app', '--host', ...]) → sys.exit(2) →
SystemExit(2), which inherits from BaseException, escapes FastAPI handlers,
and propagates through the thread pool. Every scheduler tick that fired the
endpoint either 500-ed or risked killing the uvicorn worker.

services/session_collector/collector.py now exposes run(dry_run, verbose)
that returns (rc, stats); main() is a thin CLI shim that parses argv and
delegates. The admin endpoint calls run() directly and audit-logs the
per-run stats (users_processed, files_copied, files_skipped) instead of
just the rc. Three regression tests in TestRunHelper.

Closes Devin Review finding on app/api/admin.py:2819 (#179).
This commit is contained in:
ZdenekSrotyr 2026-05-05 06:31:55 +02:00
parent 046d8705ee
commit e68c2d3f0f
5 changed files with 86 additions and 22 deletions

View file

@ -42,6 +42,7 @@ Five-defect fix for the silently-broken session pipeline on default Compose depl
- **Defect 3 — `verification_detector` had no scheduler entry.** Now in `JOBS` with a 15 min cadence, hitting the new `/api/admin/run-verification-detector` endpoint.
- **Defect 2 — side-car services gated by `profiles: [full]` were silently skipped on default deploys.** Both stanzas dropped (Changed above); the scheduler-v2 cron is the sole driver.
- **Defect 1 — `/corporate-memory` filtered `status IN ('approved','mandatory')` with no hint that pending items existed.** Admin banner added (Added above).
- **#179 review — `/api/admin/run-session-collector` would SystemExit the worker.** The endpoint called `collector.main()`, whose `argparse.parse_args()` parsed uvicorn's `sys.argv` (`['app.main:app', '--host', …]`) and called `sys.exit(2)` on the unrecognised flags. `SystemExit` inherits from `BaseException`, escapes FastAPI's exception machinery, and propagates through the thread pool — every scheduler tick that fired the endpoint either 500-ed or risked killing the uvicorn worker. Fix: `services/session_collector/collector.py` now exposes an argv-free `run(dry_run, verbose) -> (rc, stats)` helper; `main()` is a thin CLI shim around it and the admin endpoint calls `run()` directly. Audit log now carries the per-run stats (`users_processed`, `files_copied`, `files_skipped`) instead of just the rc. Regression tests in `tests/test_session_collector.py::TestRunHelper`.
### Internal

View file

@ -2816,14 +2816,16 @@ def run_session_collector(
"""
from services.session_collector import collector
rc = collector.main()
# Call run() not main(): main() does argparse.parse_args() which would
# try to parse uvicorn's sys.argv and SystemExit(2) the worker.
rc, stats = collector.run(dry_run=False, verbose=False)
AuditRepository(conn).log(
user_id=user.get("id"),
action="run_session_collector",
resource="job:session-collector",
params={"rc": rc},
params={"rc": rc, **stats},
)
return {"ok": rc == 0, "details": {"rc": rc}}
return {"ok": rc == 0, "details": {"rc": rc, **stats}}
@router.post("/run-verification-detector")

View file

@ -113,29 +113,26 @@ def collect_user_sessions(username: str, user_home: Path, dry_run: bool = False)
return copied, skipped
def main() -> int:
"""Main entry point. Returns exit code (0=success, 1=error)."""
import argparse
def run(dry_run: bool = False, verbose: bool = False) -> tuple[int, dict]:
"""Run the session-collector pass. Returns (exit_code, stats).
Argv-free so callers (FastAPI admin endpoint, scheduler) can invoke
without inheriting uvicorn's sys.argv — argparse here would
SystemExit(2) when uvicorn's flags hit it.
stats keys: users_processed, files_copied, files_skipped.
"""
import grp
parser = argparse.ArgumentParser(description="Collect Claude Code session transcripts from all users")
parser.add_argument("--dry-run", action="store_true", help="Preview what would be copied without actually copying")
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
args = parser.parse_args()
if args.verbose:
if verbose:
logger.setLevel(logging.DEBUG)
logger.info("Starting session transcript collection")
# Ensure target base directory exists
try:
TARGET_BASE.mkdir(parents=True, exist_ok=True)
# Set permissions: root:data-ops, 2770 (admins only, sessions are sensitive)
os.chmod(TARGET_BASE, 0o2770)
# Try to set group ownership to data-ops if it exists
try:
dataops_gid = grp.getgrnam("data-ops").gr_gid
os.chown(TARGET_BASE, -1, dataops_gid)
@ -146,7 +143,7 @@ def main() -> int:
except Exception as e:
logger.error(f"Failed to create target directory {TARGET_BASE}: {e}")
return 1
return 1, {"users_processed": 0, "files_copied": 0, "files_skipped": 0}
total_copied = 0
total_skipped = 0
@ -155,7 +152,6 @@ def main() -> int:
for user_home in find_user_home_dirs():
username = user_home.name
# Skip system users (numeric UIDs typically < 1000)
try:
uid = user_home.stat().st_uid
if uid < 1000:
@ -163,7 +159,7 @@ def main() -> int:
except Exception:
continue
copied, skipped = collect_user_sessions(username, user_home, dry_run=args.dry_run)
copied, skipped = collect_user_sessions(username, user_home, dry_run=dry_run)
if copied > 0 or skipped > 0:
users_processed += 1
@ -175,7 +171,24 @@ def main() -> int:
f"Collection complete: {users_processed} users, {total_copied} files copied, {total_skipped} files skipped"
)
return 0
return 0, {
"users_processed": users_processed,
"files_copied": total_copied,
"files_skipped": total_skipped,
}
def main() -> int:
"""CLI entry point. Parses argv, delegates to run()."""
import argparse
parser = argparse.ArgumentParser(description="Collect Claude Code session transcripts from all users")
parser.add_argument("--dry-run", action="store_true", help="Preview what would be copied without actually copying")
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
args = parser.parse_args()
rc, _ = run(dry_run=args.dry_run, verbose=args.verbose)
return rc
if __name__ == "__main__":

View file

@ -29,12 +29,14 @@ class TestRunSessionCollector:
def test_admin_can_trigger_session_collector(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
with patch("services.session_collector.collector.main", return_value=0) as m:
fake_stats = {"users_processed": 1, "files_copied": 2, "files_skipped": 0}
with patch("services.session_collector.collector.run", return_value=(0, fake_stats)) as m:
resp = c.post("/api/admin/run-session-collector", headers=_auth(token))
assert resp.status_code == 200
body = resp.json()
assert body["ok"] is True
m.assert_called_once()
assert body["details"]["files_copied"] == 2
m.assert_called_once_with(dry_run=False, verbose=False)
def test_non_admin_blocked(self, seeded_app):
c = seeded_app["client"]

View file

@ -96,3 +96,49 @@ class TestFindSessionFiles:
found = list(find_session_files(user_home))
assert found == []
class TestRunHelper:
"""Argv-free run() entry point — regression for #179 review (SystemExit bug)."""
def test_run_does_not_call_argparse(self, monkeypatch, tmp_path):
"""run() must not parse sys.argv — uvicorn's argv would SystemExit(2) the worker.
Regression: app/api/admin.py:run_session_collector previously called
collector.main() which did argparse.parse_args() on uvicorn's argv.
"""
from services.session_collector import collector
monkeypatch.setattr(
"sys.argv",
["app.main:app", "--host", "0.0.0.0", "--port", "8000",
"--proxy-headers", "--forwarded-allow-ips=*"],
)
monkeypatch.setattr(collector, "TARGET_BASE", tmp_path / "user_sessions")
monkeypatch.setattr(collector, "find_user_home_dirs", lambda: iter([]))
rc, stats = collector.run(dry_run=True, verbose=False)
assert rc == 0
assert stats == {"users_processed": 0, "files_copied": 0, "files_skipped": 0}
def test_run_returns_stats_tuple(self, monkeypatch, tmp_path):
"""run() returns (exit_code, stats_dict) so the admin endpoint can audit."""
from services.session_collector import collector
monkeypatch.setattr(collector, "TARGET_BASE", tmp_path / "user_sessions")
monkeypatch.setattr(collector, "find_user_home_dirs", lambda: iter([]))
rc, stats = collector.run()
assert rc == 0
assert set(stats.keys()) == {"users_processed", "files_copied", "files_skipped"}
def test_main_still_delegates_to_run(self, monkeypatch, tmp_path):
"""The CLI main() must continue to work — argparse + delegate."""
from services.session_collector import collector
monkeypatch.setattr("sys.argv", ["session_collector", "--dry-run"])
monkeypatch.setattr(collector, "TARGET_BASE", tmp_path / "user_sessions")
monkeypatch.setattr(collector, "find_user_home_dirs", lambda: iter([]))
rc = collector.main()
assert rc == 0