fix(query): #168 review iter 2 — quota user_id parity + concurrent-slot 429

Devin Review iter #2 found 2 new issues (after iter #1's 5 fixes landed). Both real, both addressed. 🔴 Quota user_id key mismatch defeated shared daily budget. /api/query computed `user.get("id") or user.get("email")` while /api/v2/scan uses `user.get("email") or "anon"` (app/api/v2_scan.py:327). Same user → two different keys in the singleton QuotaTracker. BQ bytes consumed via /api/query were tracked under UUID; via /api/v2/scan under email; the `check_daily_budget` pre-flight on either endpoint never saw the other's recorded bytes — per-user cap was effectively doubled. Match v2/scan's email-first ordering. 🟡 QuotaExceededError(KIND_CONCURRENT) → 400 instead of 429. `quota.acquire(user_id)` raises this from __enter__ when the per-user concurrent-scan slot is at cap. The exception propagated through the @contextlib.contextmanager generator, the caller's `with guard:` block, and was caught by execute_query's generic `except Exception` handler → mapped to 400 with a flattened "Query error: concurrent_scans: N/M" string, dropping the typed retry_after_seconds field. Wrap the `with quota.acquire(...)` in a try/except QuotaExceededError that maps to 429 with the same typed-detail shape used for the daily-budget rejection — consistent with /api/v2/scan:392-402. Tests: test_api_query_quota.py user_id strings updated to "admin@test.com" (the seeded_app admin's email) to match the new email-first ordering. 40 affected tests pass.
2026-05-04 13:38:31 +02:00 · 2026-05-04 13:38:31 +02:00 · 5eaa449fcc
commit 5eaa449fcc
parent 1263b80726
2 changed files with 61 additions and 34 deletions
--- a/app/api/query.py
+++ b/app/api/query.py
@ -149,7 +149,13 @@ async def execute_query(
        # implementation released the slot before execute. Use a context
        # manager so dry-run + cap check + execute + record_bytes all run
        # inside the slot.
-        user_id = user.get("id") or user.get("email") or "anon"
+        # Key on email first, like /api/v2/scan (`email or "anon"`), so the
+        # shared QuotaTracker singleton sees the SAME key from both endpoints.
+        # The earlier `id or email` ordering keyed BQ bytes on UUID here vs
+        # email on /api/v2/scan, effectively doubling the per-user daily cap
+        # (Devin Review #2, PR #168). NOTE(review): a user with an id but no
+        # email still gets id here vs "anon" on v2/scan — confirm that is OK.
+        user_id = user.get("email") or user.get("id") or "anon"
        guard = (
            _bq_quota_and_cap_guard(
                user_id=user_id, dry_run_set=dry_run_set, sql=request.sql,
@ -420,6 +426,16 @@ def _bq_quota_and_cap_guard(*, user_id: str, dry_run_set: list, sql: str):

    cap_bytes = _default_remote_query_cap_bytes()

+    # `quota.acquire(user_id)` raises QuotaExceededError(KIND_CONCURRENT)
+    # via __enter__ when the per-user concurrent-scan slot is at cap.
+    # Catch around the `with` and map to HTTP 429 with the typed detail
+    # shape — same shape as the daily-budget rejection above. Without
+    # this, the exception propagates through @contextlib.contextmanager
+    # and is caught by execute_query's generic `except Exception` →
+    # returns HTTP 400 with a flattened "Query error: concurrent_scans:
+    # N/M" string, dropping the typed retry_after_seconds field.
+    # Devin Review #2 on PR #168.
+    try:
        with quota.acquire(user_id):
            total_bytes = 0
            for i, (bucket, source_table, _) in enumerate(dry_run_set):
@ -452,3 +468,14 @@ def _bq_quota_and_cap_guard(*, user_id: str, dry_run_set: list, sql: str):
            # Yield control to the handler — slot stays acquired while the
            # caller runs analytics.execute() + record_bytes().
            yield total_bytes
+    except QuotaExceededError as exc:
+        # Only KIND_CONCURRENT should land here (daily-budget mapped above;
+        # record_bytes never raises), but caller-body errors re-raise at the
+        # yield too. Map to 429, detail consistent with daily-budget shape.
+        raise HTTPException(status_code=429, detail={
+            "reason": "concurrent_slot_exceeded",
+            "kind": exc.kind,
+            "current": exc.current,
+            "limit": exc.limit,
+            "retry_after_seconds": exc.retry_after_seconds,
+        })
--- a/tests/test_api_query_quota.py
+++ b/tests/test_api_query_quota.py
@ -68,7 +68,7 @@ def test_query_records_bytes_against_shared_quota(seeded_app, fresh_quota, mock_

    # Pre-flight: tracker has zero usage for this user.
    tracker = fresh_quota._build_quota_tracker()
-    user_id = "admin1"  # seeded_app's admin user id
+    user_id = "admin@test.com"  # seeded_app admin's email; email-keyed per parity with /api/v2/scan (#168 review)
    before = tracker.bytes_used_today(user_id)

    r = c.post(
@ -93,7 +93,7 @@ def test_query_pre_flight_rejects_user_over_daily_cap(seeded_app, fresh_quota, m

    # Plant the user's daily counter already at the cap by injecting bytes.
    tracker = fresh_quota._build_quota_tracker()
-    user_id = "admin1"
+    user_id = "admin@test.com"  # email-keyed per parity with /api/v2/scan (#168 review)
    # Push counter past the cap (default 50 GiB).
    tracker.record_bytes(user_id, tracker._max_daily_bytes + 1)

@ -111,7 +111,7 @@ def test_non_bq_query_skips_quota_path(seeded_app, fresh_quota, mock_dry_run):
    """A query that doesn't touch any registered remote BQ row must NOT
    decrement quota. Quota wiring runs only when dry_run_set is non-empty."""
    tracker = fresh_quota._build_quota_tracker()
-    user_id = "admin1"
+    user_id = "admin@test.com"  # email-keyed per parity with /api/v2/scan (#168 review)
    before = tracker.bytes_used_today(user_id)

    c = seeded_app["client"]