fix(query): #168 review iter 2 — quota user_id parity + concurrent-slot 429

Devin Review iter #2 found 2 new issues (after iter #1's 5 fixes
landed). Both real, both addressed.

🔴 Quota user_id key mismatch defeated shared daily budget. /api/query
computed `user.get("id") or user.get("email")` while /api/v2/scan uses
`user.get("email") or "anon"` (app/api/v2_scan.py:327). Same user → two
different keys in the singleton QuotaTracker. BQ bytes consumed via
/api/query were tracked under UUID; via /api/v2/scan under email; the
`check_daily_budget` pre-flight on either endpoint never saw the
other's recorded bytes — per-user cap was effectively doubled. Match
v2/scan's email-first ordering.

🟡 QuotaExceededError(KIND_CONCURRENT) → 400 instead of 429.
`quota.acquire(user_id)` raises this from __enter__ when the per-user
concurrent-scan slot is at cap. The exception propagated through the
@contextlib.contextmanager generator, the caller's `with guard:`
block, and was caught by execute_query's generic `except Exception`
handler → mapped to 400 with a flattened "Query error: concurrent_scans:
N/M" string, dropping the typed retry_after_seconds field. Wrap the
`with quota.acquire(...)` in a try/except QuotaExceededError that maps
to 429 with the same typed-detail shape used for the daily-budget
rejection — consistent with /api/v2/scan:392-402.

Tests: test_api_query_quota.py user_id strings updated to
"admin@test.com" (the seeded_app admin's email) to match the new
email-first ordering. 40 affected tests pass.
This commit is contained in:
ZdenekSrotyr 2026-05-04 13:38:31 +02:00
parent 1263b80726
commit 5eaa449fcc
2 changed files with 61 additions and 34 deletions

View file

@ -149,7 +149,13 @@ async def execute_query(
# implementation released the slot before execute. Use a context # implementation released the slot before execute. Use a context
# manager so dry-run + cap check + execute + record_bytes all run # manager so dry-run + cap check + execute + record_bytes all run
# inside the slot. # inside the slot.
user_id = user.get("id") or user.get("email") or "anon" # Match /api/v2/scan's user_id key shape (`email or "anon"`) so the
# shared QuotaTracker singleton sees the SAME key for both endpoints.
# Earlier `id or email` ordering keyed BQ bytes on UUID for /api/query
# vs email for /api/v2/scan — the per-user daily cap was effectively
# doubled because the two paths tracked under different keys.
# Devin Review #2 caught this on PR #168.
user_id = user.get("email") or user.get("id") or "anon"
guard = ( guard = (
_bq_quota_and_cap_guard( _bq_quota_and_cap_guard(
user_id=user_id, dry_run_set=dry_run_set, sql=request.sql, user_id=user_id, dry_run_set=dry_run_set, sql=request.sql,
@ -420,6 +426,16 @@ def _bq_quota_and_cap_guard(*, user_id: str, dry_run_set: list, sql: str):
cap_bytes = _default_remote_query_cap_bytes() cap_bytes = _default_remote_query_cap_bytes()
# `quota.acquire(user_id)` raises QuotaExceededError(KIND_CONCURRENT)
# via __enter__ when the per-user concurrent-scan slot is at cap.
# Catch around the `with` and map to HTTP 429 with the typed detail
# shape — same shape as the daily-budget rejection above. Without
# this, the exception propagates through @contextlib.contextmanager
# and is caught by execute_query's generic `except Exception` →
# returns HTTP 400 with a flattened "Query error: concurrent_scans:
# N/M" string, dropping the typed retry_after_seconds field.
# Devin Review #2 on PR #168.
try:
with quota.acquire(user_id): with quota.acquire(user_id):
total_bytes = 0 total_bytes = 0
for i, (bucket, source_table, _) in enumerate(dry_run_set): for i, (bucket, source_table, _) in enumerate(dry_run_set):
@ -452,3 +468,14 @@ def _bq_quota_and_cap_guard(*, user_id: str, dry_run_set: list, sql: str):
# Yield control to the handler — slot stays acquired while the # Yield control to the handler — slot stays acquired while the
# caller runs analytics.execute() + record_bytes(). # caller runs analytics.execute() + record_bytes().
yield total_bytes yield total_bytes
except QuotaExceededError as exc:
# Only KIND_CONCURRENT can land here (daily-budget already mapped
# above; record_bytes never raises). Map to 429 with structured
# detail consistent with the daily-budget shape.
raise HTTPException(status_code=429, detail={
"reason": "concurrent_slot_exceeded",
"kind": exc.kind,
"current": exc.current,
"limit": exc.limit,
"retry_after_seconds": exc.retry_after_seconds,
})

View file

@ -68,7 +68,7 @@ def test_query_records_bytes_against_shared_quota(seeded_app, fresh_quota, mock_
# Pre-flight: tracker has zero usage for this user. # Pre-flight: tracker has zero usage for this user.
tracker = fresh_quota._build_quota_tracker() tracker = fresh_quota._build_quota_tracker()
user_id = "admin1" # seeded_app's admin user id user_id = "admin@test.com" # email-keyed per parity with /api/v2/scan (#168 review) # seeded_app's admin user id
before = tracker.bytes_used_today(user_id) before = tracker.bytes_used_today(user_id)
r = c.post( r = c.post(
@ -93,7 +93,7 @@ def test_query_pre_flight_rejects_user_over_daily_cap(seeded_app, fresh_quota, m
# Plant the user's daily counter already at the cap by injecting bytes. # Plant the user's daily counter already at the cap by injecting bytes.
tracker = fresh_quota._build_quota_tracker() tracker = fresh_quota._build_quota_tracker()
user_id = "admin1" user_id = "admin@test.com" # email-keyed per parity with /api/v2/scan (#168 review)
# Push counter past the cap (default 50 GiB). # Push counter past the cap (default 50 GiB).
tracker.record_bytes(user_id, tracker._max_daily_bytes + 1) tracker.record_bytes(user_id, tracker._max_daily_bytes + 1)
@ -111,7 +111,7 @@ def test_non_bq_query_skips_quota_path(seeded_app, fresh_quota, mock_dry_run):
"""A query that doesn't touch any registered remote BQ row must NOT """A query that doesn't touch any registered remote BQ row must NOT
decrement quota. Quota wiring runs only when dry_run_set is non-empty.""" decrement quota. Quota wiring runs only when dry_run_set is non-empty."""
tracker = fresh_quota._build_quota_tracker() tracker = fresh_quota._build_quota_tracker()
user_id = "admin1" user_id = "admin@test.com" # email-keyed per parity with /api/v2/scan (#168 review)
before = tracker.bytes_used_today(user_id) before = tracker.bytes_used_today(user_id)
c = seeded_app["client"] c = seeded_app["client"]