agnes-the-ai-analyst/app/api/v2_quota.py
ZdenekSrotyr e44d2280e5 refactor(quota): #160 relocate _build_quota_tracker to v2_quota.py
The /api/query cost guardrail (next phase) needs the same singleton
QuotaTracker so its daily-byte and concurrent-slot caps accumulate
across both /api/v2/scan and /api/query BQ-touching paths.

Move `_build_quota_tracker`, `_quota_singleton`, and `_quota_init_lock`
from app/api/v2_scan.py to app/api/v2_quota.py (the natural home; the
factory uses QuotaTracker which already lives there). Re-export the
function from v2_scan.py so the 7 test sites at tests/test_v2_scan.py
(lines 77, 118, 143, 160, 186, 208, 250) keep working without edits.

Crucially do NOT re-export `_quota_singleton` from v2_scan.py — Python
`from X import var` copies the binding at import time, so a re-exported
singleton would freeze at the initial None and never observe the
in-place mutation done inside `_build_quota_tracker()`. Re-export only
the function (which always reads the live module-global through `global`).

Mechanical refactor; no behavior change. 30 quota-related tests pass.
2026-05-04 10:31:35 +02:00

162 lines
5.9 KiB
Python

"""Process-local quota tracker for /api/v2/scan (spec §3.8).
In-memory only. Multi-replica deployments effectively multiply caps by N
(documented caveat — see spec §9.4). Future v2 should move to durable
storage if horizontal scale is needed.
"""
from __future__ import annotations
import contextlib
import logging
import threading
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Iterator
logger = logging.getLogger(__name__)
KIND_CONCURRENT = "concurrent_scans"
KIND_DAILY_BYTES = "daily_bytes"
@dataclass
class QuotaExceededError(Exception):
kind: str
current: int
limit: int
retry_after_seconds: int = 0
def __str__(self) -> str:
return f"{self.kind}: {self.current}/{self.limit}"
def _utcnow() -> datetime: # patched in tests
return datetime.now(timezone.utc)
def _utc_today() -> str:
"""ISO date string in UTC, used as the daily-bucket key."""
return _utcnow().strftime("%Y-%m-%d")
def _seconds_until_utc_midnight() -> int:
now = _utcnow()
midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
next_midnight = midnight + timedelta(days=1)
return int((next_midnight - now).total_seconds())
class QuotaTracker:
"""Thread-safe quota state. Caller wraps each request in `with q.acquire(user)`,
and after the BQ result lands records bytes via `record_bytes(user, n)`.
"""
def __init__(self, *, max_concurrent_per_user: int, max_daily_bytes_per_user: int):
self._max_concurrent = max_concurrent_per_user
self._max_daily_bytes = max_daily_bytes_per_user
self._lock = threading.Lock()
# state: { user_id: { "concurrent": int, "bucket_day": "YYYY-MM-DD", "bytes": int } }
self._state: dict[str, dict] = {}
def _ensure_bucket(self, user: str) -> dict:
today = _utc_today()
s = self._state.setdefault(user, {"concurrent": 0, "bucket_day": today, "bytes": 0})
if s["bucket_day"] != today:
s["bucket_day"] = today
s["bytes"] = 0
return s
@contextlib.contextmanager
def acquire(self, user: str) -> Iterator[None]:
with self._lock:
s = self._ensure_bucket(user)
if s["concurrent"] >= self._max_concurrent:
raise QuotaExceededError(
kind=KIND_CONCURRENT,
current=s["concurrent"],
limit=self._max_concurrent,
)
s["concurrent"] += 1
try:
yield
finally:
with self._lock:
s = self._ensure_bucket(user)
s["concurrent"] = max(0, s["concurrent"] - 1)
def record_bytes(self, user: str, n: int) -> None:
"""Record bytes consumed by a request that already executed.
Always commits the new total — even if it pushes the user past the
daily cap — so subsequent ``check_daily_budget`` calls see the
cumulative usage and reject pre-flight. This method NEVER raises
anymore — the post-scan recording shouldn't strand a fetch the
user already paid for. Pre-flight enforcement lives in
``check_daily_budget``.
"""
if n <= 0:
return
with self._lock:
s = self._ensure_bucket(user)
s["bytes"] = s["bytes"] + n
def check_daily_budget(self, user: str) -> None:
"""Pre-flight check: raise QuotaExceededError if the user is already
AT or OVER the daily cap. Call this BEFORE running the BQ scan, so
the user doesn't pay for a query whose result we'd then have to
block on response."""
with self._lock:
current = self._ensure_bucket(user)["bytes"]
if current >= self._max_daily_bytes:
raise QuotaExceededError(
kind=KIND_DAILY_BYTES,
current=current,
limit=self._max_daily_bytes,
retry_after_seconds=_seconds_until_utc_midnight(),
)
def bytes_used_today(self, user: str) -> int:
with self._lock:
return self._ensure_bucket(user)["bytes"]
# Module-level singleton (process-local quota state per spec §3.8). FastAPI
# dispatches sync handlers via a thread pool, so two concurrent first-time
# requests can both observe `_quota_singleton is None` and each construct a
# separate tracker; the second assignment wins and the first reference leaks
# split-brain quota state. Guard with an init lock + double-check.
#
# Note: `_quota_singleton` and `_quota_init_lock` are intentionally
# module-private. Callers MUST go through `_build_quota_tracker()` so the
# singleton stays single. Re-exporting `_quota_singleton` from another
# module via `from app.api.v2_quota import _quota_singleton` would copy the
# initial-None binding at import time and never see subsequent updates —
# that's a footgun. The function re-export is safe (it always reads the
# live module-global).
_quota_init_lock = threading.Lock()
_quota_singleton: "QuotaTracker | None" = None
def _build_quota_tracker() -> QuotaTracker:
"""Returns or constructs the process-local quota tracker (thread-safe).
Shared across `/api/v2/scan` (the original caller) and `/api/query`
(issue #160 cost guardrail) so the per-user daily byte cap accumulates
across both BQ-touching paths.
"""
from app.instance_config import get_value
global _quota_singleton
if _quota_singleton is not None:
return _quota_singleton
with _quota_init_lock:
if _quota_singleton is None:
_quota_singleton = QuotaTracker(
max_concurrent_per_user=int(
get_value("api", "scan", "max_concurrent_per_user", default=5) or 5
),
max_daily_bytes_per_user=int(
get_value("api", "scan", "max_daily_bytes_per_user", default=53687091200)
or 53687091200
),
)
return _quota_singleton