5 new test files for the upcoming /api/query pre-flight block (next
commit). All failing for the right reason on the current codebase:
tests/test_query_bq_regex.py (8 + 1 + 7 + 1 = 17 cases)
Pure unit test of `BQ_PATH` regex constant (not yet imported from
app.api.query). Verifies the 16-case matrix from spec §4.3.1:
positive matches for fully-quoted / unquoted / mixed quoting / case
variants / inside CTE bodies / multiple paths in one statement;
negative for bare registered names / 2-part bq.col / prefix that
contains bq / middle-component bq / quoted bare names; documented
string-literal false-positive accepted.
tests/test_query_bigquery_query_blocked.py (3 cases)
POST /api/query with bigquery_query() function call must hit the
canonical blocklist rejection ("Only single SELECT queries are
allowed"). Today the blocklist passes all 3 — confirmed RED via
detail-string assertion.
tests/test_api_query_rbac_bq_path.py (4 cases)
Direct bq."<ds>"."<tbl>" references must be registry-gated:
unregistered → 403 bq_path_not_registered; registered + admin →
bypass per-name grant; case-insensitive lookup; string-literal
containing bq.X.Y → 403 (strict-deny).
tests/test_api_query_guardrail.py (3 cases)
Cost guardrail: SQL referencing a registered remote BQ row invokes
_bq_dry_run_bytes (verified via call-counter side effect); over-cap
dry-run returns 400 remote_scan_too_large with bytes/tables/suggestion
in detail; non-BQ queries skip the dry-run entirely.
tests/test_api_query_quota.py (3 cases)
Daily-byte quota check_daily_budget pre-flight (over-cap → 429
before dry-run); record_bytes post-flight on the shared singleton
v2_quota tracker; non-BQ queries leave the counter alone.
RED breakdown: 16 ImportError (BQ_PATH not yet defined) + 7 assertion
failures = 23 fully-RED. 6 tests already pass on the current codebase:
their assertions are guarded by conditionals such as
`if r.status_code == 403:`, and the current code returns 400 for
unrelated reasons, so the guarded assertions are skipped. They act as
anti-regression guards once the implementation lands and stay green
throughout — documented per spec §6 Phase 1 RED-discipline notes.
126 lines
4.4 KiB
Python
126 lines
4.4 KiB
Python
"""POST /api/query enforces the same per-user quota as /api/v2/scan.

The daily-byte cap is checked pre-flight (before the dry-run); a concurrent
slot is acquired around dry-run + execute and released on exit; record_bytes
is called post-flight after the result lands. The quota tracker is the
process-local singleton in app/api/v2_quota.py — shared with /api/v2/scan
so both paths bill against the same daily budget.

Closes part of #160 §4.3.3.
"""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
|
|
def _auth(token: str) -> dict:
|
|
return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
def _register_bq_remote_row(name: str, bucket: str, source_table: str) -> None:
    """Register a remote-mode BigQuery row in the system table registry.

    The row is stored with ``source_type="bigquery"`` and
    ``query_mode="remote"``; its id is ``bq.<bucket>.<source_table>``.

    Args:
        name: Registered name the row is stored under.
        bucket: Value passed through as the row's ``bucket``.
        source_table: Value passed through as the row's ``source_table``.
    """
    # Imported lazily, as in the rest of this module's helpers, so merely
    # collecting the test file does not import the project's DB layer.
    from src.db import get_system_db
    from src.repositories.table_registry import TableRegistryRepository

    row_fields = {
        "id": f"bq.{bucket}.{source_table}",
        "name": name,
        "source_type": "bigquery",
        "bucket": bucket,
        "source_table": source_table,
        "query_mode": "remote",
    }

    connection = get_system_db()
    try:
        registry = TableRegistryRepository(connection)
        registry.register(**row_fields)
    finally:
        # Always release the system DB handle, even if registration fails.
        connection.close()
|
|
|
|
|
|
@pytest.fixture
def fresh_quota(monkeypatch):
    """Clear the process-local quota singleton and hand back its module.

    Sets ``app.api.v2_quota._quota_singleton`` to ``None`` through
    ``monkeypatch`` so the test owns its quota state; without this, prior
    tests' usage bleeds into the daily-bytes counter. ``raising=False``
    keeps the fixture usable even if the attribute does not exist.

    Returns:
        The ``app.api.v2_quota`` module itself (not a tracker instance);
        tests obtain a tracker by calling its ``_build_quota_tracker()``.
    """
    import app.api.v2_quota as quota_module

    singleton_attr = "_quota_singleton"
    monkeypatch.setattr(quota_module, singleton_attr, None, raising=False)
    return quota_module
|
|
|
|
|
|
@pytest.fixture
def mock_dry_run(monkeypatch):
    """Swap ``app.api.query._bq_dry_run_bytes`` for a configurable stub.

    The stub ignores its arguments and returns whatever is currently stored
    under the ``"bytes"`` key of the dict this fixture returns (1024 by
    default), so a test can change the reported size after setup.
    ``raising=False`` lets the patch apply even when the target attribute
    is not defined yet.

    Returns:
        The mutable settings dict backing the stub.
    """
    settings = {"bytes": 1024}

    def _stub_dry_run(*_args, **_kwargs):
        # Looked up at call time so later edits to ``settings`` take effect.
        return settings["bytes"]

    monkeypatch.setattr(
        "app.api.query._bq_dry_run_bytes",
        _stub_dry_run,
        raising=False,
    )
    return settings
|
|
|
|
|
|
def test_query_records_bytes_against_shared_quota(seeded_app, fresh_quota, mock_dry_run):
    """A successful BQ-touching /api/query call bills the shared tracker.

    The bytes must land on the SAME quota tracker that /api/v2/scan uses,
    so a user who has consumed daily budget through /api/v2/scan cannot
    dodge the cap by routing through /api/query.
    """
    _register_bq_remote_row("ue", "finance", "ue")
    mock_dry_run["bytes"] = 4096  # 4 KiB reported by the stubbed dry-run

    client = seeded_app["client"]
    request_headers = _auth(seeded_app["admin_token"])

    # Snapshot the user's usage before the request is issued.
    tracker = fresh_quota._build_quota_tracker()
    admin_id = "admin"  # seeded_app's admin user id
    before = tracker.bytes_used_today(admin_id)

    response = client.post(
        "/api/query",
        json={"sql": "SELECT count(*) FROM ue"},
        headers=request_headers,
    )
    after = tracker.bytes_used_today(admin_id)

    # The query may fail (no real BQ), so the byte-counter check is applied
    # only to a successful response.
    # NOTE(review): any non-200 status makes this test pass without
    # asserting anything — confirm that is the intended RED-phase behaviour.
    if response.status_code != 200:
        return

    assert after - before >= 4096, \
        f"successful BQ-touching query must record bytes; before={before} after={after}"
|
|
|
|
|
|
def test_query_pre_flight_rejects_user_over_daily_cap(
    seeded_app, fresh_quota, mock_dry_run, monkeypatch
):
    """An over-cap user gets 429 from /api/query BEFORE any dry-run runs.

    If the user is already over their daily byte cap on the shared tracker,
    the endpoint must reject the request pre-flight — no free BQ work for
    over-cap users via this back door. Both halves of that contract are
    checked: the 429 status, and that the dry-run hook was never invoked.
    """
    _register_bq_remote_row("ue", "finance", "ue")

    # Plant the user's daily counter past the cap by recording one byte more
    # than the tracker's configured maximum (50 GiB by default, per the
    # original author's note).
    tracker = fresh_quota._build_quota_tracker()
    user_id = "admin"
    tracker.record_bytes(user_id, tracker._max_daily_bytes + 1)

    # Layer a call-recording stub over the one installed by mock_dry_run so
    # the "before dry-run" ordering is actually verified, not just claimed.
    dry_run_calls = []

    def _recording_dry_run(*args, **kwargs):
        dry_run_calls.append((args, kwargs))
        return mock_dry_run["bytes"]

    monkeypatch.setattr(
        "app.api.query._bq_dry_run_bytes",
        _recording_dry_run,
        raising=False,
    )

    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.post(
        "/api/query",
        json={"sql": "SELECT count(*) FROM ue"},
        headers=_auth(token),
    )

    assert r.status_code == 429, r.json()
    assert not dry_run_calls, (
        "over-cap request must be rejected before the BQ dry-run; "
        f"dry-run was invoked {len(dry_run_calls)} time(s)"
    )
|
|
|
|
|
|
def test_non_bq_query_skips_quota_path(seeded_app, fresh_quota, mock_dry_run):
    """A query touching no registered remote BQ row must bypass the quota.

    Quota wiring runs only when dry_run_set is non-empty, so a plain
    ``SELECT 1`` must neither be rejected by the quota pre-flight (429) nor
    add any bytes to the user's daily counter.
    """
    tracker = fresh_quota._build_quota_tracker()
    user_id = "admin"
    before = tracker.bytes_used_today(user_id)

    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.post(
        "/api/query",
        json={"sql": "SELECT 1 AS x"},
        headers=_auth(token),
    )

    # The response was previously ignored; a 429 here would mean the quota
    # pre-flight ran for a query that has nothing to bill.
    assert r.status_code != 429, r.text

    after = tracker.bytes_used_today(user_id)
    assert after == before, \
        f"non-BQ query must not record bytes; before={before} after={after}"
|