diff --git a/CHANGELOG.md b/CHANGELOG.md index 661c43d..0cfb583 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,46 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.30.1] — 2026-05-02 + +### Security +- **auth**: per-IP rate limiting now applied across every credential-bearing + auth endpoint. Defaults: + - **10/minute** — `POST /auth/token`, `POST /auth/password/login`, + `POST /auth/password/login/web` (login brute-force throttle). + - **10/minute** — `POST/GET /auth/email/verify`, + `POST /auth/password/reset/confirm`, `POST /auth/password/setup/confirm`, + `POST /auth/password/setup` (JSON variant — without it, the form + `/setup/confirm` throttle is bypassable by switching to the JSON + path) (token brute-force throttle: the 32-byte URL-safe tokens are + high entropy but partial leaks via logs / proxy referer have + surfaced before, and there's no reason to allow unbounded guessing). + - **5/minute** — `POST /auth/email/send-link`, + `POST /auth/password/reset`, `POST /auth/password/setup/request` + (email-bombing throttle: same shape on all three — attacker rotates + random recipient addresses from a single IP to burn SMTP/SendGrid + quota and spam real users; anti-enumeration responses mask which + addresses landed). + - **3/minute** — `POST /auth/bootstrap` (one-shot in normal use). + Returns `429` with `Retry-After: 60` once exceeded. Per-IP key uses the + leftmost `X-Forwarded-For` hop — same trust model as + `app.auth.dependencies._client_ip` (Caddy strips client-supplied XFF in + front of the app). Set `AGNES_AUTH_RATELIMIT_ENABLED=0` in env and + bounce the container to disable (no image rebuild required; the value + is read at process start, matching every other Agnes env knob). New + dependency: `slowapi>=0.1.9`. Closes #45. +- **admin API**: `DELETE /api/admin/users/{id}/memberships/{group_id}` and + `DELETE /api/admin/groups/{group_id}/members/{user_id}` now refuse to + remove **anyone** from the seeded `Admin` group when they are the only + remaining active admin — previously the guard only fired on self-removal, + leaving a path where an admin could demote the only other admin and then + rely on the partial guard to (correctly) block self-removal, but a + scheduler / bootstrap path that bypasses normal admin checks could still + reduce active admins to zero. Recovery from zero admins requires direct + DB access, so the guard generalizes to mirror the existing + `count_admins(active_only=True) <= 1` check on `DELETE /api/admin/users/{id}` + and `PATCH /api/admin/users/{id}` (active=false). Closes #151. + ### Fixed - **admin API**: `POST /api/admin/register-table` and `PUT /api/admin/registry/{id}` now reject `source_query` containing BigQuery-native backtick identifiers diff --git a/app/api/access.py b/app/api/access.py index 045e951..b55f6c3 100644 --- a/app/api/access.py +++ b/app/api/access.py @@ -23,7 +23,7 @@ import duckdb from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel -from app.auth.access import require_admin +from app.auth.access import is_user_admin, require_admin from app.auth.dependencies import _get_db, get_current_user from app.resource_types import RESOURCE_TYPES, ResourceType, list_resource_types from src.repositories.audit import AuditRepository @@ -564,18 +564,23 @@ async def remove_member( conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): members = UserGroupMembersRepository(conn) - # Block removing yourself from Admin if you're the last admin — same - # protection as the user-management endpoints. + # Last-admin guard: refuse to remove anyone from the seeded Admin group + # when they are the only active admin — recovery from zero admins + # requires direct DB access. Same protection as delete_user / update_user + # (active=False) in app/api/users.py. group = UserGroupsRepository(conn).get(group_id) if not group: raise HTTPException(status_code=404, detail="Group not found") _guard_google_managed(group) - if group["name"] == "Admin" and user_id == user["id"]: - if UserRepository(conn).count_admins(active_only=True) <= 1: - raise HTTPException( - status_code=409, - detail="Cannot remove yourself from Admin — you are the last admin", - ) + if ( + group["name"] == "Admin" + and is_user_admin(user_id, conn) + and UserRepository(conn).count_admins(active_only=True) <= 1 + ): + raise HTTPException( + status_code=409, + detail="Cannot remove the last admin — at least one user must remain in the Admin group", + ) # Only delete admin-source rows from this endpoint. Google-sync rows # rebuild themselves on next login; system_seed rows survive deploys. removed = members.remove_member(user_id, group_id, require_source="admin") @@ -845,19 +850,23 @@ async def remove_user_from_group( """Remove a user from a group from the user-centric page. Only deletes admin-source rows (Google-sync / system-seed managed - elsewhere). Last-admin guard: refuse to remove yourself from Admin - when you'd be the only remaining admin — keeps the system unlockable. + elsewhere). Last-admin guard: refuse to remove anyone from Admin + when they are the only active admin — recovery from zero admins + requires direct DB access. """ group = UserGroupsRepository(conn).get(group_id) if not group: raise HTTPException(status_code=404, detail="Group not found") _guard_google_managed(group) - if group["name"] == "Admin" and user_id == user["id"]: - if UserRepository(conn).count_admins(active_only=True) <= 1: - raise HTTPException( - status_code=409, - detail="Cannot remove yourself from Admin — you are the last admin", - ) + if ( + group["name"] == "Admin" + and is_user_admin(user_id, conn) + and UserRepository(conn).count_admins(active_only=True) <= 1 + ): + raise HTTPException( + status_code=409, + detail="Cannot remove the last admin — at least one user must remain in the Admin group", + ) members = UserGroupMembersRepository(conn) removed = members.remove_member(user_id, group_id, require_source="admin") if not removed: @@ -907,8 +916,6 @@ async def user_effective_access( if not UserRepository(conn).get_by_id(user_id): raise HTTPException(status_code=404, detail="User not found") - from app.auth.access import is_user_admin - # JOIN user's group memberships with their grants. group_concat-style # aggregation isn't worth it — render side-by-side rows and let the UI # collapse same (resource_type, resource_id) into a single line. @@ -961,8 +968,6 @@ async def my_effective_access( the profile page audits the actual grant graph; runtime authorization still gives Admin god-mode regardless of this list.""" user_id = user["id"] - from app.auth.access import is_user_admin - rows = conn.execute( """SELECT rg.resource_type, rg.resource_id, g.id AS group_id, g.name AS group_name diff --git a/app/auth/providers/email.py b/app/auth/providers/email.py index 2d46744..4cde091 100644 --- a/app/auth/providers/email.py +++ b/app/auth/providers/email.py @@ -6,7 +6,7 @@ import secrets from datetime import datetime, timedelta, timezone from urllib.parse import quote -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import RedirectResponse from pydantic import BaseModel import duckdb @@ -14,6 +14,7 @@ import duckdb from app.auth.jwt import create_access_token from app.auth.access import is_user_admin from app.auth.dependencies import _get_db, is_local_dev_mode +from app.auth.rate_limit import limiter as _rate_limiter from src.repositories.users import UserRepository @@ -59,8 +60,10 @@ def _build_magic_link(email: str, token: str) -> str: @router.post("/send-link") +@_rate_limiter.limit("5/minute") async def send_magic_link( - request: MagicLinkRequest, + request: Request, + body: MagicLinkRequest, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Send a magic link to the user's email. @@ -70,7 +73,7 @@ async def send_magic_link( click it without an email transport. """ repo = UserRepository(conn) - user = repo.get_by_email(request.email) + user = repo.get_by_email(body.email) # Always return success to prevent email enumeration if not user: @@ -84,20 +87,20 @@ async def send_magic_link( reset_token_created=datetime.now(timezone.utc), ) - link = _build_magic_link(request.email, token) + link = _build_magic_link(body.email, token) send_error: str | None = None if _has_email_transport(): try: - _send_email(request.email, token) + _send_email(body.email, token) except Exception as e: send_error = str(e) - logger.error("Failed to send magic link email to %s: %s", request.email, e) + logger.error("Failed to send magic link email to %s: %s", body.email, e) # Dev fallback: expose the link in logs + response so you can click it without SMTP. # Scoped strictly to LOCAL_DEV_MODE so test and production behavior are unchanged. if is_local_dev_mode(): logger.warning("=" * 60) - logger.warning("Magic link for %s (LOCAL_DEV_MODE fallback):", request.email) + logger.warning("Magic link for %s (LOCAL_DEV_MODE fallback):", body.email) logger.warning(" %s", link) logger.warning("=" * 60) response: dict = { @@ -184,19 +187,27 @@ def _consume_token(conn: duckdb.DuckDBPyConnection, email: str, token: str) -> d @router.post("/verify") +@_rate_limiter.limit("10/minute") async def verify_magic_link( - request: MagicLinkVerify, + request: Request, + body: MagicLinkVerify, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Verify a magic link token and issue JWT (JSON API for programmatic clients).""" - user = _consume_token(conn, request.email, request.token) + """Verify a magic link token and issue JWT (JSON API for programmatic clients). + + Rate limited 10/min per IP to slow brute-forcing the 32-byte + ``reset_token`` (the same column doubles as the magic-link token). + """ + user = _consume_token(conn, body.email, body.token) role_label = _role_label(user, conn) jwt_token = create_access_token(user["id"], user["email"]) return {"access_token": jwt_token, "token_type": "bearer", "email": user["email"], "role": role_label} @router.get("/verify") +@_rate_limiter.limit("10/minute") async def verify_magic_link_get( + request: Request, email: str, token: str, conn: duckdb.DuckDBPyConnection = Depends(_get_db), @@ -205,6 +216,9 @@ async def verify_magic_link_get( This is the URL we embed in outgoing emails (and the dev-fallback link), so clicking it in a mail client logs the user in without a separate API call. + + Rate limited 10/min per IP for the same reason as the POST variant — + don't let the click-through path bypass the brute-force throttle. """ user = _consume_token(conn, email, token) jwt_token = create_access_token(user["id"], user["email"]) diff --git a/app/auth/providers/password.py b/app/auth/providers/password.py index dce699c..eab2080 100644 --- a/app/auth/providers/password.py +++ b/app/auth/providers/password.py @@ -16,6 +16,7 @@ from argon2.exceptions import VerifyMismatchError from app.auth.jwt import create_access_token from app.auth.access import is_user_admin from app.auth.dependencies import _get_db, is_local_dev_mode +from app.auth.rate_limit import limiter as _rate_limiter from src.repositories.users import UserRepository @@ -197,13 +198,15 @@ def send_setup_email(request: Request, email: str, token: str) -> bool: # ---- Existing flows ---- @router.post("/login") +@_rate_limiter.limit("10/minute") async def password_login( - request: PasswordLoginRequest, + request: Request, + body: PasswordLoginRequest, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Login with email + password.""" repo = UserRepository(conn) - user = repo.get_by_email(request.email) + user = repo.get_by_email(body.email) if not user or not user.get("password_hash"): raise HTTPException(status_code=401, detail="Invalid email or password") if not bool(user.get("active", True)): @@ -211,7 +214,7 @@ async def password_login( try: ph = PasswordHasher() - ph.verify(user["password_hash"], request.password) + ph.verify(user["password_hash"], body.password) except VerifyMismatchError: raise HTTPException(status_code=401, detail="Invalid email or password") except Exception: @@ -224,7 +227,9 @@ async def password_login( @router.post("/login/web") +@_rate_limiter.limit("10/minute") async def password_login_web( + request: Request, email: str = Form(...), password: str = Form(""), next: str = Form(""), @@ -258,11 +263,19 @@ async def password_login_web( # ---- JSON programmatic setup (backward compat — used by existing tests) ---- @router.post("/setup") +@_rate_limiter.limit("10/minute") async def password_setup( + request: Request, request_body: PasswordSetupRequest, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Set initial password using setup token (JSON API).""" + """Set initial password using setup token (JSON API). + + Rate limited 10/min per IP — same throttle as the form sibling + ``/setup/confirm``. Without this, the new web-form throttle is + bypassable: an attacker brute-forcing the ``setup_token`` just + switches to this JSON path and resumes at unbounded RPS. + """ repo = UserRepository(conn) user = repo.get_by_email(request_body.email) if not user: @@ -301,12 +314,19 @@ async def reset_page( @router.post("/reset") +@_rate_limiter.limit("5/minute") async def reset_request( request: Request, email: str = Form(""), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Request a password-reset link. Anti-enumeration: same response regardless.""" + """Request a password-reset link. Anti-enumeration: same response regardless. + + Rate limited at the same 5/min as ``/auth/email/send-link`` — the + attack surface is identical (single IP rotates random recipient + addresses, anti-enumeration response shape masks which addresses + landed, attacker burns SMTP / SendGrid quota + spams real users). + """ # Match the rest of the codebase's case-sensitive lookup (password_login, # email magic-link, admin create). Lowercasing here would silently fail # for mixed-case emails the admin stored as-is. @@ -331,6 +351,7 @@ async def reset_request( @router.post("/reset/confirm") +@_rate_limiter.limit("10/minute") async def reset_confirm( request: Request, email: str = Form(...), @@ -339,7 +360,13 @@ async def reset_confirm( confirm_password: str = Form(...), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Submit a new password using a reset token.""" + """Submit a new password using a reset token. + + Rate limited 10/min per IP to slow brute-force guessing of the 32-byte + URL-safe ``reset_token`` — the token is high-entropy but logs / proxy + referer leaks have surfaced partial tokens before, and there's no + reason to allow unbounded attempts. + """ if password != confirm_password: return _render_reset_form(request, email=email, token=token, error="Passwords do not match.") if len(password) < MIN_PASSWORD_LEN: @@ -421,12 +448,18 @@ async def setup_page( @router.post("/setup/request") +@_rate_limiter.limit("5/minute") async def setup_request( request: Request, email: str = Form(""), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Self-service 'Request Access' — emails a setup link if user is pre-approved and unset.""" + """Self-service 'Request Access' — emails a setup link if user is pre-approved and unset. + + Same 5/min rate limit as ``/auth/password/reset`` and ``/send-link`` + — same email-bombing surface (anti-enumeration response, sends mail + on each request). + """ # Match the rest of the codebase's case-sensitive lookup (password_login, # email magic-link, admin create). Lowercasing here would silently fail # for mixed-case emails the admin stored as-is. @@ -452,6 +485,7 @@ async def setup_request( @router.post("/setup/confirm") +@_rate_limiter.limit("10/minute") async def setup_confirm( request: Request, email: str = Form(...), @@ -461,7 +495,12 @@ async def setup_confirm( name: str = Form(""), conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): - """Web form: complete initial password setup via setup token.""" + """Web form: complete initial password setup via setup token. + + Rate limited 10/min per IP — same rationale as ``/reset/confirm``: + high-entropy ``setup_token`` should still not be brute-forceable at + unbounded RPS in case a partial token leaks via logs / referer. + """ if password != confirm_password: return _render_setup_form(request, email=email, token=token, name=name, error="Passwords do not match.") if len(password) < MIN_PASSWORD_LEN: diff --git a/app/auth/rate_limit.py b/app/auth/rate_limit.py new file mode 100644 index 0000000..82ecd95 --- /dev/null +++ b/app/auth/rate_limit.py @@ -0,0 +1,101 @@ +"""Per-IP rate limiting for auth endpoints (#45). + +Why: every auth endpoint was unthrottled before this module — `grep -r +"slowapi\\|limiter\\|throttle"` returned zero hits in app/. That left +``/auth/password/login`` and ``/auth/token`` open to password brute-force +and ``/auth/email/send-link`` open to SMTP/SendGrid email-bombing +(attacker loops with random recipients and burns through quota). + +How: slowapi installs a starlette middleware that rejects with 429 when +the per-route ``@limiter.limit("N/period")`` decorator is exceeded. The +key is the client IP, taken from the leftmost X-Forwarded-For hop (Caddy +in front of the app strips client-supplied XFF and sets its own — same +trust model as ``app.auth.dependencies._client_ip``). + +Operator override: set ``AGNES_AUTH_RATELIMIT_ENABLED=0`` and restart +the process (no image rebuild needed — flip the env in the compose +``.env`` / systemd unit and bounce the container). The value is read at +process start because the slowapi ``Limiter`` constructor freezes +``enabled`` at import; that limitation is fine in practice because +Agnes's other env knobs already require a process restart to take +effect (see ``.env_overlay`` loader in ``app/main.py`` for the same +shape — file-based overlay merged at startup, no live reload). + +The test suite flips ``limiter.enabled`` directly via an autouse +conftest fixture (no restart required because tests share a process) +and re-enables only inside the dedicated rate-limit test, so +generous-but-finite limits don't bleed into other test files that +hammer auth endpoints in tight loops. +""" + +from __future__ import annotations + +import os + +from slowapi import Limiter +from slowapi.errors import RateLimitExceeded +from slowapi.middleware import SlowAPIMiddleware +from slowapi.util import get_remote_address +from starlette.requests import Request +from starlette.responses import JSONResponse + + +def _client_ip_key(request: Request) -> str: + """IP key, preferring leftmost X-Forwarded-For hop. + + Mirrors ``app.auth.dependencies._client_ip`` — same Caddy-in-front + trust model. If the app is ever exposed directly to the internet + without a proxy, the XFF header becomes client-settable and an + attacker can rotate the per-IP bucket trivially. Document that + deployment shape in the runbook before flipping it on. + """ + xff = request.headers.get("x-forwarded-for") + if xff: + ip = xff.split(",", 1)[0].strip() + if ip: + return ip + return get_remote_address(request) + + +def _enabled_default() -> bool: + return os.environ.get("AGNES_AUTH_RATELIMIT_ENABLED", "1").lower() not in ( + "0", "false", "no", "off", + ) + + +# Module-level singleton — slowapi binds storage at construction and the +# decorators capture this exact instance at import time. Tests toggle +# ``limiter.enabled`` and call ``limiter.reset()`` between cases. +# +# headers_enabled is intentionally OFF: when on, slowapi injects +# X-RateLimit-* headers via a per-handler response parameter, which forces +# every decorated endpoint to add ``response: Response`` even on the happy +# path. The protection here is the 429 with Retry-After (still emitted by +# the exception handler below) — the diagnostic headers on success +# responses are not worth the API-shape churn across 5 endpoints. +limiter = Limiter( + key_func=_client_ip_key, + enabled=_enabled_default(), + headers_enabled=False, + default_limits=[], +) + + +async def _rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse: + """Match Agnes's existing JSON error shape (``{"detail": "..."}``) + instead of slowapi's text/plain default — keeps the CLI / web error + parser uniform across all 4xx responses. + """ + return JSONResponse( + {"detail": f"Too many requests — {exc.detail}"}, + status_code=429, + headers={"Retry-After": "60"}, + ) + + +__all__ = [ + "limiter", + "RateLimitExceeded", + "SlowAPIMiddleware", + "_rate_limit_exceeded_handler", +] diff --git a/app/auth/router.py b/app/auth/router.py index 8e51ac6..2311886 100644 --- a/app/auth/router.py +++ b/app/auth/router.py @@ -3,7 +3,7 @@ import logging import uuid -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel import duckdb @@ -13,6 +13,7 @@ from argon2.exceptions import VerifyMismatchError from app.auth.jwt import create_access_token from app.auth.access import is_user_admin from app.auth.dependencies import _get_db +from app.auth.rate_limit import limiter as _rate_limiter from src.db import SYSTEM_ADMIN_GROUP from src.repositories.users import UserRepository from src.repositories.user_group_members import UserGroupMembersRepository @@ -59,13 +60,15 @@ def _audit(user_id: str, action: str, result: str | None = None) -> None: @router.post("/token", response_model=TokenResponse) +@_rate_limiter.limit("10/minute") async def create_token( - request: TokenRequest, + request: Request, + body: TokenRequest, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Issue a JWT token. Requires password authentication.""" repo = UserRepository(conn) - user = repo.get_by_email(request.email) + user = repo.get_by_email(body.email) if not user: raise HTTPException(status_code=401, detail="User not found") if not bool(user.get("active", True)): @@ -74,11 +77,11 @@ async def create_token( # If user has password_hash, require and verify it if user.get("password_hash"): - if not request.password: + if not body.password: raise HTTPException(status_code=401, detail="Password required") try: ph = PasswordHasher() - ph.verify(user["password_hash"], request.password) + ph.verify(user["password_hash"], body.password) except VerifyMismatchError: _audit(user["id"], "login_failed", result="invalid_password") raise HTTPException(status_code=401, detail="Invalid password") @@ -107,8 +110,10 @@ async def create_token( @router.post("/bootstrap", response_model=TokenResponse) +@_rate_limiter.limit("3/minute") async def bootstrap( - request: BootstrapRequest, + request: Request, + body: BootstrapRequest, conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): """Bootstrap the first admin account. @@ -136,10 +141,10 @@ async def bootstrap( detail="Bootstrap disabled — a user with a password already exists. Use /auth/password/login.", ) - password_hash = PasswordHasher().hash(request.password) if request.password else None + password_hash = PasswordHasher().hash(body.password) if body.password else None # If a matching user already exists (e.g. seed), update it; else create fresh. - existing_user = next((u for u in existing if u.get("email") == request.email), None) + existing_user = next((u for u in existing if u.get("email") == body.email), None) if existing_user: user_id = existing_user["id"] repo.update(id=user_id, password_hash=password_hash) @@ -148,8 +153,8 @@ async def bootstrap( user_id = str(uuid.uuid4()) repo.create( id=user_id, - email=request.email, - name=request.name or request.email.split("@")[0], + email=body.email, + name=body.name or body.email.split("@")[0], password_hash=password_hash, ) _audit(user_id, "bootstrap_completed") @@ -167,10 +172,10 @@ async def bootstrap( added_by="auth.bootstrap", ) - token = create_access_token(user_id=user_id, email=request.email) + token = create_access_token(user_id=user_id, email=body.email) return TokenResponse( access_token=token, user_id=user_id, - email=request.email, + email=body.email, role="admin", ) diff --git a/app/main.py b/app/main.py index c4debc4..c757aaa 100644 --- a/app/main.py +++ b/app/main.py @@ -89,6 +89,12 @@ class _SelectiveGZipMiddleware: return await self._gzip(scope, receive, send) +from app.auth.rate_limit import ( + SlowAPIMiddleware as _AuthRateLimitMiddleware, + RateLimitExceeded as _AuthRateLimitExceeded, + _rate_limit_exceeded_handler as _auth_rate_limit_handler, + limiter as _auth_rate_limiter, +) from app.auth.router import router as auth_router from app.api.health import router as health_router from app.api.sync import router as sync_router @@ -272,6 +278,15 @@ def create_app() -> FastAPI: ), ) + # Per-IP rate limiting on auth endpoints (#45). Wired here so the + # SlowAPIMiddleware sits in the standard middleware chain (above CORS, + # below GZip — order doesn't affect correctness, only metric/log + # ordering). The limiter singleton is created at import time in + # app.auth.rate_limit; we just register state + middleware + handler. + app.state.limiter = _auth_rate_limiter + app.add_middleware(_AuthRateLimitMiddleware) + app.add_exception_handler(_AuthRateLimitExceeded, _auth_rate_limit_handler) + # Session middleware (required for OAuth state) from app.secrets import get_session_secret session_secret = get_session_secret() diff --git a/pyproject.toml b/pyproject.toml index 562f79c..f9e98be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.30.0" +version = "0.30.1" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" @@ -52,6 +52,10 @@ dependencies = [ # In-process TTL cache for marketplace etag (transitively present via # google-auth, declared explicitly here because we depend on it directly). "cachetools>=5.3.0", + # Per-IP rate limiting on auth endpoints (#45). In-process counters by + # default — fine for single-replica deploys. Multi-replica rollouts can + # swap the storage backend via slowapi's `storage_uri` (Redis, Memcached). + "slowapi>=0.1.9", ] [project.optional-dependencies] diff --git a/tests/conftest.py b/tests/conftest.py index 5fdf0ed..ca3a93f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,29 @@ os.makedirs(os.path.join(os.environ["DATA_DIR"], "notifications"), exist_ok=True os.makedirs(os.path.join(os.environ["DATA_DIR"], "state"), exist_ok=True) +@pytest.fixture(autouse=True) +def _disable_auth_rate_limit_in_tests(): + """Disable the slowapi auth rate limiter for every test by default. + + Production limits (e.g. 10/minute on /auth/password/login) would otherwise + bleed into test files that hammer auth endpoints in tight loops — those + tests existed long before the limiter and shouldn't have to know about + its bucket sizes. The dedicated rate-limit test in test_auth_rate_limit.py + flips ``limiter.enabled = True`` and resets state inside its own scope. + """ + from app.auth.rate_limit import limiter + was_enabled = limiter.enabled + limiter.enabled = False + try: + limiter.reset() + except Exception: + # In-memory backend always resets cleanly; defensive guard for + # third-party storage backends operators might wire in later. + pass + yield + limiter.enabled = was_enabled + + @pytest.fixture def e2e_env(tmp_path, monkeypatch): """Set up complete E2E environment with DATA_DIR, create dirs.""" diff --git a/tests/test_auth_rate_limit.py b/tests/test_auth_rate_limit.py new file mode 100644 index 0000000..3a684fe --- /dev/null +++ b/tests/test_auth_rate_limit.py @@ -0,0 +1,228 @@ +"""#45: per-IP rate limiting on auth endpoints. + +Each test re-enables the limiter (the autouse conftest fixture disables it +by default for the rest of the suite) and resets bucket state to avoid +order-dependence. Limits live in ``app.auth.providers.*`` and +``app.auth.router`` decorators — adjust here when you bump them. +""" + +from __future__ import annotations + +import os +import tempfile +import uuid + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture +def fresh_db(monkeypatch): + with tempfile.TemporaryDirectory() as tmp: + monkeypatch.setenv("DATA_DIR", tmp) + from src.db import close_system_db + close_system_db() + yield tmp + close_system_db() + + +@pytest.fixture +def app_with_ratelimit(monkeypatch, fresh_db): + """TestClient with the limiter forced on, bucket reset before each call.""" + monkeypatch.setenv("TESTING", "1") + monkeypatch.setenv("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!") + monkeypatch.setenv("AGNES_AUTH_RATELIMIT_ENABLED", "1") + from app.auth.rate_limit import limiter + limiter.enabled = True + limiter.reset() + from app.main import app + return TestClient(app) + + +def _seed_admin(fresh_db, password: str | None = None): + """Seed an admin user, optionally with an argon2-hashed password set.""" + from src.db import SYSTEM_ADMIN_GROUP, get_system_db + from src.repositories.user_group_members import UserGroupMembersRepository + from src.repositories.users import UserRepository + conn = get_system_db() + try: + uid = str(uuid.uuid4()) + password_hash = None + if password: + from argon2 import PasswordHasher + password_hash = PasswordHasher().hash(password) + UserRepository(conn).create( + id=uid, email="admin@test", name="Admin", + password_hash=password_hash, + ) + admin_gid = conn.execute( + "SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP] + ).fetchone()[0] + UserGroupMembersRepository(conn).add_member(uid, admin_gid, source="system_seed") + return uid + finally: + conn.close() + + +def test_password_login_rate_limited_after_10_requests(app_with_ratelimit, fresh_db): + """11th request inside the per-minute window → 429.""" + _seed_admin(fresh_db, password="correct-horse-battery-staple") + statuses = [] + for _ in range(11): + resp = app_with_ratelimit.post( + "/auth/password/login", + json={"email": "admin@test", "password": "wrong"}, + ) + statuses.append(resp.status_code) + # First 10 may be 401 (wrong password); the 11th must be 429 from slowapi. + assert statuses[:10] == [401] * 10, f"unexpected pre-limit statuses: {statuses[:10]}" + assert statuses[10] == 429, f"expected 11th request to 429, got {statuses[10]}" + + +def test_email_send_link_rate_limited_after_5_requests(app_with_ratelimit, fresh_db): + """6th /send-link inside the per-minute window → 429. + + Covers the email-bombing scenario: a single IP rotating through random + recipient addresses gets throttled regardless of whether each address + actually exists (the endpoint always returns success to prevent + enumeration, so the limit is the only gate).""" + statuses = [] + for i in range(6): + resp = app_with_ratelimit.post( + "/auth/email/send-link", + json={"email": f"victim-{i}@example.com"}, + ) + statuses.append(resp.status_code) + assert statuses[:5] == [200] * 5, f"unexpected pre-limit statuses: {statuses[:5]}" + assert statuses[5] == 429, f"expected 6th request to 429, got {statuses[5]}" + + +def test_bootstrap_rate_limited_after_3_requests(app_with_ratelimit, fresh_db): + """4th /auth/bootstrap inside the per-minute window → 429. + + Bootstrap is one-shot in normal use; the tight 3/minute limit exists + to slow brute-force enumeration of the 'no users with password yet' + state without breaking legitimate retry-on-typo flows.""" + statuses = [] + for i in range(4): + resp = app_with_ratelimit.post( + "/auth/bootstrap", + json={"email": f"first-admin-{i}@example.com", "password": "x" * 12}, + ) + statuses.append(resp.status_code) + # First request 200 (bootstrap path), subsequent 403 (already bootstrapped), + # but the count includes ALL requests — 4th must be 429 regardless of + # business-logic outcome of requests 2-3. + assert statuses[3] == 429, ( + f"expected 4th /bootstrap to 429, got {statuses[3]} (full sequence: {statuses})" + ) + + +def test_password_reset_rate_limited_after_5_requests(app_with_ratelimit, fresh_db): + """6th /auth/password/reset → 429. Same email-bombing surface as + /send-link — anti-enumeration response, sends mail per request, attacker + rotates random recipients from a single IP. Pre-fix this endpoint was + unthrottled even though /send-link was — code-reviewer flagged the gap.""" + statuses = [] + for i in range(6): + resp = app_with_ratelimit.post( + "/auth/password/reset", + data={"email": f"victim-{i}@example.com"}, + ) + statuses.append(resp.status_code) + # Pre-limit responses are 200 (HTML "check your email" page — anti-enum). + assert statuses[:5] == [200] * 5, f"unexpected pre-limit statuses: {statuses[:5]}" + assert statuses[5] == 429, f"expected 6th to 429, got {statuses[5]}" + + +def test_password_setup_request_rate_limited_after_5_requests(app_with_ratelimit, fresh_db): + """6th /auth/password/setup/request → 429. Same surface as /reset.""" + statuses = [] + for i in range(6): + resp = app_with_ratelimit.post( + "/auth/password/setup/request", + data={"email": f"newcomer-{i}@example.com"}, + ) + statuses.append(resp.status_code) + assert statuses[:5] == [200] * 5 + assert statuses[5] == 429 + + +def test_reset_confirm_rate_limited_after_10_requests(app_with_ratelimit, fresh_db): + """11th /auth/password/reset/confirm → 429. Token brute-force throttle: + the reset token is high-entropy but partial leaks (logs, referer) have + surfaced before; unbounded guess rate would let an attacker exhaust the + keyspace adjacent to a leaked prefix.""" + statuses = [] + for i in range(11): + resp = app_with_ratelimit.post( + "/auth/password/reset/confirm", + data={ + "email": "x@example.com", + "token": f"guess-{i}", + "password": "newpassword123", + "confirm_password": "newpassword123", + }, + ) + statuses.append(resp.status_code) + # Pre-limit returns the form re-rendered with 'Invalid or expired' + # error (status 200, anti-enum). Whatever the body says, the throttle + # must trip on attempt 11. + assert statuses[10] == 429, f"expected 11th to 429, got {statuses[10]} (full: {statuses})" + + +def test_password_setup_json_rate_limited_after_10_requests(app_with_ratelimit, fresh_db): + """11th POST /auth/password/setup (JSON variant) → 429. + + Without this, the form-side ``/setup/confirm`` 10/min limit is + bypassable — an attacker brute-forcing ``setup_token`` just switches + to this JSON path and resumes at unbounded RPS. Caught by an + independent code review on PR #165. + """ + statuses = [] + for i in range(11): + resp = app_with_ratelimit.post( + "/auth/password/setup", + json={ + "email": "x@example.com", + "token": f"guess-{i}", + "password": "newpassword123", + }, + ) + statuses.append(resp.status_code) + assert statuses[10] == 429, f"expected 11th to 429, got {statuses[10]} (full: {statuses})" + + +def test_email_verify_get_rate_limited_after_10_requests(app_with_ratelimit, fresh_db): + """11th GET /auth/email/verify → 429. Closes the click-through bypass: + the GET variant is what we embed in outgoing emails, so leaving it + unthrottled while throttling POST would let an attacker just hit the + GET endpoint to brute-force tokens at unbounded RPS.""" + statuses = [] + for i in range(11): + resp = app_with_ratelimit.get( + f"/auth/email/verify?email=x@example.com&token=guess-{i}", + follow_redirects=False, + ) + statuses.append(resp.status_code) + assert statuses[10] == 429, f"expected 11th to 429, got {statuses[10]} (full: {statuses})" + + +def test_rate_limit_disabled_via_env(monkeypatch, fresh_db): + """``AGNES_AUTH_RATELIMIT_ENABLED=0`` (operator escape hatch) must let + every request through, no matter how many fire in the same window.""" + monkeypatch.setenv("TESTING", "1") + monkeypatch.setenv("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!") + from app.auth.rate_limit import limiter + limiter.enabled = False # mirrors what the env-var would do at module load + limiter.reset() + from app.main import app + client = TestClient(app) + statuses = [ + client.post( + "/auth/email/send-link", + json={"email": f"x{i}@example.com"}, + ).status_code + for i in range(20) + ] + assert all(s == 200 for s in statuses), f"unexpected throttling: {statuses}" diff --git a/tests/test_user_management.py b/tests/test_user_management.py index c178091..dca429f 100644 --- a/tests/test_user_management.py +++ b/tests/test_user_management.py @@ -293,6 +293,79 @@ def test_deactivated_admin_rejected_by_active_check(app_client, fresh_db): assert "deactivated" in resp.json().get("detail", "").lower() +def test_cannot_remove_last_admin_via_user_memberships(app_client, fresh_db): + """v19 #151: DELETE /api/admin/users/{id}/memberships/{group_id} must + refuse to remove the only active admin from the seeded Admin group — + even when the caller is a different admin (covers the case where + a second admin was added then the first was deactivated, leaving + one active admin who could otherwise be demoted to zero).""" + from src.db import SYSTEM_ADMIN_GROUP, get_system_db + admin_id, token = _seed_admin(fresh_db) + conn = get_system_db() + try: + admin_gid = conn.execute( + "SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP] + ).fetchone()[0] + finally: + conn.close() + # Sole-admin case: try to demote the only admin via the user-keyed + # memberships endpoint. + resp = app_client.delete( + f"/api/admin/users/{admin_id}/memberships/{admin_gid}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 409 + assert "last admin" in resp.json()["detail"].lower() + + +def test_cannot_remove_last_admin_via_group_members(app_client, fresh_db): + """v19 #151: DELETE /api/admin/groups/{group_id}/members/{user_id} must + refuse to demote the only active admin (group-keyed mirror of the + user-keyed membership endpoint).""" + from src.db import SYSTEM_ADMIN_GROUP, get_system_db + admin_id, token = _seed_admin(fresh_db) + conn = get_system_db() + try: + admin_gid = conn.execute( + "SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP] + ).fetchone()[0] + finally: + conn.close() + resp = app_client.delete( + f"/api/admin/groups/{admin_gid}/members/{admin_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 409 + assert "last admin" in resp.json()["detail"].lower() + + +def test_can_remove_admin_when_another_active_admin_exists(app_client, fresh_db): + """Sanity: with two active admins, demoting one via the membership + endpoint must succeed — the guard fires only at count_admins <= 1.""" + import uuid + from src.db import SYSTEM_ADMIN_GROUP, get_system_db + from src.repositories.user_group_members import UserGroupMembersRepository + from src.repositories.users import UserRepository + admin_id, token = _seed_admin(fresh_db) + conn = get_system_db() + try: + admin_gid = conn.execute( + "SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP] + ).fetchone()[0] + other_id = str(uuid.uuid4()) + UserRepository(conn).create(id=other_id, email="other@test", name="Other") + UserGroupMembersRepository(conn).add_member( + other_id, admin_gid, source="admin", added_by="admin@test", + ) + finally: + conn.close() + resp = app_client.delete( + f"/api/admin/users/{other_id}/memberships/{admin_gid}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 204 + + def test_cannot_deactivate_last_admin(app_client, fresh_db): """v19: try to deactivate the last active admin → 409. Admin demotion is now done via group membership (DELETE /api/admin/users/{id}/memberships/{group_id}),