agnes-the-ai-analyst/app/api/me_debug.py
minasarustamyan 7a06f1a585
feat(auth): /me/debug self-only auth diagnostic page (#116)
Adds /me/debug HTML page rendering the logged-in user's own session state — decoded JWT claims (no raw token, sha256[:12] fingerprint for log correlation), group memberships with sources and bound external_id when present, resource grants effective via those memberships, and a Refetch from Google (dry-run) button that diffs a fresh fetch_user_groups call against the cached user_group_members snapshot. Gated by AGNES_DEBUG_AUTH env var (default off → 404, route existence undetectable in production). Self-only by construction: user_id is read from the validated session, never echoes raw JWT / password hash / full PAT. Tolerates v13 + v14 schemas via information_schema check on users.external_id.
2026-04-29 06:36:28 +02:00

291 lines
12 KiB
Python

"""Self-service auth diagnostic page.
Behind the ``AGNES_DEBUG_AUTH=true`` env flag (default off → 404). Lets a
logged-in user inspect their own session: decoded JWT claims, group
memberships with sources, resource grants, and what Google Workspace would
return on a fresh sync (dry-run, no DB writes).
Hard rules — designed so even if the env flag accidentally lands in
production, no sensitive material leaks:
- Never render the raw JWT, only its claims + a short sha256 fingerprint
(so it can be correlated against logs without being replayable).
- Never render password hashes, full PAT tokens, or session cookie values.
- Self-only — the user_id comes from the validated session, not a query
parameter or path param. There is no admin-views-anyone surface here.
- Refetch-from-Google is dry-run: returns a diff of what the next real
sync would do, but performs zero ``user_group_members`` writes.
"""
from __future__ import annotations
import hashlib
import logging
import os
from typing import Any, Dict, List, Optional
import duckdb
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from app.auth.dependencies import _get_db, get_current_user
from app.auth.jwt import verify_token
logger = logging.getLogger(__name__)
# Mounted at /me/debug. The prefix is intentionally short so the navbar
# link and the bookmarkable URL stay readable.
router = APIRouter(prefix="/me/debug", tags=["me-debug"])
templates = Jinja2Templates(directory="app/web/templates")
def is_debug_auth_enabled() -> bool:
"""True iff the env flag is one of the accepted truthy spellings.
Default off — production VMs leave the var unset, the page returns
404, and no debug surface exists. Dev/staging VMs set it to ``true``
in their .env (provisioned via the agnes-vm Terraform module).
"""
return os.environ.get("AGNES_DEBUG_AUTH", "").strip().lower() in (
"1", "true", "yes",
)
async def require_debug_auth_enabled() -> None:
"""Dependency: 404 unless the env flag is on. Returning 404 instead of
403 makes the route's existence undetectable in production — an
attacker scanning for diag endpoints can't distinguish "you're not
allowed" from "this Agnes doesn't ship the debug feature"."""
if not is_debug_auth_enabled():
raise HTTPException(status_code=404, detail="Not Found")
# ---------------------------------------------------------------------------
# Data assembly
# ---------------------------------------------------------------------------
def _token_fingerprint(token: Optional[str]) -> Optional[str]:
"""Short sha256 of the raw token, for log correlation.
The full hash isn't a credential (HMAC-SHA256 is one-way) but truncating
to 12 hex chars makes the displayed value visually distinct from the
raw token so screenshots can't accidentally leak the JWT.
"""
if not token:
return None
return hashlib.sha256(token.encode("utf-8")).hexdigest()[:12]
def _read_session_token(request: Request) -> Optional[str]:
"""The session JWT lives in the ``access_token`` cookie (set by every
auth provider's callback). Authorization-header bearers are PATs and
are out of scope for this diagnostic — the page is for interactive
sessions."""
return request.cookies.get("access_token")
def _decoded_claims(token: Optional[str]) -> Optional[Dict[str, Any]]:
"""Return verified JWT claims (or ``None`` if missing/invalid).
Goes through the project's :func:`app.auth.jwt.verify_token` so an
expired or mis-signed token produces ``None`` rather than a partial
decode — same trust boundary the rest of the auth path uses.
"""
if not token:
return None
return verify_token(token)
def _user_memberships(
user_id: str, conn: duckdb.DuckDBPyConnection
) -> List[Dict[str, Any]]:
"""Group memberships for the given user, with source labels and the
bound external_id (NULL for unbound groups). Sorted by group name so
the output is stable across reloads."""
# external_id is the v14 column. Tolerate its absence — the same
# template that ships in the v13-base PR #2 must also work on a v14
# install where the column exists.
has_ext = conn.execute(
"SELECT 1 FROM information_schema.columns "
"WHERE table_name = 'user_groups' AND column_name = 'external_id'"
).fetchone()
select_ext = "g.external_id" if has_ext else "NULL"
rows = conn.execute(
f"""SELECT g.id, g.name, g.is_system, {select_ext} AS external_id,
m.source, m.added_at, m.added_by
FROM user_group_members m
JOIN user_groups g ON g.id = m.group_id
WHERE m.user_id = ?
ORDER BY g.name""",
[user_id],
).fetchall()
cols = [d[0] for d in conn.description]
return [dict(zip(cols, r)) for r in rows]
def _accessible_grants(
user_id: str, conn: duckdb.DuckDBPyConnection
) -> List[Dict[str, Any]]:
"""Resource grants the user can reach via at least one of their groups.
Distinct on (resource_type, resource_id) so a grant held by two of the
user's groups appears once.
The plain ``SELECT DISTINCT`` covers all SELECT-list columns, so listing
``via_group`` would re-double a grant reachable through two groups (and
inflate the "Distinct N grant(s)" count rendered by ``me_debug.html``).
DuckDB supports PostgreSQL's ``DISTINCT ON`` to dedupe on the leading
columns; the ORDER BY picks the alphabetically-first group as the
representative ``via_group`` for the row.
"""
rows = conn.execute(
"""SELECT DISTINCT ON (rg.resource_type, rg.resource_id)
rg.resource_type, rg.resource_id, g.name AS via_group
FROM resource_grants rg
JOIN user_group_members m ON m.group_id = rg.group_id
JOIN user_groups g ON g.id = rg.group_id
WHERE m.user_id = ?
ORDER BY rg.resource_type, rg.resource_id, g.name""",
[user_id],
).fetchall()
cols = [d[0] for d in conn.description]
return [dict(zip(cols, r)) for r in rows]
def _last_sync_summary(
user_id: str, conn: duckdb.DuckDBPyConnection
) -> Dict[str, Any]:
"""Summary of the most recent google_sync run for this user, drawn from
user_group_members. Not authoritative timestamps (Google sync writes
DELETE+INSERT every login, so all rows share the same added_at), but
sufficient to answer "when did Agnes last hear from Google about me?"."""
row = conn.execute(
"""SELECT COUNT(*) AS n, MAX(added_at) AS last_at
FROM user_group_members
WHERE user_id = ? AND source = 'google_sync'""",
[user_id],
).fetchone()
n, last_at = row if row else (0, None)
return {
"google_sync_count": int(n or 0),
"last_added_at": str(last_at) if last_at else None,
}
# ---------------------------------------------------------------------------
# GET /me/debug — render the diagnostic page
# ---------------------------------------------------------------------------
@router.get("", response_class=HTMLResponse, name="me_debug_page")
async def me_debug_page(
request: Request,
_: None = Depends(require_debug_auth_enabled),
user: dict = Depends(get_current_user),
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
# Reuse the project's shared template-context builder so config /
# static_url / session / theme overrides are populated the same way
# every other HTML page gets them. Adding a debug page must not bypass
# the shared chrome.
from app.web.router import _build_context
raw_token = _read_session_token(request)
# Strip sensitive columns before handing the row to the template. The
# current me_debug.html only renders id/email/name/active/created_at, but
# passing the full row would let a future template edit (e.g. an admin
# adding `{{ user_record | tojson }}` while debugging) accidentally leak
# the password hash. Defense-in-depth — the module docstring at line 13
# explicitly establishes "Never render password hashes" as an invariant.
_SENSITIVE_USER_COLUMNS = (
"password_hash", "setup_token", "reset_token",
)
user_record_safe = {
k: v for k, v in user.items() if k not in _SENSITIVE_USER_COLUMNS
}
ctx = _build_context(
request, user=user_record_safe,
user_record=user_record_safe,
claims=_decoded_claims(raw_token),
token_fingerprint=_token_fingerprint(raw_token),
memberships=_user_memberships(user["id"], conn),
grants=_accessible_grants(user["id"], conn),
sync_summary=_last_sync_summary(user["id"], conn),
google_group_prefix=os.environ.get(
"AGNES_GOOGLE_GROUP_PREFIX", ""
).strip(),
)
return templates.TemplateResponse(request, "me_debug.html", ctx)
# ---------------------------------------------------------------------------
# POST /me/debug/refetch-groups — dry-run live Google fetch
# ---------------------------------------------------------------------------
@router.post("/refetch-groups", name="me_debug_refetch_groups")
async def me_debug_refetch_groups(
_: None = Depends(require_debug_auth_enabled),
user: dict = Depends(get_current_user),
conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
"""Re-issue ``fetch_user_groups`` for the current user and return a
diff against the cached ``user_group_members`` snapshot, *without*
writing anything. The "real" sync runs only at OAuth callback —
forcing a write here would let any logged-in user trigger a Google
Admin SDK call on demand, which is both noisy and a quota footgun.
"""
from app.auth.group_sync import fetch_user_groups
fetched = fetch_user_groups(user["email"])
# The function returns Optional[list] on the v14 branch and List[str]
# on earlier branches. Normalize either shape: ``None`` becomes an
# explicit soft-fail marker and a list passes through untouched.
soft_failed = fetched is None
fetched_list: List[str] = list(fetched) if fetched else []
prefix = os.environ.get("AGNES_GOOGLE_GROUP_PREFIX", "").strip().lower()
if prefix:
relevant = [g.lower() for g in fetched_list if g.lower().startswith(prefix)]
else:
relevant = [g.lower() for g in fetched_list]
# Current state — google_sync rows joined to user_groups for the
# external_id label (NULL on pre-v14 schemas; tolerate that).
has_ext = conn.execute(
"SELECT 1 FROM information_schema.columns "
"WHERE table_name = 'user_groups' AND column_name = 'external_id'"
).fetchone()
select_ext = "g.external_id" if has_ext else "NULL"
current_rows = conn.execute(
f"""SELECT g.name, {select_ext} AS external_id
FROM user_group_members m
JOIN user_groups g ON g.id = m.group_id
WHERE m.user_id = ? AND m.source = 'google_sync'
ORDER BY g.name""",
[user["id"]],
).fetchall()
current_external_ids = {
r[1].lower() for r in current_rows if r[1]
}
current_names = [r[0] for r in current_rows]
# Diff: prefix-relevant emails that have no matching external_id row
# (would be added) and current external_ids no longer in fetched set
# (would be removed).
fetched_set = set(relevant)
would_add = sorted(fetched_set - current_external_ids)
would_remove = sorted(current_external_ids - fetched_set) if has_ext else []
return {
"soft_failed": soft_failed,
"prefix": prefix or None,
"fetched": fetched_list,
"fetched_relevant": relevant,
"current_names": current_names,
"current_external_ids": sorted(current_external_ids),
"would_add": would_add,
"would_remove": would_remove,
"applied": False, # always — this endpoint never writes
}