feat(auth): /me/debug self-only auth diagnostic page (#116)

Adds /me/debug HTML page rendering the logged-in user's own session state — decoded JWT claims (no raw token, sha256[:12] fingerprint for log correlation), group memberships with sources and bound external_id when present, resource grants effective via those memberships, and a Refetch from Google (dry-run) button that diffs a fresh fetch_user_groups call against the cached user_group_members snapshot. Gated by AGNES_DEBUG_AUTH env var (default off → 404, route existence undetectable in production). Self-only by construction: user_id is read from the validated session, never echoes raw JWT / password hash / full PAT. Tolerates v13 + v14 schemas via information_schema check on users.external_id.
This commit is contained in:
minasarustamyan 2026-04-29 06:36:28 +02:00 committed by GitHub
parent 2e1dfb7553
commit 7a06f1a585
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 851 additions and 0 deletions

View file

@ -10,6 +10,10 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
### Added
- `/me/debug` — self-only auth diagnostic page. Shows the logged-in user their own decoded JWT claims (no raw token), group memberships with sources and bound `external_id` when present, resource grants effective via those memberships, and a "Refetch from Google (dry-run)" button that issues a fresh `fetch_user_groups` call and reports the diff against the cached `user_group_members` snapshot without writing anything. Gated by `AGNES_DEBUG_AUTH=true` env var (default off → route returns 404 and the navbar item is not rendered). Intended for dev / staging VMs; do not enable on customer-facing instances. The infra module exposes a `debug_auth_enabled` variable that propagates to the env.
## [0.14.0] — 2026-04-29
### Added

291
app/api/me_debug.py Normal file
View file

@ -0,0 +1,291 @@
"""Self-service auth diagnostic page.
Behind the ``AGNES_DEBUG_AUTH=true`` env flag (default off → 404). Lets a
logged-in user inspect their own session: decoded JWT claims, group
memberships with sources, resource grants, and what Google Workspace would
return on a fresh sync (dry-run, no DB writes).
Hard rules designed so even if the env flag accidentally lands in
production, no sensitive material leaks:
- Never render the raw JWT, only its claims + a short sha256 fingerprint
(so it can be correlated against logs without being replayable).
- Never render password hashes, full PAT tokens, or session cookie values.
- Self-only — the user_id comes from the validated session, not a query
parameter or path param. There is no admin-views-anyone surface here.
- Refetch-from-Google is dry-run: returns a diff of what the next real
sync would do, but performs zero ``user_group_members`` writes.
"""
from __future__ import annotations
import hashlib
import logging
import os
from typing import Any, Dict, List, Optional
import duckdb
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from app.auth.dependencies import _get_db, get_current_user
from app.auth.jwt import verify_token
logger = logging.getLogger(__name__)
# Mounted at /me/debug. The prefix is intentionally short so the navbar
# link and the bookmarkable URL stay readable.
router = APIRouter(prefix="/me/debug", tags=["me-debug"])
templates = Jinja2Templates(directory="app/web/templates")
def is_debug_auth_enabled() -> bool:
    """Report whether the ``AGNES_DEBUG_AUTH`` env flag is switched on.

    The raw value is trimmed and lower-cased before comparison, so
    ``1``, ``true`` and ``yes`` (in any casing, with surrounding
    whitespace) count as "on". Every other value — including an unset
    variable — counts as "off", which is the default.

    Returns:
        ``True`` when the flag holds an accepted truthy spelling,
        ``False`` otherwise.
    """
    raw_flag = os.environ.get("AGNES_DEBUG_AUTH", "")
    normalized = raw_flag.strip().lower()
    return normalized in ("1", "true", "yes")
async def require_debug_auth_enabled() -> None:
    """FastAPI dependency that hides the debug routes unless the flag is on.

    A 404 (rather than a 403) is raised when the flag is off so a caller
    cannot tell "you are not allowed" apart from "this deployment does
    not ship the debug feature".

    Raises:
        HTTPException: status 404 with detail ``"Not Found"`` when
            :func:`is_debug_auth_enabled` returns ``False``.
    """
    if is_debug_auth_enabled():
        return
    raise HTTPException(status_code=404, detail="Not Found")
# ---------------------------------------------------------------------------
# Data assembly
# ---------------------------------------------------------------------------
def _token_fingerprint(token: Optional[str]) -> Optional[str]:
    """Return a 12-hex-char SHA-256 prefix of the raw token.

    The value is meant for correlating a session with log lines. It is a
    truncated one-way digest of the UTF-8 encoded token, so it looks
    nothing like the token itself and cannot be replayed as a credential.

    Args:
        token: The raw token string, or ``None`` / ``""`` when absent.

    Returns:
        The first 12 hexadecimal characters of the SHA-256 digest, or
        ``None`` when no token was supplied.
    """
    if token:
        digest = hashlib.sha256(token.encode("utf-8"))
        return digest.hexdigest()[:12]
    return None
def _read_session_token(request: Request) -> Optional[str]:
    """Fetch the session JWT from the ``access_token`` cookie.

    Only the cookie is consulted. Authorization-header bearers are PATs
    and are intentionally out of scope — this diagnostic targets
    interactive (cookie-based) sessions.

    Args:
        request: The incoming request whose cookies are inspected.

    Returns:
        The cookie value, or ``None`` when the cookie is not present.
    """
    cookie_jar = request.cookies
    return cookie_jar.get("access_token")
def _decoded_claims(token: Optional[str]) -> Optional[Dict[str, Any]]:
    """Decode the session token into its claims via ``verify_token``.

    Decoding is delegated to :func:`app.auth.jwt.verify_token` so this
    page applies the same trust boundary as the rest of the auth path
    instead of doing its own partial decode.

    Args:
        token: The raw session JWT, or ``None`` / ``""`` when absent.

    Returns:
        Whatever ``verify_token`` returns for a non-empty token, or
        ``None`` when no token was supplied (``verify_token`` is not
        called in that case).
    """
    return verify_token(token) if token else None
def _user_memberships(
    user_id: str, conn: duckdb.DuckDBPyConnection
) -> List[Dict[str, Any]]:
    """List the user's group memberships, ordered by group name.

    Each row carries the group's id, name and ``is_system`` flag, the
    group's ``external_id`` (``NULL`` when the column does not exist or
    the group is unbound), and the membership's ``source``, ``added_at``
    and ``added_by``.

    Args:
        user_id: Id of the user whose memberships are listed.
        conn: Open database connection used for the lookups.

    Returns:
        One dict per membership row, keyed by the result column names.
    """
    # ``user_groups.external_id`` is not present on every schema version;
    # probe for it and fall back to a literal NULL when it is missing.
    column_probe = conn.execute(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_name = 'user_groups' AND column_name = 'external_id'"
    ).fetchone()
    if column_probe:
        select_ext = "g.external_id"
    else:
        select_ext = "NULL"

    # Only the column expression is interpolated (one of two fixed
    # literals); the user id stays a bound parameter.
    query = f"""SELECT g.id, g.name, g.is_system, {select_ext} AS external_id,
        m.source, m.added_at, m.added_by
        FROM user_group_members m
        JOIN user_groups g ON g.id = m.group_id
        WHERE m.user_id = ?
        ORDER BY g.name"""
    records = conn.execute(query, [user_id]).fetchall()
    column_names = [entry[0] for entry in conn.description]
    return [dict(zip(column_names, record)) for record in records]
def _accessible_grants(
    user_id: str, conn: duckdb.DuckDBPyConnection
) -> List[Dict[str, Any]]:
    """List the resource grants the user reaches through their groups.

    Rows are deduplicated on ``(resource_type, resource_id)`` with
    ``DISTINCT ON``, so a grant held by several of the user's groups is
    reported once. A plain ``SELECT DISTINCT`` would not do: it would
    also consider ``via_group`` and list such a grant once per group.
    The ``ORDER BY`` makes the alphabetically-first group name the
    representative ``via_group`` of each row.

    Args:
        user_id: Id of the user whose effective grants are listed.
        conn: Open database connection used for the lookup.

    Returns:
        One dict per distinct grant with the keys ``resource_type``,
        ``resource_id`` and ``via_group``.
    """
    query = """SELECT DISTINCT ON (rg.resource_type, rg.resource_id)
        rg.resource_type, rg.resource_id, g.name AS via_group
        FROM resource_grants rg
        JOIN user_group_members m ON m.group_id = rg.group_id
        JOIN user_groups g ON g.id = rg.group_id
        WHERE m.user_id = ?
        ORDER BY rg.resource_type, rg.resource_id, g.name"""
    records = conn.execute(query, [user_id]).fetchall()
    column_names = [entry[0] for entry in conn.description]
    grants = []
    for record in records:
        grants.append(dict(zip(column_names, record)))
    return grants
def _last_sync_summary(
    user_id: str, conn: duckdb.DuckDBPyConnection
) -> Dict[str, Any]:
    """Summarise the user's ``google_sync`` membership rows.

    The figures come straight from ``user_group_members``: how many rows
    carry ``source = 'google_sync'`` and the newest ``added_at`` among
    them. That is enough to answer "when did Agnes last hear from Google
    about me?", though it is not an authoritative sync log.

    Args:
        user_id: Id of the user being summarised.
        conn: Open database connection used for the lookup.

    Returns:
        A dict with ``google_sync_count`` (int, 0 when there are no
        rows) and ``last_added_at`` (stringified timestamp or ``None``).
    """
    record = conn.execute(
        """SELECT COUNT(*) AS n, MAX(added_at) AS last_at
        FROM user_group_members
        WHERE user_id = ? AND source = 'google_sync'""",
        [user_id],
    ).fetchone()
    if record:
        row_count, newest = record
    else:
        row_count, newest = 0, None

    summary: Dict[str, Any] = {}
    summary["google_sync_count"] = int(row_count or 0)
    summary["last_added_at"] = str(newest) if newest else None
    return summary
# ---------------------------------------------------------------------------
# GET /me/debug — render the diagnostic page
# ---------------------------------------------------------------------------
@router.get("", response_class=HTMLResponse, name="me_debug_page")
async def me_debug_page(
    request: Request,
    _: None = Depends(require_debug_auth_enabled),
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Render the self-service auth diagnostic page for the current user.

    Everything shown is derived from the authenticated ``user`` and the
    request's own session cookie: decoded claims plus a token
    fingerprint, group memberships, effective resource grants, and a
    summary of the cached ``google_sync`` rows.

    Args:
        request: The incoming request (source of the session cookie).
        _: Placeholder for the env-flag gate dependency.
        user: The authenticated user's record.
        conn: Open database connection used for the lookups.

    Returns:
        The rendered ``me_debug.html`` template response.
    """
    # Go through the project's shared context builder so this page gets
    # the same config / static_url / session / theme values as every
    # other HTML page instead of bypassing the shared chrome.
    from app.web.router import _build_context

    # Drop credential-like columns before the record reaches the
    # template. The template only renders id/email/name/active/
    # created_at today, but a later edit that dumps the whole record
    # must not be able to leak a password hash. This backs the
    # "never render password hashes" rule this module is built around.
    hidden_columns = ("password_hash", "setup_token", "reset_token")
    safe_user = {
        column: value
        for column, value in user.items()
        if column not in hidden_columns
    }

    session_token = _read_session_token(request)
    user_id = user["id"]
    group_prefix = os.environ.get("AGNES_GOOGLE_GROUP_PREFIX", "").strip()

    context = _build_context(
        request,
        user=safe_user,
        user_record=safe_user,
        claims=_decoded_claims(session_token),
        token_fingerprint=_token_fingerprint(session_token),
        memberships=_user_memberships(user_id, conn),
        grants=_accessible_grants(user_id, conn),
        sync_summary=_last_sync_summary(user_id, conn),
        google_group_prefix=group_prefix,
    )
    return templates.TemplateResponse(request, "me_debug.html", context)
# ---------------------------------------------------------------------------
# POST /me/debug/refetch-groups — dry-run live Google fetch
# ---------------------------------------------------------------------------
@router.post("/refetch-groups", name="me_debug_refetch_groups")
async def me_debug_refetch_groups(
    _: None = Depends(require_debug_auth_enabled),
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Dry-run a Google group fetch for the current user and diff it.

    Calls ``fetch_user_groups`` once for the user's email and compares
    the (prefix-filtered, lower-cased) result with the ``external_id``
    values of the user's cached ``google_sync`` memberships. Nothing is
    written: this handler only issues SELECTs, and the response always
    carries ``applied: False``.

    Args:
        _: Placeholder for the env-flag gate dependency.
        user: The authenticated user's record (``email`` and ``id`` are
            read).
        conn: Open database connection used for the read-only lookups.

    Returns:
        A JSON-serialisable dict with the keys ``soft_failed``,
        ``prefix``, ``fetched``, ``fetched_relevant``, ``current_names``,
        ``current_external_ids``, ``would_add``, ``would_remove`` and
        ``applied``. When ``soft_failed`` is true both diff lists are
        empty, because no comparison against Google was possible.
    """
    from app.auth.group_sync import fetch_user_groups

    fetched = fetch_user_groups(user["email"])
    # ``fetch_user_groups`` may return ``None`` (soft failure) or a list,
    # depending on the branch. Keep the distinction: ``None`` means
    # "Google gave no answer", which is not the same as "no groups".
    soft_failed = fetched is None
    fetched_list: List[str] = list(fetched) if fetched else []

    prefix = os.environ.get("AGNES_GOOGLE_GROUP_PREFIX", "").strip().lower()
    lowered = [g.lower() for g in fetched_list]
    # An empty prefix means "no filter": every fetched group is relevant.
    relevant = [g for g in lowered if g.startswith(prefix)] if prefix else lowered

    # ``user_groups.external_id`` is absent on older schemas; select a
    # literal NULL there so the rest of the handler stays schema-agnostic.
    has_ext = conn.execute(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_name = 'user_groups' AND column_name = 'external_id'"
    ).fetchone()
    select_ext = "g.external_id" if has_ext else "NULL"
    current_rows = conn.execute(
        f"""SELECT g.name, {select_ext} AS external_id
        FROM user_group_members m
        JOIN user_groups g ON g.id = m.group_id
        WHERE m.user_id = ? AND m.source = 'google_sync'
        ORDER BY g.name""",
        [user["id"]],
    ).fetchall()
    current_external_ids = {
        external_id.lower() for _name, external_id in current_rows if external_id
    }
    current_names = [name for name, _external_id in current_rows]

    fetched_set = set(relevant)
    if soft_failed:
        # Without an answer from Google there is nothing to diff against.
        # Reporting every cached group as "would be removed" would be a
        # phantom diff. NOTE(review): presumably the real sync also keeps
        # the cache on a soft failure — confirm in app.auth.group_sync.
        would_add: List[str] = []
        would_remove: List[str] = []
    else:
        # Fetched groups with no matching external_id would be added;
        # cached external_ids missing from the fetch would be removed.
        would_add = sorted(fetched_set - current_external_ids)
        # Removals can only be judged when external_id exists to match on.
        would_remove = (
            sorted(current_external_ids - fetched_set) if has_ext else []
        )

    return {
        "soft_failed": soft_failed,
        "prefix": prefix or None,
        "fetched": fetched_list,
        "fetched_relevant": relevant,
        "current_names": current_names,
        "current_external_ids": sorted(current_external_ids),
        "would_add": would_add,
        "would_remove": would_remove,
        "applied": False,  # always — this endpoint never writes
    }

View file

@ -89,6 +89,7 @@ from app.api.settings import router as settings_router
from app.api.catalog import router as catalog_router
from app.api.telegram import router as telegram_router
from app.api.access import router as access_router, me_router as me_access_router
from app.api.me_debug import router as me_debug_router
from app.api.admin import router as admin_router
from app.api.permissions import router as permissions_router
from app.api.access_requests import router as access_requests_router
@ -329,6 +330,7 @@ def create_app() -> FastAPI:
app.include_router(admin_router)
app.include_router(access_router)
app.include_router(me_access_router)
app.include_router(me_debug_router)
app.include_router(permissions_router)
app.include_router(access_requests_router)
app.include_router(jira_webhooks_router)

View file

@ -144,6 +144,12 @@ def _build_context(request: Request, user: Optional[dict] = None, **extra) -> di
SSH_ALIAS = "data-analyst"
SERVER_HOST = os.environ.get("SERVER_HOST", "")
PROJECT_DIR = "data-analyst"
# Drives whether the user dropdown renders the "Auth debug" link.
# Same env var the route guard checks — keep them in lock-step so
# the link never appears when the route would 404, and vice versa.
DEBUG_AUTH_ENABLED = os.environ.get("AGNES_DEBUG_AUTH", "").strip().lower() in (
"1", "true", "yes",
)
@staticmethod
def theme_overrides():

View file

@ -56,6 +56,9 @@
</div>
<a class="app-user-menu-item {% if _path.startswith('/profile') %}is-active{% endif %}" role="menuitem" href="/profile">Profile</a>
<a class="app-user-menu-item {% if _path == '/tokens' %}is-active{% endif %}" role="menuitem" href="/tokens">My tokens</a>
{% if config.DEBUG_AUTH_ENABLED %}
<a class="app-user-menu-item {% if _path.startswith('/me/debug') %}is-active{% endif %}" role="menuitem" href="/me/debug">Auth debug</a>
{% endif %}
<a class="app-user-menu-item" role="menuitem" href="{{ url_for('auth.logout') }}">Logout</a>
</div>
</div>

View file

@ -0,0 +1,310 @@
{% extends "base.html" %}
{% block title %}Auth debug — {{ session.user.email }}{% endblock %}
{% block content %}
<style>
.container:has(.md-page) { max-width: none; padding: 24px 16px; }
.md-page { max-width: 1100px; margin: 0 auto; padding: 0; }
.md-header {
display: flex; align-items: center; gap: 16px;
margin-bottom: 18px;
}
.md-title { font-size: 22px; font-weight: 600; margin: 0; }
.md-subtitle {
font-size: 13px; color: var(--text-secondary, #6b7280);
margin-top: 2px;
}
.md-warning {
margin-bottom: 18px; padding: 10px 14px;
background: #fff7ed; border: 1px solid #fed7aa; color: #9a3412;
border-radius: 8px; font-size: 12px; line-height: 1.45;
}
.md-section {
background: var(--surface, #fff);
border: 1px solid var(--border, #e5e7eb);
border-radius: 12px;
margin-bottom: 14px;
overflow: hidden;
}
.md-section-head {
padding: 12px 16px;
background: var(--border-light, #f9fafb);
border-bottom: 1px solid var(--border, #e5e7eb);
display: flex; justify-content: space-between; align-items: center;
}
.md-section-head h2 { margin: 0; font-size: 13px; font-weight: 600; }
.md-section-head .sub { font-size: 11px; color: var(--text-secondary, #6b7280); }
.md-section-body { padding: 14px 16px; }
.kv { display: grid; grid-template-columns: 200px 1fr; gap: 6px 14px; font-size: 13px; }
.kv .k { color: var(--text-secondary, #6b7280); font-weight: 500; }
.kv .v {
color: var(--text-primary, #111827);
font-family: ui-monospace, SFMono-Regular, monospace;
word-break: break-all;
}
.kv .v.muted { color: #9ca3af; font-style: italic; }
.md-table { width: 100%; border-collapse: collapse; font-size: 13px; }
.md-table th, .md-table td {
padding: 8px 14px; text-align: left;
border-bottom: 1px solid var(--border-light, #f3f4f6);
}
.md-table thead th {
background: var(--border-light, #f9fafb);
font-size: 11px; text-transform: uppercase; letter-spacing: 0.4px;
color: var(--text-secondary, #6b7280); font-weight: 600;
}
.md-table tbody tr:last-child td { border-bottom: none; }
.src-chip {
display: inline-block; padding: 2px 8px; border-radius: 999px;
font-size: 10px; font-weight: 600;
text-transform: uppercase; letter-spacing: 0.4px;
}
.src-google_sync { background: #dcfce7; color: #166534; }
.src-admin { background: #ede9fe; color: #6d28d9; }
.src-system_seed { background: #fef3c7; color: #92400e; }
.ext-chip {
display: inline-block; padding: 2px 6px; border-radius: 4px;
background: #f0fdf4; color: #166534;
font-family: ui-monospace, SFMono-Regular, monospace;
font-size: 11px;
border: 1px solid #bbf7d0;
}
.md-empty { color: #9ca3af; font-style: italic; font-size: 12px; }
.refetch-btn {
padding: 7px 12px; border-radius: 6px; font-size: 12px; font-weight: 500;
background: var(--primary, #6366f1); color: #fff;
border: 1px solid var(--primary, #6366f1); cursor: pointer;
}
.refetch-btn:disabled { opacity: 0.6; cursor: progress; }
.refetch-result { margin-top: 14px; }
.diff-add { color: #166534; }
.diff-remove { color: #991b1b; }
.pre-block {
background: #0b1220; color: #d1d5db;
padding: 10px 14px; border-radius: 8px;
font-family: ui-monospace, SFMono-Regular, monospace;
font-size: 12px; line-height: 1.5;
overflow: auto; max-height: 320px;
}
</style>
<div class="md-page">
<div class="md-header">
<div>
<h1 class="md-title">Auth debug — your session</h1>
<p class="md-subtitle">
Self-service diagnostic. This page is gated by
<code>AGNES_DEBUG_AUTH</code>; visible only on dev/staging instances.
</p>
</div>
</div>
<div class="md-warning">
<strong>What you see is your own data only.</strong>
No raw JWT, no password hash, no full PAT. The "Refetch" button below
asks Google what your current group membership looks like and shows a
diff against what Agnes has cached — it does <strong>not</strong> apply
the result. Your real next sync runs at next sign-in.
</div>
<!-- User -->
<section class="md-section">
<div class="md-section-head"><h2>User record</h2></div>
<div class="md-section-body">
<div class="kv">
<div class="k">id</div> <div class="v">{{ user_record.id }}</div>
<div class="k">email</div> <div class="v">{{ user_record.email }}</div>
<div class="k">name</div> <div class="v">{{ user_record.name or "—" }}</div>
<div class="k">active</div> <div class="v">{{ "yes" if user_record.active else "no" }}</div>
<div class="k">created_at</div> <div class="v">{{ user_record.created_at or "—" }}</div>
</div>
</div>
</section>
<!-- Session JWT -->
<section class="md-section">
<div class="md-section-head">
<h2>Session JWT (decoded)</h2>
<span class="sub">Raw token never displayed; fingerprint correlates with logs.</span>
</div>
<div class="md-section-body">
{% if claims %}
<div class="kv">
<div class="k">fingerprint</div>
<div class="v">{{ token_fingerprint }}…</div>
<div class="k">subject (sub)</div>
<div class="v">{{ claims.sub }}</div>
<div class="k">email</div>
<div class="v">{{ claims.email }}</div>
<div class="k">type (typ)</div>
<div class="v">{{ claims.typ or "session" }}</div>
<div class="k">issued (iat)</div>
<div class="v">{{ claims.iat or "—" }}</div>
<div class="k">expires (exp)</div>
<div class="v">{{ claims.exp or "—" }}</div>
<div class="k">jti</div>
<div class="v">{{ claims.jti or "—" }}</div>
</div>
{% else %}
<div class="md-empty">No session token in the request — are you signed in via cookie?</div>
{% endif %}
</div>
</section>
<!-- Memberships -->
<section class="md-section">
<div class="md-section-head">
<h2>Group memberships</h2>
<span class="sub">{{ memberships|length }} row(s)</span>
</div>
{% if memberships %}
<table class="md-table">
<thead><tr>
<th>Group</th><th>Linked to</th><th>Source</th><th>Added</th><th>Added by</th>
</tr></thead>
<tbody>
{% for m in memberships %}
<tr>
<td>{{ m.name }}{% if m.is_system %} <em style="font-size:10px;color:#92400e;">(system)</em>{% endif %}</td>
<td>
{% if m.external_id %}
<span class="ext-chip">{{ m.external_id }}</span>
{% else %}
<span class="md-empty">—</span>
{% endif %}
</td>
<td><span class="src-chip src-{{ m.source }}">{{ m.source }}</span></td>
<td>{{ m.added_at or "—" }}</td>
<td>{{ m.added_by or "—" }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<div class="md-section-body"><div class="md-empty">No group memberships.</div></div>
{% endif %}
</section>
<!-- Resource grants -->
<section class="md-section">
<div class="md-section-head">
<h2>Resource grants (effective)</h2>
<span class="sub">Distinct {{ grants|length }} grant(s) reachable via your groups.</span>
</div>
{% if grants %}
<table class="md-table">
<thead><tr>
<th>Resource type</th><th>Resource id</th><th>Via group</th>
</tr></thead>
<tbody>
{% for g in grants %}
<tr>
<td><code>{{ g.resource_type }}</code></td>
<td><code>{{ g.resource_id }}</code></td>
<td>{{ g.via_group }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<div class="md-section-body"><div class="md-empty">No resource grants reachable.</div></div>
{% endif %}
</section>
<!-- Last Google sync -->
<section class="md-section">
<div class="md-section-head">
<h2>Last Google sync snapshot</h2>
<span class="sub">Read from <code>user_group_members</code>.</span>
</div>
<div class="md-section-body">
<div class="kv">
<div class="k">prefix in effect</div>
<div class="v">{{ google_group_prefix or "(none)" }}</div>
<div class="k">google_sync rows</div>
<div class="v">{{ sync_summary.google_sync_count }}</div>
<div class="k">last added_at</div>
<div class="v">{{ sync_summary.last_added_at or "—" }}</div>
</div>
<div style="margin-top:14px;">
<button id="refetch-btn" class="refetch-btn" type="button">
Refetch from Google (dry-run)
</button>
<span id="refetch-status" style="margin-left:10px; font-size:12px; color:#6b7280;"></span>
</div>
<div id="refetch-result" class="refetch-result" hidden>
<div class="kv" style="margin-top:10px;">
<div class="k">soft-failed?</div>
<div class="v" id="rf-soft"></div>
<div class="k">prefix</div>
<div class="v" id="rf-prefix"></div>
</div>
<h3 style="font-size:12px; margin:14px 0 6px; color:#6b7280; font-weight:600; text-transform:uppercase; letter-spacing:0.4px;">
Fetched from Google (post-prefix-filter)
</h3>
<div class="pre-block" id="rf-fetched-relevant"></div>
<h3 style="font-size:12px; margin:14px 0 6px; color:#6b7280; font-weight:600; text-transform:uppercase; letter-spacing:0.4px;">
Diff vs. cached membership
</h3>
<div class="pre-block" id="rf-diff"></div>
</div>
</div>
</section>
</div>
<script>
const refetchBtn = document.getElementById("refetch-btn");
const refetchStatus = document.getElementById("refetch-status");
const refetchResult = document.getElementById("refetch-result");

// Builds the text shown in the "Diff vs. cached membership" panel from
// the endpoint's JSON payload.
function renderRefetchDiff(data) {
  // When Google did not answer there is nothing to compare against, so
  // neither "+/-" lines nor a "matches Google" message would be true.
  if (data.soft_failed) {
    return "(no diff — Google did not answer, so nothing could be compared)";
  }
  const lines = [
    ...(data.would_add || []).map(g => `+ ${g}`),
    ...(data.would_remove || []).map(g => `- ${g}`),
  ];
  if (lines.length === 0) {
    lines.push("(no changes — cached membership matches Google)");
  }
  return lines.join("\n");
}

// Click handler: POSTs to the dry-run endpoint and renders its result.
// All values are written via textContent, so server-provided strings are
// never interpreted as markup.
refetchBtn.addEventListener("click", async () => {
  refetchBtn.disabled = true;
  refetchStatus.textContent = "Asking Google…";
  refetchResult.hidden = true; // hide any stale result while loading
  try {
    const r = await fetch("/me/debug/refetch-groups", {
      method: "POST", credentials: "include",
    });
    if (!r.ok) {
      refetchStatus.textContent = `Failed: HTTP ${r.status}`;
      return;
    }
    const data = await r.json();
    document.getElementById("rf-soft").textContent =
      data.soft_failed ? "yes — Google API didn't answer" : "no";
    document.getElementById("rf-prefix").textContent = data.prefix || "(none)";
    document.getElementById("rf-fetched-relevant").textContent =
      (data.fetched_relevant || []).join("\n") || "(empty)";
    document.getElementById("rf-diff").textContent = renderRefetchDiff(data);
    refetchResult.hidden = false;
    refetchStatus.textContent = "Done — nothing was applied to the database.";
  } catch (e) {
    refetchStatus.textContent = "Network error: " + (e.message || e);
  } finally {
    // Runs on every path (including the early return) to re-enable the button.
    refetchBtn.disabled = false;
  }
});
</script>
{% endblock %}

235
tests/test_me_debug.py Normal file
View file

@ -0,0 +1,235 @@
"""Tests for /me/debug self-diagnostic page.
The page must:
- Be 404 (not 403) when ``AGNES_DEBUG_AUTH`` is unset / falsy. 404 makes
the route's existence undetectable in production.
- Be 200 for any authenticated user when the flag is on; an unauthenticated
HTML GET is redirected (302) to /login by the app's 401 handler.
- Never echo the raw JWT — only decoded claims and a sha256 prefix.
- Refetch endpoint must return the diff shape and perform zero database
writes (snapshot user_group_members before/after).
"""
from __future__ import annotations
import tempfile
import uuid
import pytest
@pytest.fixture
def fresh_db(monkeypatch):
    """Point the app at a throwaway data directory for one test.

    Sets ``DATA_DIR`` to a new temporary directory, then ``TESTING`` and
    a fixed ``JWT_SECRET_KEY``, and yields the directory path. The
    directory is removed when the test finishes.
    """
    with tempfile.TemporaryDirectory() as data_dir:
        overrides = (
            ("DATA_DIR", data_dir),
            ("TESTING", "1"),
            ("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!"),
        )
        for name, value in overrides:
            monkeypatch.setenv(name, value)
        yield data_dir
def _make_user_and_session(conn, email: str = "u@example.com"):
    """Create an ``analyst`` user and mint a session JWT for them.

    Args:
        conn: Open system-DB connection the user is created on.
        email: Email for the new user; the part before ``@`` becomes
            the display name.

    Returns:
        A ``(user_id, session_jwt)`` tuple.
    """
    from src.repositories.users import UserRepository
    from app.auth.jwt import create_access_token

    user_id = str(uuid.uuid4())
    display_name = email.split("@")[0]
    repo = UserRepository(conn)
    repo.create(id=user_id, email=email, name=display_name, role="analyst")
    session_jwt = create_access_token(
        user_id=user_id, email=email, role="analyst"
    )
    return user_id, session_jwt
def _client():
    """Build a ``TestClient`` around the application object.

    The imports happen at call time, i.e. after the test has set up its
    environment variables.
    """
    from fastapi.testclient import TestClient
    from app.main import app

    client = TestClient(app)
    return client
# ---------------------------------------------------------------------------
# Gating
# ---------------------------------------------------------------------------
class TestGating:
    """Env-flag and authentication gating of ``GET /me/debug``."""

    @pytest.mark.parametrize("flag_value", ["", "0", "false", "False", "no", "off"])
    def test_returns_404_when_flag_off(self, fresh_db, monkeypatch, flag_value):
        """An unset or falsy flag must produce 404, never 403."""
        # The empty-string parameter stands for "variable not set at all".
        if flag_value:
            monkeypatch.setenv("AGNES_DEBUG_AUTH", flag_value)
        else:
            monkeypatch.delenv("AGNES_DEBUG_AUTH", raising=False)

        from src.db import get_system_db, close_system_db

        db = get_system_db()
        try:
            session_jwt = _make_user_and_session(db)[1]
        finally:
            db.close()
            close_system_db()

        response = _client().get("/me/debug", cookies={"access_token": session_jwt})
        assert response.status_code == 404

    @pytest.mark.parametrize("flag_value", ["1", "true", "TRUE", "yes"])
    def test_returns_200_for_authed_user_when_flag_on(self, fresh_db, monkeypatch, flag_value):
        """Every accepted truthy spelling opens the page to a signed-in user."""
        monkeypatch.setenv("AGNES_DEBUG_AUTH", flag_value)

        from src.db import get_system_db, close_system_db

        db = get_system_db()
        try:
            session_jwt = _make_user_and_session(db)[1]
        finally:
            db.close()
            close_system_db()

        response = _client().get("/me/debug", cookies={"access_token": session_jwt})
        assert response.status_code == 200, response.text
        assert "Auth debug" in response.text

    def test_redirects_to_login_when_unauthenticated(self, fresh_db, monkeypatch):
        """Flag on, no cookie: the request ends as a 302 to ``/login``.

        ``get_current_user`` raises 401 and the app's global exception
        handler turns that into a login redirect for HTML GETs — the
        same outcome as on any other authenticated page. The response
        must not be a 404 here.
        """
        monkeypatch.setenv("AGNES_DEBUG_AUTH", "true")

        from fastapi.testclient import TestClient
        from app.main import app

        client = TestClient(app, follow_redirects=False)
        response = client.get("/me/debug")
        assert response.status_code == 302
        location = response.headers.get("location", "")
        assert "/login" in location
# ---------------------------------------------------------------------------
# Data leakage guards
# ---------------------------------------------------------------------------
class TestNoSensitiveLeakage:
    """Guards against secrets appearing in the rendered page."""

    def test_raw_jwt_not_in_body(self, fresh_db, monkeypatch):
        """The page may show claims and a fingerprint, never the JWT itself."""
        monkeypatch.setenv("AGNES_DEBUG_AUTH", "true")

        from src.db import get_system_db, close_system_db

        db = get_system_db()
        try:
            session_jwt = _make_user_and_session(db)[1]
        finally:
            db.close()
            close_system_db()

        response = _client().get("/me/debug", cookies={"access_token": session_jwt})
        assert response.status_code == 200
        assert session_jwt not in response.text, "raw JWT leaked into page body"
# ---------------------------------------------------------------------------
# Refetch endpoint — dry-run, zero DB writes
# ---------------------------------------------------------------------------
class TestRefetchDryRun:
    """``POST /me/debug/refetch-groups`` is gated and never writes."""

    def test_404_when_flag_off(self, fresh_db, monkeypatch):
        """With the flag unset the refetch endpoint is hidden behind a 404."""
        monkeypatch.delenv("AGNES_DEBUG_AUTH", raising=False)
        from src.db import get_system_db, close_system_db
        conn = get_system_db()
        try:
            _, sess = _make_user_and_session(conn)
        finally:
            conn.close()
            close_system_db()
        c = _client()
        resp = c.post("/me/debug/refetch-groups", cookies={"access_token": sess})
        assert resp.status_code == 404

    def test_returns_diff_shape_and_does_not_write(self, fresh_db, monkeypatch):
        """With a mocked Google response the endpoint returns the documented
        shape and leaves the user's ``user_group_members`` rows unchanged."""
        monkeypatch.setenv("AGNES_DEBUG_AUTH", "true")
        # Deterministic group list via the mock env var (no real Google call).
        monkeypatch.setenv(
            "GOOGLE_ADMIN_SDK_MOCK_GROUPS",
            "grp_admin@example.com,grp_finance@example.com",
        )
        from src.db import get_system_db, close_system_db
        conn = get_system_db()
        try:
            uid, sess = _make_user_and_session(conn, email="m@example.com")
            before_rows = conn.execute(
                "SELECT user_id, group_id, source FROM user_group_members "
                "WHERE user_id = ?", [uid],
            ).fetchall()
        finally:
            conn.close()
            close_system_db()
        c = _client()
        resp = c.post("/me/debug/refetch-groups", cookies={"access_token": sess})
        assert resp.status_code == 200, resp.text
        data = resp.json()
        # Documented shape — keys present, types right.
        for key in (
            "soft_failed", "prefix", "fetched", "fetched_relevant",
            "current_names", "current_external_ids",
            "would_add", "would_remove", "applied",
        ):
            assert key in data, f"missing key {key!r}"
        assert data["applied"] is False
        assert data["soft_failed"] is False
        assert isinstance(data["fetched"], list)
        assert isinstance(data["would_add"], list)
        # Zero DB writes — snapshot before/after must match exactly.
        conn = get_system_db()
        try:
            after_rows = conn.execute(
                "SELECT user_id, group_id, source FROM user_group_members "
                "WHERE user_id = ?", [uid],
            ).fetchall()
        finally:
            conn.close()
            close_system_db()
        # The queries have no ORDER BY, so compare order-insensitively:
        # row order is not part of the "no writes" guarantee.
        assert sorted(before_rows) == sorted(after_rows)

    def test_soft_fail_marker_when_mock_unset_and_real_path_unconfigured(
        self, fresh_db, monkeypatch
    ):
        """Without the mock env and without ``GOOGLE_ADMIN_SDK_SUBJECT`` the
        fetch yields nothing, and the endpoint still answers 200."""
        monkeypatch.setenv("AGNES_DEBUG_AUTH", "true")
        monkeypatch.delenv("GOOGLE_ADMIN_SDK_MOCK_GROUPS", raising=False)
        monkeypatch.delenv("GOOGLE_ADMIN_SDK_SUBJECT", raising=False)
        from src.db import get_system_db, close_system_db
        conn = get_system_db()
        try:
            _, sess = _make_user_and_session(conn, email="sf@example.com")
        finally:
            conn.close()
            close_system_db()
        c = _client()
        resp = c.post("/me/debug/refetch-groups", cookies={"access_token": sess})
        assert resp.status_code == 200, resp.text
        data = resp.json()
        # Depending on the branch, fetch_user_groups returns [] (legacy
        # fail-soft) or None (reported as soft_failed=True) when the
        # subject is missing. Either way the endpoint must report an
        # empty fetch, so ``soft_failed`` itself is only type-checked.
        assert isinstance(data["soft_failed"], bool)
        assert data["fetched"] == []
        assert data["applied"] is False