* fix(security): RBAC filter for agnes_sessions matches both email local-part and user_id
The upload API (POST /api/upload/sessions) stores session files under
user_sessions/{user_id}/ (UUID), while the session collector uses the
OS username (email local-part). The session pipeline writes the directory
name verbatim into usage_session_summary.username, so the column can
contain either value depending on the ingestion path.
The RBAC filter in build_filter_clause previously only matched the email
local-part, missing sessions uploaded via the API. The fix adds an OR
condition so non-admin users see rows where username matches either their
email local-part or their user_id.
Closes #293
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): RBAC filter uses stable user_id instead of mutable email local-part
Closes #293
Previous fix used OR condition matching both email local-part and user_id
in the username column. This was fragile: email changes would break
filtering. This commit introduces a dedicated user_id column populated
by the session pipeline via resolve_user_id(), and switches the RBAC
filter to use it exclusively.
Changes:
- Schema v45: add user_id column to usage_session_summary and usage_events
- UsageProcessor: accept and store user_id in both tables
- runner.py: resolve_user_id() maps directory name to users.id UUID
(exact match for UUID dirs, email LIKE for local-part dirs)
- INTERNAL_TABLES: agnes_sessions/agnes_telemetry filter on user_id column
- build_filter_clause: simplified to WHERE user_id = '<uuid>' (no OR)
- me.py/admin_user_sessions.py: query by user_id OR username for
backward compatibility during transition
- USAGE_PROCESSOR_VERSION bumped 2→3 to trigger reprocessing/backfill
- Tests updated: 27 pass including new email-change resilience test
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(tests): bump schema version assertions 44→45
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(docs): correct resolve_user_id docstring, add TypeError comment
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address review — backward-compat OR, LIKE escaping, narrower TypeError
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(security): address code review — eliminate TypeError hack, add resolve_user_id tests
Co-Authored-By: zdenek.srotyr <zdenek.srotyr@keboola.com>
* fix(db): create user_id indexes in _v44_to_v45, not _SYSTEM_SCHEMA
_SYSTEM_SCHEMA runs before the migration ladder. On an upgrade from
v42/v43/v44, usage_events / usage_session_summary already exist without
the user_id column (CREATE TABLE IF NOT EXISTS is a no-op), so the
CREATE INDEX ... (user_id) lines in _SYSTEM_SCHEMA failed to bind and
aborted _ensure_schema — the app would not start post-upgrade. Move the
index creation to _v44_to_v45, which ADDs the column first. Same pattern
as the v41 audit_log indices.
* fix(usage): bump USAGE_PROCESSOR_VERSION 3→4 for user_id backfill
#303 shipped USAGE_PROCESSOR_VERSION=3 (release 0.54.12) for its
<command-name> slash extraction. This PR's 2→3 bump collided with it
on rebase, so the reprocess loop would not re-trigger to backfill the
new user_id column on deployments already running v3. Bump to 4.
* release: 0.54.13 — RBAC filter uses stable user_id (#293)
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
193 lines
7 KiB
Python
193 lines
7 KiB
Python
"""Self-scoped user endpoints for the /home onboarding flow.
|
|
|
|
POST /api/me/onboarded toggles ``users.onboarded`` for the calling user
and writes an audit_log row distinguishing the trigger source:

- ``agnes_init`` — fired by the CLI's ``agnes init`` final step.
- ``self_acknowledged`` — fired by the on-page "I've already set this up"
  button shown to users who set up locally before /home shipped.
- ``self_unmark`` — fired by the on-page "Mark me as offboarded"
  button (visible once the user is onboarded).

The body's optional ``onboarded`` field defaults to ``True`` for
backward compat with existing ``agnes init`` calls. Pass ``false`` to
flip back — useful when an analyst wipes their workspace and wants the
inline install steps back, or when an operator demos the not-onboarded
view without an SQL UPDATE.

Idempotent — a second call still returns 200 and writes a second audit
row, so duplicate fires are visible without breaking the client. See
origin: docs/brainstorms/home-page-requirements.md §2 + §6.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Literal
|
|
|
|
import duckdb
|
|
from fastapi import APIRouter, Depends
|
|
from pydantic import BaseModel
|
|
|
|
from app.auth.dependencies import _get_db, get_current_user
|
|
from src.repositories.audit import AuditRepository
|
|
|
|
# Router for the caller-scoped endpoints; every route registered on it is
# served under the /api/me prefix and tagged "me".
router = APIRouter(prefix="/api/me", tags=["me"])
|
|
|
|
|
|
class OnboardedRequest(BaseModel):
    """Optional JSON body accepted by ``POST /api/me/onboarded``.

    Both fields carry defaults, so a request without a body is treated as
    ``{"source": "agnes_init", "onboarded": true}``.
    """

    # What triggered the toggle; copied into the audit row's params.
    source: Literal[
        "agnes_init",
        "self_acknowledged",
        "self_unmark",
    ] = "agnes_init"

    # Desired value for ``users.onboarded``; ``False`` flips the user back.
    onboarded: bool = True
|
|
|
|
|
|
@router.post("/onboarded")
async def post_onboarded(
    body: OnboardedRequest = OnboardedRequest(),
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Set ``users.onboarded`` for the calling user and audit the change.

    Args:
        body: Requested flag value plus the trigger source; defaults to
            onboarding via ``agnes_init`` when the request has no body.
        user: The authenticated caller; only its ``"id"`` key is read.
        conn: DuckDB connection used for the UPDATE and the audit write.

    Returns:
        ``{"status": "ok", "onboarded": <bool>}`` echoing the applied value.
    """
    is_onboarded = bool(body.onboarded)
    caller_id = user["id"]

    conn.execute(
        "UPDATE users SET onboarded = ? WHERE id = ?",
        [is_onboarded, caller_id],
    )

    # The audit action name records the direction of the toggle; the
    # trigger source travels in the params payload.
    if is_onboarded:
        audit_action = "user_onboarded"
    else:
        audit_action = "user_offboarded"

    AuditRepository(conn).log(
        user_id=caller_id,
        action=audit_action,
        params={"source": body.source},
        result="ok",
    )

    return {"status": "ok", "onboarded": is_onboarded}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GET /api/me/home-stats — backing data for the /home status frame
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Accepted ``window`` values mapped to the SQL interval literal that is
# interpolated into the home-stats query. Only these fixed strings ever
# reach the f-string, so the interpolation never carries caller input.
_WINDOW_INTERVALS: dict[str, str] = {
    "24h": "INTERVAL 24 HOUR",
    "7d": "INTERVAL 7 DAY",
}
|
|
|
|
|
|
def _username_for_stats(user: dict) -> str:
    """Derive the session-collector username for a ``users`` row.

    The value is the email local-part (everything before the first ``@``),
    which is what legacy rows carry in ``usage_session_summary.username``.
    An email without ``@`` is returned unchanged; a missing or empty email
    yields ``""``.

    Mirrors ``app.api.admin_user_sessions._username_from_user``. The logic
    is duplicated on purpose so this module does not depend on an
    admin-only helper; if the mapping changes, update both copies.
    """
    address: str = user.get("email", "") or ""
    # partition() returns the whole string as the head when "@" is absent,
    # so no separate membership check is needed.
    local_part, _sep, _domain = address.partition("@")
    return local_part
|
|
|
|
|
|
def compute_home_stats(conn: duckdb.DuckDBPyConnection, user: dict, window: str = "24h") -> dict:
    """Build the home-stats payload for ``user`` over the given window.

    Used by both the HTTP endpoint and the server-side /home render, so the
    result contains only JSON- and Jinja-friendly values.

    Args:
        conn: DuckDB connection the single stats query runs on.
        user: The caller's ``users`` row; ``"id"`` and ``"email"`` are read.
        window: ``"24h"`` or ``"7d"``. Any other value is clamped to
            ``"24h"``, so callers never need to pre-validate.

    Returns:
        A dict with ``window``, ``last_pull_at`` (ISO string or ``None``),
        ``sessions``, ``prompts``, ``projects`` and a ``tokens`` sub-dict
        (``input``, ``output``, ``cache_read``, ``cache_creation``,
        ``total``). All counters are zero when the query yields no row.
    """
    # Clamp unknown windows to the default before picking the interval.
    if _WINDOW_INTERVALS.get(window) is None:
        window = "24h"
    interval = _WINDOW_INTERVALS[window]

    legacy_username = _username_for_stats(user)
    user_id = user.get("id") or ""

    # Only the whitelisted interval literal is interpolated into the SQL;
    # everything derived from the user goes through bound parameters.
    # Rows are matched on user_id (stable, populated by the v45 pipeline)
    # OR username (legacy rows from before the v45 backfill) so the stats
    # stay complete during the transition period.
    sql = f"""
        WITH win AS (
            SELECT current_timestamp - {interval} AS since
        ),
        sess AS (
            SELECT
                COUNT(*) AS sessions,
                COALESCE(SUM(user_messages), 0) AS prompts,
                COALESCE(SUM(input_tokens), 0) AS input_tokens,
                COALESCE(SUM(output_tokens), 0) AS output_tokens,
                COALESCE(SUM(cache_read_tokens), 0) AS cache_read,
                COALESCE(SUM(cache_creation_tokens), 0) AS cache_creation
            FROM usage_session_summary, win
            WHERE (user_id = ? OR username = ?)
              AND started_at >= win.since
        ),
        proj AS (
            SELECT COUNT(DISTINCT cwd) AS projects
            FROM usage_events, win
            WHERE (user_id = ? OR username = ?)
              AND cwd IS NOT NULL
              AND occurred_at >= win.since
        ),
        u AS (
            SELECT last_pull_at FROM users WHERE id = ?
        )
        SELECT
            u.last_pull_at,
            sess.sessions, sess.prompts,
            sess.input_tokens, sess.output_tokens,
            sess.cache_read, sess.cache_creation,
            proj.projects
        FROM u, sess, proj
    """
    # Placeholder order: sess (user_id, username), proj (user_id, username),
    # then the users lookup (id).
    params = [user_id, legacy_username, user_id, legacy_username, user_id]
    row = conn.execute(sql, params).fetchone()

    # No row comes back when the ``u`` CTE is empty; substitute an all-zero
    # record so a single code path builds the payload.
    if row is None:
        row = (None, 0, 0, 0, 0, 0, 0, 0)

    (
        pulled_at,
        n_sessions,
        n_prompts,
        tok_in,
        tok_out,
        tok_cache_read,
        tok_cache_new,
        n_projects,
    ) = row

    tokens = {
        "input": int(tok_in or 0),
        "output": int(tok_out or 0),
        "cache_read": int(tok_cache_read or 0),
        "cache_creation": int(tok_cache_new or 0),
    }
    tokens["total"] = int(
        (tok_in or 0) + (tok_out or 0) + (tok_cache_read or 0) + (tok_cache_new or 0)
    )

    return {
        "window": window,
        "last_pull_at": pulled_at.isoformat() if pulled_at else None,
        "sessions": int(n_sessions or 0),
        "prompts": int(n_prompts or 0),
        "tokens": tokens,
        "projects": int(n_projects or 0),
    }
|
|
|
|
|
|
@router.get("/home-stats")
async def get_home_stats(
    window: str = "24h",
    user: dict = Depends(get_current_user),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
):
    """Serve the /home status-frame counters for the calling user.

    ``window`` selects a 24-hour (``"24h"``) or 7-day (``"7d"``) lookback.
    The work is delegated to ``compute_home_stats``, which issues a single
    DuckDB query across ``users``, ``usage_session_summary`` and
    ``usage_events``, so the page renders without N+1 lookups. A caller
    with no telemetry yet gets zeros / null rather than a 404, so the
    frame still renders cleanly for first-day analysts.
    """
    stats = compute_home_stats(conn, user, window)
    return stats
|