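This squashes 13 commits from ma/staging, plus a small docstring translation,
into a single coherent unit. Three main workstreams (RBAC v13 redesign,
Claude Code marketplace, #81/#83/#44 security/ops hardening), followed by
web UI polish with a deploy fix, OSS vendor-neutralization, and the translation.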
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE listing-only — admins can record table grants,
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
220 lines
8.1 KiB
Python
220 lines
8.1 KiB
Python
"""FastAPI auth dependencies — current user resolution.
|
|
|
|
Authorization helpers (require_admin, require_resource_access) live in
``app.auth.access`` to avoid a circular import — they depend on
``get_current_user`` and ``_get_db``, both of which are defined in this module.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
import duckdb
|
|
from fastapi import Depends, HTTPException, Header, Request, status
|
|
|
|
from app.auth.jwt import verify_token
|
|
from src.db import get_system_db
|
|
from src.repositories.users import UserRepository
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default dev user used when LOCAL_DEV_MODE=1. Seeded at startup by app/main.py.
|
|
LOCAL_DEV_DEFAULT_EMAIL = "dev@localhost"
|
|
|
|
# Single-slot cache for the parsed LOCAL_DEV_GROUPS value, keyed by the raw env
|
|
# string. Avoids re-parsing JSON on every authenticated request without the
|
|
# surprise of test isolation issues — when the env changes (typical in tests),
|
|
# the key changes and the cache transparently re-parses.
|
|
_LOCAL_DEV_GROUPS_CACHE: tuple[str, list[dict]] | None = None
|
|
|
|
# Map pat_resolver.ResolutionReason → HTTP 401 `detail` string. Preserves the
|
|
# specific user-facing messages that existed before the pat_resolver refactor
|
|
# (Account deactivated, Token revoked, ...) so tests and admin UX that grep
|
|
# for these phrases keep working.
|
|
_AUTH_DETAIL_BY_REASON = {
|
|
"deactivated": "Account deactivated",
|
|
"user_not_found": "User not found",
|
|
"pat_unknown": "Token unknown",
|
|
"pat_revoked": "Token revoked",
|
|
"pat_expired": "Token expired",
|
|
"pat_mismatch": "Token mismatch",
|
|
"invalid_token": "Invalid or expired token",
|
|
"no_token": "Invalid or expired token",
|
|
}
|
|
|
|
|
|
def is_local_dev_mode() -> bool:
    """Report whether the LOCAL_DEV_MODE switch is on.

    The environment variable counts as enabled when its value, compared
    case-insensitively, is ``1``, ``true`` or ``yes``; anything else
    (including unset) is off. Unsafe for production — bypasses auth.
    """
    flag = os.environ.get("LOCAL_DEV_MODE", "")
    return flag.lower() in {"1", "true", "yes"}
|
|
|
|
|
|
def get_local_dev_email() -> str:
    """Return the email of the auto-logged-in dev user.

    Reads ``LOCAL_DEV_USER_EMAIL`` from the environment and falls back to
    ``LOCAL_DEV_DEFAULT_EMAIL`` only when the variable is not set at all
    (a variable set to the empty string is returned as-is).
    """
    configured = os.environ.get("LOCAL_DEV_USER_EMAIL")
    return LOCAL_DEV_DEFAULT_EMAIL if configured is None else configured
|
|
|
|
|
|
def get_local_dev_groups() -> list[dict]:
    """Return mock Google Workspace groups for the dev user in LOCAL_DEV_MODE.

    ``LOCAL_DEV_GROUPS`` is read as a JSON array of objects shaped like the
    output of ``_fetch_google_groups`` — ``[{"id": "...", "name": "..."}]``.
    Each item needs a non-empty ``id``; ``name`` defaults to ``id`` when
    omitted. Extra fields are kept verbatim so future group attributes
    (roles, labels, …) can be mocked without touching the parser.

    Missing, empty or malformed input yields ``[]`` — the dev mock must never
    break the dev flow. Malformed input is logged at WARNING.

    The parsed value is held in a single-slot module cache keyed by the
    stripped env string, so JSON is re-parsed only when that string changes.
    The list returned is the cached object itself.
    """
    global _LOCAL_DEV_GROUPS_CACHE
    env_value = os.environ.get("LOCAL_DEV_GROUPS", "").strip()
    slot = _LOCAL_DEV_GROUPS_CACHE
    if slot is None or slot[0] != env_value:
        # Cache miss (first call, or the env value changed): re-parse and store.
        slot = (env_value, _parse_local_dev_groups(env_value))
        _LOCAL_DEV_GROUPS_CACHE = slot
    return slot[1]
|
|
|
|
|
|
def _parse_local_dev_groups(raw: str) -> list[dict]:
    """Turn the raw ``LOCAL_DEV_GROUPS`` string into a list of group dicts.

    Args:
        raw: The (already stripped) environment value; may be empty.

    Returns:
        One new dict per valid item, each carrying a ``name`` key that falls
        back to the item's ``id`` when ``name`` is missing or empty. Returns
        ``[]`` for empty input, invalid JSON, or a non-array JSON value.
        Items that are not objects with a truthy ``id`` are skipped. Every
        rejected input or item is logged at WARNING.
    """
    if not raw:
        return []

    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError as exc:
        logger.warning("LOCAL_DEV_GROUPS is not valid JSON, ignoring: %s", exc)
        return []

    if not isinstance(decoded, list):
        logger.warning(
            "LOCAL_DEV_GROUPS must be a JSON array, got %s — ignoring",
            type(decoded).__name__,
        )
        return []

    groups: list[dict] = []
    for entry in decoded:
        if isinstance(entry, dict) and entry.get("id"):
            display_name = entry.get("name") or entry["id"]
            # Build a new dict rather than mutating the decoded item, so the
            # parser stays pure and each rebuild yields fresh objects.
            groups.append(entry | {"name": display_name})
        else:
            logger.warning(
                "LOCAL_DEV_GROUPS item must be an object with 'id', skipping: %r",
                entry,
            )
    return groups
|
|
|
|
|
|
def _get_db():
    """Yield a system-DB connection and close it when the consumer is done.

    Written as a generator so it can be used with ``Depends(_get_db)``: the
    connection from ``get_system_db()`` is handed out at the ``yield`` and
    closed in the ``finally`` clause, whether or not the consumer raised.
    """
    db = get_system_db()
    try:
        yield db
    finally:
        db.close()
|
|
|
|
|
|
def _client_ip(request: Optional[Request]) -> Optional[str]:
    """Return the request's client IP, preferring the first hop of X-Forwarded-For.

    Trust model: this deployment runs behind Caddy (see repo Caddyfile), which
    strips incoming X-Forwarded-For and sets its own. The leftmost hop is
    therefore trustworthy. If the app is ever exposed directly to the internet
    without a proxy, this value becomes client-settable and should only be
    relied on for audit/diagnostics, never access control. Value is stored in
    personal_access_tokens.last_used_ip and audit_log entries — informational
    only, never authorization.

    Args:
        request: The incoming request, or ``None`` when no request is available.

    Returns:
        The leftmost X-Forwarded-For hop when it is non-blank; otherwise the
        socket peer address (``request.client.host``); ``None`` when neither
        is available or ``request`` is ``None``.
    """
    if request is None:
        return None

    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        first_hop = forwarded.split(",", 1)[0].strip()
        if first_hop:
            return first_hop
        # Header present but its leftmost hop is blank (e.g. " , 10.0.0.1"):
        # fall through to the socket peer instead of recording no IP at all.

    peer = getattr(request, "client", None)
    return getattr(peer, "host", None) if peer else None
|
|
|
|
|
|
def _get_local_dev_user(conn: duckdb.DuckDBPyConnection) -> Optional[dict]:
    """Look up the seeded dev user by the configured dev email.

    This helper does not check LOCAL_DEV_MODE itself; the caller is expected
    to do so. When no user row is found, an ERROR is logged (the seed is
    expected to happen at app startup) and the falsy lookup result is
    returned unchanged.
    """
    dev_email = get_local_dev_email()
    found = UserRepository(conn).get_by_email(dev_email)
    if not found:
        logger.error(
            "LOCAL_DEV_MODE is on but dev user %s is not seeded; expected app startup to seed it",
            dev_email,
        )
    return found
|
|
|
|
|
|
async def get_current_user(
    request: Request = None,
    authorization: Optional[str] = Header(None),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
) -> dict:
    """Resolve the calling user from the Authorization header or session cookie.

    Resolution order:
      1. In LOCAL_DEV_MODE, the seeded dev user is returned with no token check.
      2. A ``Bearer`` token from the ``Authorization`` header.
      3. The ``access_token`` cookie (web UI after the OAuth redirect).

    No role hydration, no session caches — authorization is decided at gate
    time by ``app.auth.access`` which reads ``user_group_members`` directly.

    Raises:
        HTTPException: 401 when no token is supplied, or when the token does
            not resolve to a user (``detail`` is looked up in
            ``_AUTH_DETAIL_BY_REASON``, defaulting to a generic message).
    """
    if is_local_dev_mode():
        dev_user = _get_local_dev_user(conn)
        if dev_user:
            return dev_user
        # Seed missing: continue with normal auth so the problem surfaces
        # instead of being hidden.

    # Authorization header takes precedence over the cookie.
    credential = None
    if authorization and authorization.startswith("Bearer "):
        credential = authorization.removeprefix("Bearer ")

    # Cookie fallback (also used when the header carried an empty token).
    if not credential and request:
        credential = request.cookies.get("access_token")

    if not credential:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing or invalid Authorization header",
        )

    # Imported at call time rather than at module top.
    from app.auth.pat_resolver import resolve_token_to_user

    resolved_user, failure_reason = resolve_token_to_user(conn, credential, request)
    if not resolved_user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=_AUTH_DETAIL_BY_REASON.get(failure_reason, "Invalid or expired token"),
        )
    return resolved_user
|
|
|
|
|
|
async def get_optional_user(
    request: Request = None,
    authorization: Optional[str] = Header(None),
    conn: duckdb.DuckDBPyConnection = Depends(_get_db),
) -> Optional[dict]:
    """Variant of ``get_current_user`` that yields ``None`` instead of failing.

    Delegates to ``get_current_user`` with the same arguments; any
    ``HTTPException`` it raises (missing token, rejected token) is turned
    into a ``None`` result. Other exceptions propagate.
    """
    try:
        resolved = await get_current_user(
            request=request,
            authorization=authorization,
            conn=conn,
        )
    except HTTPException:
        return None
    return resolved
|
|
|
|
|
|
async def require_session_token(request: Request, user: dict = Depends(get_current_user)) -> dict:
    """Like ``get_current_user`` but rejects PATs.

    For endpoints that must not be callable via a long-lived CI token
    (e.g. creating new tokens, changing password). The token is re-read from
    the request in the same order ``get_current_user`` uses — ``Bearer``
    header first, then the ``access_token`` cookie — and its payload is
    inspected for ``typ == "pat"``.

    Returns:
        The already-authenticated ``user`` dict, unchanged.

    Raises:
        HTTPException: 403 when the presented token is a PAT.
    """
    header_value = request.headers.get("authorization", "")
    presented = (
        header_value.removeprefix("Bearer ")
        if header_value.startswith("Bearer ")
        else None
    )
    if not presented and request:
        presented = request.cookies.get("access_token")

    if not presented:
        # No token on the request at all; nothing to inspect.
        return user

    from app.auth.jwt import verify_token

    # A token that fails verification yields an empty payload, so it is not
    # classified as a PAT here.
    claims = verify_token(presented) or {}
    if claims.get("typ") == "pat":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="This endpoint requires an interactive session, not a PAT",
        )
    return user
|