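This squashes 13 commits from ma/staging plus a small docstring translation
into a single coherent unit. Three main workstreams (RBAC v13, Claude Code
marketplace, security/ops hardening), followed by web UI polish, OSS
vendor-neutralization, and the docstring translation.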
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE listing-only — admins can record table grants,
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
135 lines
4.5 KiB
Python
135 lines
4.5 KiB
Python
"""Shared token → user resolution.

Both the JSON API (Bearer header / cookie) and the git smart-HTTP endpoint
(HTTP Basic where the password field carries the PAT) need the same chain:

    verify JWT → user exists & active → if typ=pat: still valid in DB →
    best-effort audit & last-used bookkeeping → return user dict.

Extracted from `app.auth.dependencies.get_current_user` so both paths run
identical checks. `resolve_token_to_user` returns `(user, reason)`:

- on success: `(user_dict, None)`
- on failure: `(None, reason)` where reason is one of the
  `ResolutionReason` strings below

The reason lets `get_current_user` map to a specific HTTP 401 detail
(`"Account deactivated"`, `"Token revoked"`, ...) while the WSGI git router
can discard it and just treat any non-None reason as unauthenticated.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from typing import Literal, Optional, Tuple
|
|
|
|
import duckdb
|
|
from fastapi import Request
|
|
|
|
from app.auth.jwt import verify_token
|
|
from src.repositories.access_tokens import AccessTokenRepository
|
|
from src.repositories.users import UserRepository
|
|
|
|
logger = logging.getLogger(__name__)

# Closed set of failure reasons returned as the second element of
# `resolve_token_to_user`'s result tuple. Each value corresponds to exactly
# one check in that function, in the order the checks run.
ResolutionReason = Literal[
    "no_token",        # the token argument was empty / falsy
    "invalid_token",   # verify_token() returned a falsy payload
    "user_not_found",  # no user row for the payload's `sub` claim
    "deactivated",     # the user's `active` flag is falsy
    "pat_unknown",     # no access-token record for the payload's `jti`
    "pat_revoked",     # the record's `revoked_at` is set
    "pat_expired",     # the record's `expires_at` is in the past
    "pat_mismatch",    # stored `token_hash` != sha256 of the presented token
]
|
|
|
|
|
|
def _client_ip(request: Optional[Request]) -> Optional[str]:
    """Return the caller's IP address, or None when it cannot be determined.

    See app/auth/dependencies._client_ip — same trust model (Caddy-fronted).

    Resolution order:
      1. no request object            -> None
      2. non-empty X-Forwarded-For    -> its first comma-separated entry,
                                         stripped (None if that entry is blank)
      3. otherwise                    -> `request.client.host` when available
    """
    if request is None:
        return None

    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        # Only the left-most entry is used; a blank entry yields None rather
        # than falling through to the socket peer.
        first_hop = forwarded.split(",", 1)[0].strip()
        return first_hop if first_hop else None

    peer = getattr(request, "client", None)
    if not peer:
        return None
    return getattr(peer, "host", None)
|
|
|
|
|
|
def resolve_token_to_user(
    conn: duckdb.DuckDBPyConnection,
    token: str,
    request: Optional[Request] = None,
) -> Tuple[Optional[dict], Optional[ResolutionReason]]:
    """Validate a bearer token and return ``(user_dict, None)`` on success.

    On failure returns ``(None, reason)`` — the reason identifies which check
    failed so callers can map to a specific HTTP 401 detail.

    Args:
        conn: Open DuckDB connection used for the user, access-token and
            audit repositories.
        token: The raw JWT as presented by the client.
        request: Optional incoming request; only used to derive the client
            IP for PAT bookkeeping. When omitted, no IP is recorded and no
            new-IP audit entry is emitted.

    Returns:
        ``(user, None)`` when every check passes, otherwise ``(None, reason)``
        where ``reason`` is one of the ``ResolutionReason`` literals.

    Side effects (PATs only, after all checks pass): a
    ``token.first_use_new_ip`` audit entry when the IP differs from the
    previously recorded one, and a ``mark_used`` update on the token record.
    Both are best-effort: failures are logged and never block authentication.
    """
    if not token:
        return None, "no_token"

    payload = verify_token(token)
    if not payload:
        return None, "invalid_token"

    user = UserRepository(conn).get_by_id(payload.get("sub", ""))
    if not user:
        return None, "user_not_found"
    if not bool(user.get("active", True)):
        return None, "deactivated"

    # Non-PAT tokens need no DB-backed token checks.
    if payload.get("typ") != "pat":
        return user, None

    # PAT: extra DB-backed validation (revoked/expired/unknown/hash).
    jti = payload.get("jti", "")
    tokens_repo = AccessTokenRepository(conn)
    record = tokens_repo.get_by_id(jti)
    if not record:
        return None, "pat_unknown"
    if record.get("revoked_at") is not None:
        return None, "pat_revoked"

    exp_at = record.get("expires_at")
    if exp_at is not None:
        if isinstance(exp_at, str):
            exp_at = datetime.fromisoformat(exp_at)
        # Naive timestamps are interpreted as UTC so the comparison below
        # never mixes naive and aware datetimes.
        if exp_at.tzinfo is None:
            exp_at = exp_at.replace(tzinfo=timezone.utc)
        if datetime.now(timezone.utc) > exp_at:
            return None, "pat_expired"

    # Defense-in-depth: stored token_hash must match sha256(bearer JWT).
    # Protects against a forged-but-unrevoked JWT using a stolen signing key.
    # Records without a stored hash skip this check.
    stored_hash = record.get("token_hash")
    if stored_hash:
        actual = hashlib.sha256(token.encode()).hexdigest()
        if actual != stored_hash:
            return None, "pat_mismatch"

    # First-use-from-new-IP audit entry (#12 acceptance criterion).
    # Only emit when the IP changes on a *subsequent* use — the very
    # first use of a token is not surprising and doesn't need an entry.
    current_ip = _client_ip(request)
    previous_ip = record.get("last_used_ip")
    already_used = record.get("last_used_at") is not None
    if already_used and current_ip and current_ip != previous_ip:
        try:
            from src.repositories.audit import AuditRepository

            AuditRepository(conn).log(
                user_id=user["id"],
                action="token.first_use_new_ip",
                resource=f"token:{jti}",
                params={"ip": current_ip, "previous_ip": previous_ip},
            )
        except Exception:
            # Audit failure must not block auth, but it must not be
            # invisible either: a silently dropped security audit entry is
            # worth a log line with the traceback.
            logger.warning(
                "Failed to write token.first_use_new_ip audit entry for token %s",
                jti,
                exc_info=True,
            )

    try:
        tokens_repo.mark_used(jti, ip=current_ip)
    except Exception:
        # Last-used bookkeeping is best-effort; log and carry on.
        logger.warning(
            "Failed to update last-used bookkeeping for token %s",
            jti,
            exc_info=True,
        )

    return user, None
|