This squashes 13 commits from ma/staging plus a small docstring translation
into a single coherent unit. Three main workstreams, followed by smaller
UI/deploy, vendor-neutralization, and translation changes.
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE is listing-only: admins can record table grants, but
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
161 lines
6 KiB
Python
161 lines
6 KiB
Python
"""Google OAuth provider for FastAPI.
|
|
|
|
Group memberships are sourced via Application Default Credentials in
|
|
``app.auth.group_sync.fetch_user_groups`` (no per-user OAuth scope needed for
|
|
that path), so the OAuth flow only handles authentication and returns a
|
|
session JWT. Membership writes go to ``user_group_members``.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
|
|
from authlib.integrations.starlette_client import OAuth
|
|
from fastapi import APIRouter, Request
|
|
from fastapi.responses import RedirectResponse
|
|
|
|
from app.auth.jwt import create_access_token
|
|
from app.auth._common import safe_next_path
|
|
from app.instance_config import get_allowed_domains
|
|
|
|
# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Every route in this module is mounted under /auth/google.
router = APIRouter(prefix="/auth/google", tags=["auth"])

# Authlib client registry; the "google" client is registered by
# _setup_oauth() only when credentials are configured.
oauth = OAuth()

# Credentials are read once at import time. An unset variable becomes the
# empty string, which is_available() treats as "not configured".
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID", "")
GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET", "")
|
|
|
|
|
|
def is_available() -> bool:
    """Report whether Google OAuth can be used.

    Returns:
        True only when both ``GOOGLE_CLIENT_ID`` and ``GOOGLE_CLIENT_SECRET``
        are non-empty; False otherwise.
    """
    credentials = (GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET)
    return all(credentials)
|
|
|
|
|
|
def _setup_oauth():
    """Register the ``google`` client on the shared Authlib registry.

    Does nothing when the client credentials are not configured, so
    ``oauth.google`` only exists when ``is_available()`` is true.
    """
    if is_available():
        oauth.register(
            name="google",
            client_id=GOOGLE_CLIENT_ID,
            client_secret=GOOGLE_CLIENT_SECRET,
            # Endpoints are discovered from Google's OpenID Connect metadata.
            server_metadata_url="https://accounts.google.com/.well-known/openid-configuration",
            client_kwargs={"scope": "openid email profile"},
        )
|
|
|
|
|
|
# Register the Google client at import time; _setup_oauth() returns early
# without registering anything when the credentials are not configured.
_setup_oauth()
|
|
|
|
|
|
@router.get("/login")
async def google_login(request: Request):
    """Start the Google OAuth flow by redirecting the browser to Google.

    An optional ``?next=<path>`` query parameter is sanitized and kept in the
    session under ``login_next`` so that the callback can send the user there
    instead of the default /dashboard. The session is used for this because
    the OAuth ``state`` parameter is managed by Authlib.

    Returns:
        A redirect to ``/login?error=google_not_configured`` when Google OAuth
        is not configured, otherwise Authlib's authorization redirect.
    """
    if not is_available():
        return RedirectResponse(url="/login?error=google_not_configured")

    requested_target = safe_next_path(
        request.query_params.get("next"), default=""
    )
    if not requested_target:
        # Drop any leftover value from an earlier attempt that never finished.
        request.session.pop("login_next", None)
    else:
        request.session["login_next"] = requested_target

    callback_url = str(request.url_for("google_callback"))
    return await oauth.google.authorize_redirect(request, callback_url)
|
|
|
|
|
|
@router.get("/callback")
async def google_callback(request: Request):
    """Handle the Google OAuth callback and sign the user in.

    Steps, in order:

    1. Exchange the authorization response for a token and read ``email`` and
       ``name`` from its ``userinfo``.
    2. Reject logins with no email, with an email Google explicitly reports
       as unverified, or (when an allow-list is configured) with a domain
       outside that allow-list.
    3. Find the user by email, creating the record on first login; reject
       deactivated users.
    4. Best-effort sync of Workspace groups into ``user_group_members``.
    5. Issue a session JWT, set it as the ``access_token`` cookie and redirect
       to the stashed ``login_next`` path (default ``/dashboard``).

    Returns:
        A ``RedirectResponse``. Failures redirect to ``/login?error=<code>``
        where ``<code>`` is one of ``google_not_configured``, ``no_email``,
        ``domain_not_allowed``, ``deactivated`` or ``oauth_failed``.
    """
    if not is_available():
        return RedirectResponse(url="/login?error=google_not_configured")

    try:
        token = await oauth.google.authorize_access_token(request)
        # ``or {}`` also covers a token whose "userinfo" key is present but
        # None, so that case is reported as no_email instead of oauth_failed.
        user_info = token.get("userinfo") or {}
        email = user_info.get("email", "")
        name = user_info.get("name", "")

        if not email:
            return RedirectResponse(url="/login?error=no_email")

        # The domain allow-list below trusts the email claim, so refuse an
        # address the provider explicitly flags as unverified. A missing
        # claim is tolerated to stay backward-compatible. The existing
        # no_email code is reused so the login page needs no new error case.
        if user_info.get("email_verified") is False:
            logger.warning(
                "Google login rejected for %s: email not verified", email
            )
            return RedirectResponse(url="/login?error=no_email")

        # Domain check (skipped when no allow-list is configured).
        allowed = get_allowed_domains()
        if allowed:
            domain = email.split("@")[-1]
            if domain not in allowed:
                return RedirectResponse(url="/login?error=domain_not_allowed")

        # Find or create user, sync Workspace group memberships into
        # user_group_members. Imports are kept function-local, as in the
        # original code.
        from src.db import get_system_db
        from src.repositories.users import UserRepository
        from src.repositories.user_groups import UserGroupsRepository
        from src.repositories.user_group_members import UserGroupMembersRepository
        from app.auth.group_sync import fetch_user_groups
        import uuid

        conn = get_system_db()
        try:
            repo = UserRepository(conn)
            user = repo.get_by_email(email)
            if not user:
                user_id = str(uuid.uuid4())
                repo.create(id=user_id, email=email, name=name)
                # Re-read so ``user`` has the same shape as an existing row.
                user = repo.get_by_email(email)
            if not user.get("active", True):
                return RedirectResponse(url="/login?error=deactivated")

            # Sync Workspace groups -> user_group_members (source='google_sync').
            # Fail-soft: any error leaves the previous membership snapshot in
            # place; admin-added rows survive regardless.
            try:
                group_names = fetch_user_groups(email)
                ug_repo = UserGroupsRepository(conn)
                members_repo = UserGroupMembersRepository(conn)
                group_ids: list[str] = [
                    ug_repo.ensure(group_name)["id"] for group_name in group_names
                ]
                members_repo.replace_google_sync_groups(
                    user["id"], group_ids, added_by="system:google-sync",
                )
                logger.info(
                    "Google group sync for %s: %d group(s) [%s]",
                    email, len(group_ids), ", ".join(group_names) or "<none>",
                )
            except Exception as sync_err:  # noqa: BLE001 - fail-soft by design
                logger.warning(
                    "Google group sync failed for %s: %s", email, sync_err
                )
        finally:
            # Runs on every exit path, including the early "deactivated" return.
            conn.close()

        # Issue JWT — role field is legacy; pass empty string so callers
        # that still inspect it during the transition don't fail on None.
        jwt_token = create_access_token(
            user["id"], user["email"], user.get("role") or ""
        )

        # Redirect to the post-login target. Prefer the value stashed by
        # google_login() — re-sanitize defensively in case of session tampering.
        target = safe_next_path(
            request.session.pop("login_next", None), default="/dashboard"
        )

        # Redirect to target with token in cookie. Match password/email
        # providers: Secure only when DOMAIN is set (production with TLS), so
        # the cookie is actually sent over plain HTTP in dev.
        use_secure = os.environ.get("DOMAIN", "") != ""
        response = RedirectResponse(url=target, status_code=302)
        response.set_cookie(
            key="access_token", value=jwt_token,
            httponly=True, max_age=86400, samesite="lax",
            secure=use_secure,
        )
        return response

    except Exception as e:
        # Top-level boundary: never surface a raw error to the browser, but
        # keep the traceback in the log so the failure can be diagnosed.
        logger.exception("Google OAuth error: %s", e)
        return RedirectResponse(url="/login?error=oauth_failed")
|