This squashes 13 commits from ma/staging plus a small docstring translation
into a single coherent unit. Three workstreams.
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE is listing-only — admins can record table grants, but
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
107 lines
3.6 KiB
Python
107 lines
3.6 KiB
Python
"""Sync a user's Google Workspace group membership into users.groups.
|
|
|
|
Called from `app/auth/providers/google.py` in the OAuth callback. Uses the
|
|
Cloud Identity API (searchTransitiveGroups — returns nested group
|
|
memberships too) with Application Default Credentials from the VM metadata
|
|
server. No JSON key, no domain-wide delegation.
|
|
|
|
Required one-off Workspace setup:
|
|
- Assign the "Groups Admin" admin role to the VM service account.
|
|
- See docs/google-workspace-groups-request.md.
|
|
|
|
Required VM config:
|
|
- `cloud-platform` access scope on the VM (already set on
|
|
grpn-sa-foundryai-execution) — covers `cloud-identity.groups.readonly`.
|
|
- Cloud Identity API enabled on the project.
|
|
|
|
Local dev / CI:
|
|
Set GOOGLE_ADMIN_SDK_MOCK_GROUPS to a comma-separated list. ADC from the
|
|
metadata server doesn't exist off-VM; without this flag local runs fall
|
|
through to the real API path and bail out with an empty list (fail-soft).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from typing import List
|
|
|
|
# Module-level logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)

# OAuth scope requested when building the Cloud Identity client credentials.
SCOPE: str = "https://www.googleapis.com/auth/cloud-identity.groups.readonly"

# Label used in the CEL membership query. It selects regular Workspace email
# groups (grp_*, eng-team@..., etc.) and leaves out security groups, dynamic
# groups, and POSIX groups, none of which are used for plugin RBAC.
_GROUP_LABEL_DISCUSSION: str = (
    "cloudidentity.googleapis.com/groups.discussion_forum"
)

# Name of the environment variable that bypasses the real API entirely. Its
# value is a comma-separated list of group names: an empty string yields an
# empty list, while leaving the variable unset selects the real API path.
MOCK_ENV: str = "GOOGLE_ADMIN_SDK_MOCK_GROUPS"
|
|
|
|
|
|
def fetch_user_groups(email: str) -> List[str]:
    """Look up the group names (emails) that *email* is a member of.

    When the mock environment variable is set, its comma-separated value is
    parsed and returned without touching the real API; otherwise the lookup
    is delegated to ``_fetch_real``.

    Fail-soft: any error produces ``[]``. Callers must treat the result as a
    soft signal (login proceeds, users.groups stays whatever it was before).
    """
    mock_value = os.environ.get(MOCK_ENV)
    if mock_value is None:
        # Variable unset: take the real API path.
        return _fetch_real(email)

    # Variable set (even to ""): split on commas, trim, drop blank entries.
    trimmed = (part.strip() for part in mock_value.split(","))
    return [name for name in trimmed if name]
|
|
|
|
|
|
def _cel_single_quoted(value: str) -> str:
    """Return *value* escaped for use inside a single-quoted CEL string."""
    # Backslashes must be doubled BEFORE quotes are escaped. Escaping only the
    # quote would turn an input containing \' into \\' -- an escaped backslash
    # followed by a bare quote that terminates the literal early.
    return value.replace("\\", "\\\\").replace("'", "\\'")


def _fetch_real(email: str) -> List[str]:
    """Fetch the groups *email* belongs to from the Cloud Identity API.

    Issues ``groups.memberships.searchTransitiveGroups`` requests, following
    ``nextPageToken`` until the result set is exhausted, and collects the
    ``groupKey.id`` of every returned membership.

    Args:
        email: Member key used in the ``member_key_id`` clause of the query.

    Returns:
        The collected group ids in response order, or ``[]`` when the Google
        client libraries are missing, client initialisation fails, or any
        page request fails. Never raises (fail-soft by design).
    """
    # Imported lazily so the module stays importable when the Google client
    # libraries are not installed.
    try:
        from google.auth import default
        from googleapiclient.discovery import build
    except ImportError:
        logger.warning(
            "google-api-python-client not installed; skipping group fetch"
        )
        return []

    try:
        creds, _ = default(scopes=[SCOPE])
        service = build(
            "cloudidentity", "v1", credentials=creds, cache_discovery=False
        )
    except Exception as e:  # noqa: BLE001 - fail-soft by design
        logger.warning("Google client init failed: %s", e)
        return []

    # Escape the email so the CEL query stays well-formed even if a login
    # contains a quote or a backslash (rare, but defensive).
    query = (
        f"member_key_id == '{_cel_single_quoted(email)}' && "
        f"'{_GROUP_LABEL_DISCUSSION}' in labels"
    )

    groups: List[str] = []
    page_token = None
    try:
        while True:
            resp = (
                service.groups()
                .memberships()
                .searchTransitiveGroups(
                    parent="groups/-",
                    query=query,
                    pageToken=page_token,
                )
                .execute()
            )
            for membership in resp.get("memberships", []):
                group_id = membership.get("groupKey", {}).get("id")
                if group_id:
                    groups.append(group_id)
            page_token = resp.get("nextPageToken")
            if not page_token:
                break
    except Exception as e:  # noqa: BLE001 - fail-soft by design
        # A failure on any page discards what earlier pages returned: the
        # caller gets [] rather than a partial membership list.
        logger.warning("Group fetch failed for %s: %s", email, e)
        return []

    return groups
|