This squashes 13 commits from ma/staging plus a small docstring translation
into a single coherent unit. Three workstreams.
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE is listing-only for now — admins can record table grants,
but runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries are
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
130 lines
4.3 KiB
Python
130 lines
4.3 KiB
Python
"""Dataset access checks — orthogonal to the v12 admin/group RBAC model.
|
|
|
|
The user_groups + user_group_members + resource_grants triple in
|
|
``app.auth.access`` covers app-level (Admin) and resource-level
|
|
(``ResourceType``) authorization. Dataset access is a separate axis (rows
|
|
in ``dataset_permissions`` keyed by dataset / wildcard bucket); we keep the
|
|
legacy helpers here so admin-bypass + per-table checks Just Work without
|
|
plumbing them into the resource-grants model.
|
|
|
|
This module is what ``app/api/sync.py`` and ``app/api/catalog.py`` call when
|
|
they need to filter the visible table list for a non-admin user.
|
|
"""
|
|
|
|
from typing import Optional
|
|
|
|
import duckdb
|
|
|
|
from src.db import get_system_db
|
|
|
|
|
|
def _is_admin_user_dict(user: dict, conn: Optional[duckdb.DuckDBPyConnection] = None) -> bool:
    """Report whether ``user`` belongs to the Admin system group.

    Thin adapter over ``app.auth.access.is_user_admin``. A caller-supplied
    ``conn`` is used as-is and left open; when ``conn`` is ``None`` a
    connection is obtained from ``get_system_db()`` for the lookup and
    closed again before returning.

    Args:
        user: User record; only its ``"id"`` key is read.
        conn: Optional already-open connection to run the lookup on.

    Returns:
        ``False`` when the record has no truthy ``"id"``; otherwise the
        result of ``is_user_admin`` for that id.
    """
    uid = user.get("id")
    if not uid:
        return False

    # Deferred import so that importing this module (e.g. from test
    # fixtures) does not pull in the FastAPI dependency tree.
    from app.auth.access import is_user_admin

    borrowed = conn is not None
    db = conn if borrowed else get_system_db()
    try:
        return is_user_admin(uid, db)
    finally:
        # Only close a connection this function opened itself.
        if not borrowed:
            db.close()
|
|
|
|
|
|
def has_dataset_access(email: str, dataset: str) -> bool:
    """Decide whether the user identified by ``email`` may use ``dataset``.

    Opens its own system DB connection and always closes it before
    returning.

    Resolution order:
      1. No user found for ``email`` -> ``False``.
      2. User is in the Admin group -> ``True`` (admins see every dataset).
      3. Otherwise -> the result of
         ``DatasetPermissionRepository.has_access`` for the user's id and
         ``dataset``.

    Args:
        email: Address used to look the user up via ``UserRepository``.
        dataset: Dataset identifier passed through to the permission check.
    """
    from src.repositories.users import UserRepository
    from src.repositories.sync_settings import DatasetPermissionRepository

    db = get_system_db()
    try:
        account = UserRepository(db).get_by_email(email)
        if not account:
            return False

        is_admin = _is_admin_user_dict(account, conn=db)
        if is_admin:
            return True

        permissions = DatasetPermissionRepository(db)
        return permissions.has_access(account["id"], dataset)
    finally:
        db.close()
|
|
|
|
|
|
def can_access_table(user: dict, table_id: str, conn: Optional[duckdb.DuckDBPyConnection] = None) -> bool:
    """Decide whether ``user`` may access the table ``table_id``.

    Checks are applied in this order; the first match wins:
      1. User is in the Admin group -> ``True``.
      2. The table's registry row is public -> ``True``. A row that lacks
         the ``is_public`` key is treated as public (the lookup defaults to
         ``True``); a table absent from the registry is not.
      3. Explicit ``dataset_permissions`` entry for ``table_id`` -> ``True``.
      4. Wildcard entry for the table's bucket (``"<bucket>.*"``, e.g.
         ``'in.c-finance.*'``) -> ``True``.
      5. Otherwise -> ``False``.

    Args:
        user: User record; only its ``"id"`` key is read.
        table_id: Identifier of the table in the table registry.
        conn: Optional open connection. When omitted, one is obtained from
            ``get_system_db()`` and closed before returning; a supplied
            connection is left open.
    """
    owns_conn = conn is None
    if owns_conn:
        conn = get_system_db()

    try:
        if _is_admin_user_dict(user, conn=conn):
            return True

        from src.repositories.table_registry import TableRegistryRepository
        from src.repositories.sync_settings import DatasetPermissionRepository

        # Public tables are visible to everyone.
        entry = TableRegistryRepository(conn).get(table_id)
        if entry and entry.get("is_public", True):
            return True

        uid = user.get("id", "")
        grants = DatasetPermissionRepository(conn)

        # Candidate permission keys, most specific first: the table itself,
        # then the wildcard for its bucket (only if the row names a bucket).
        bucket = entry.get("bucket", "") if entry else ""
        candidate_keys = [table_id]
        if bucket:
            candidate_keys.append(f"{bucket}.*")

        # any() stops at the first granted key, so the wildcard lookup only
        # runs when the explicit one did not match.
        return any(grants.has_access(uid, key) for key in candidate_keys)
    finally:
        if owns_conn:
            conn.close()
|
|
|
|
|
|
def get_accessible_tables(user: dict, conn: Optional[duckdb.DuckDBPyConnection] = None) -> Optional[list[str]]:
    """Return the ids of the tables ``user`` may access.

    Args:
        user: User record, forwarded to the admin and per-table checks.
        conn: Optional open connection. When omitted, one is obtained from
            ``get_system_db()`` and closed before returning; a supplied
            connection is left open.

    Returns:
        ``None`` for members of the Admin group, meaning "all tables"
        (admin bypass). For everyone else, a list with the ``"id"`` of each
        registry row for which ``can_access_table`` is truthy, in the order
        ``TableRegistryRepository.list_all()`` yields them.
    """
    owns_conn = conn is None
    if owns_conn:
        conn = get_system_db()

    try:
        if _is_admin_user_dict(user, conn=conn):
            # Sentinel for "no filtering needed", distinct from an empty list.
            return None

        from src.repositories.table_registry import TableRegistryRepository

        rows = TableRegistryRepository(conn).list_all()
        # Reuse the same connection for every per-table check.
        return [row["id"] for row in rows if can_access_table(user, row["id"], conn)]
    finally:
        if owns_conn:
            conn.close()
|