This squashes 13 commits from ma/staging, plus a small docstring translation,
into a single coherent unit. It covers three main workstreams, followed by web UI polish, OSS vendor-neutralization, and the translation.
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE is listing-only: admins can record table grants, but
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
156 lines
5.2 KiB
Python
156 lines
5.2 KiB
Python
"""WSGI app serving the per-user bare repo over git smart-HTTP.
|
|
|
|
Mounted at `/marketplace.git` via `a2wsgi.WSGIMiddleware`. Claude Code
|
|
registers the URL:
|
|
|
|
/plugin marketplace add https://x:<PAT>@host/marketplace.git/
|
|
|
|
git CLI does not speak Bearer tokens — it only sends HTTP Basic. By
|
|
convention (same as GitHub PATs) the username is ignored and the password
|
|
field carries the bearer token. We extract it, validate via the shared
|
|
`resolve_token_to_user`, then hand the request off to dulwich's smart-HTTP
|
|
handler scoped to the user's filtered bare repo.
|
|
|
|
Repo lifetime: dulwich writes response data via the WSGI `write()` callable,
|
|
so the returned iterable is typically empty. We wrap it in `_CloseOnExhaust`
|
|
to close the Repo handle deterministically once the body has been flushed.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import logging
|
|
from typing import Callable, Iterable, Optional
|
|
|
|
from dulwich.repo import Repo
|
|
from dulwich.server import DictBackend
|
|
from dulwich.web import HTTPGitApplication
|
|
|
|
from app.auth.pat_resolver import resolve_token_to_user
|
|
from app.marketplace_server import git_backend
|
|
from src.db import get_system_db
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def token_from_basic_auth(auth_header: Optional[str]) -> Optional[str]:
    """Extract the password (= PAT in our scheme) from an HTTP Basic header.

    Username is discarded; git CLI typically sends `x`, `x-access-token`,
    `git`, etc. The scheme name is matched case-insensitively, and any run of
    whitespace between the scheme and the credentials (or around the header
    value) is tolerated, since the HTTP grammar allows one *or more* spaces
    there.

    Returns None for missing / malformed / non-Basic headers, and also when
    the decoded credentials carry an empty password.
    """
    if not auth_header:
        return None

    # split(None, 1) collapses repeated separators, so "Basic   <b64>" parses
    # the same as "Basic <b64>"; a whitespace-only header yields [] -> None.
    parts = auth_header.split(None, 1)
    if len(parts) != 2 or parts[0].lower() != "basic":
        return None

    try:
        # validate=True rejects any non-alphabet character instead of
        # silently skipping it; trailing whitespace is stripped beforehand.
        decoded = base64.b64decode(parts[1].strip(), validate=True).decode("utf-8")
    except (ValueError, UnicodeDecodeError):
        # binascii.Error (bad base64) is a ValueError subclass.
        return None

    # Only the first ":" separates user from password; any later colon is
    # part of the password itself.
    _username, separator, password = decoded.partition(":")
    if not separator:
        return None
    return password or None
|
|
|
|
|
|
def _unauthorized(start_response: Callable) -> Iterable[bytes]:
    """Start a 401 response carrying a Basic-auth challenge.

    Calls `start_response` with a plain-text content type and a
    `WWW-Authenticate: Basic` header for the `agnes-marketplace` realm, then
    returns the one-chunk response body.
    """
    challenge_headers = [
        ("Content-Type", "text/plain; charset=utf-8"),
        ("WWW-Authenticate", 'Basic realm="agnes-marketplace"'),
    ]
    start_response("401 Unauthorized", challenge_headers)

    body = b"authentication required\n"
    return [body]
|
|
|
|
|
|
def _server_error(start_response: Callable) -> Iterable[bytes]:
    """Start a generic plain-text 500 response.

    Calls `start_response` with a plain-text content type and returns the
    one-chunk response body. No detail about the failure is included in the
    response.
    """
    plain_text_headers = [("Content-Type", "text/plain; charset=utf-8")]
    start_response("500 Internal Server Error", plain_text_headers)

    body = b"internal server error\n"
    return [body]
|
|
|
|
|
|
def make_git_wsgi_app() -> Callable:
    """Construct the per-request WSGI handler. The returned callable is what
    `a2wsgi.WSGIMiddleware` invokes for every mounted request.

    For each request the handler:

    1. extracts the PAT from the HTTP Basic `Authorization` header;
    2. resolves it to a user with `resolve_token_to_user`;
    3. opens the repo path returned by `git_backend.ensure_repo_for_user`;
    4. delegates to dulwich's `HTTPGitApplication`, wrapping the result in
       `_CloseOnExhaust` so the Repo handle is released afterwards.

    A missing/unparseable token or an unresolved user yields 401. Any
    exception in opening the DB or in steps 2-4 is logged and yields a
    plain-text 500.
    """

    def app(environ: dict, start_response: Callable) -> Iterable[bytes]:
        token = token_from_basic_auth(environ.get("HTTP_AUTHORIZATION", ""))
        if not token:
            # Without a token the outcome is always 401, so answer the
            # challenge without opening a DB connection at all.
            return _unauthorized(start_response)

        try:
            conn = get_system_db()
        except Exception:
            logger.exception("get_system_db() failed")
            return _server_error(start_response)

        try:
            try:
                # Git channel doesn't need the reason — just auth yes/no.
                user, _reason = resolve_token_to_user(conn, token)
            except Exception:
                # Same log-and-500 treatment as the other steps, rather
                # than letting the exception escape the WSGI callable.
                logger.exception("resolve_token_to_user() failed")
                return _server_error(start_response)
            if not user:
                return _unauthorized(start_response)

            try:
                repo_path = git_backend.ensure_repo_for_user(conn, user)
                repo = Repo(str(repo_path))
            except Exception:
                logger.exception(
                    "Failed to open repo for user %r", user.get("email") or user.get("id")
                )
                return _server_error(start_response)

            try:
                # Use string key "/" — url_prefix() returns a str and
                # DictBackend resolves str keys directly.
                backend = DictBackend({"/": repo})
                git_app = HTTPGitApplication(backend)
                inner = git_app(environ, start_response)
            except Exception:
                # Log before closing so a failing close() cannot hide the
                # original error.
                logger.exception("dulwich failed for user %r", user.get("id"))
                repo.close()
                # NOTE(review): if dulwich already called start_response
                # before raising, this second call is made without
                # exc_info — confirm how the WSGI server handles that.
                return _server_error(start_response)

            return _CloseOnExhaust(inner, repo)
        finally:
            # The DB connection can be closed early — ensure_repo_for_user and
            # resolve_token_to_user are done with it by now.
            try:
                conn.close()
            except Exception:
                # Best-effort cleanup: never fail the request over it, but
                # leave a trace for debugging.
                logger.debug("Failed to close system DB connection", exc_info=True)

    return app
|
|
|
|
|
|
class _CloseOnExhaust:
    """Wrap a WSGI response iterable, closing the Repo when the body is done.

    dulwich drives output through the WSGI `write()` callable, so the
    iterable itself is usually empty. We still forward `close()` in case the
    WSGI server signals early termination (client disconnect).

    The Repo is closed at most once: a normal request both exhausts the
    iterator and receives the server's `close()` call, and without the guard
    each of those would call `Repo.close()`.
    """

    def __init__(self, inner: Iterable[bytes], repo: Repo) -> None:
        self._inner = inner
        self._repo = repo
        # Set as soon as a close has been attempted, so it is never repeated.
        self._repo_closed = False

    def _release_repo(self) -> None:
        """Close the wrapped Repo unless a close was already attempted."""
        if self._repo_closed:
            return
        # Flip the flag first: if close() raises it is still not retried.
        self._repo_closed = True
        self._repo.close()

    def __iter__(self):
        try:
            yield from self._inner
        finally:
            # Runs on exhaustion, on an error in the inner iterable, and
            # when the generator itself is closed early.
            self._release_repo()

    def close(self) -> None:
        """Forward `close()` to the inner iterable (if it has one), then
        release the Repo even if the inner close raised."""
        try:
            inner_close = getattr(self._inner, "close", None)
            if inner_close is not None:
                inner_close()
        finally:
            self._release_repo()
|