This squashes 13 commits from ma/staging plus a small docstring translation
into a single coherent unit. Three main workstreams, followed by smaller UI/deploy, vendor-neutralization, and translation changes.
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE listing-only — admins can record table grants,
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
Sync API logs PARTIAL FAILURE alert on exit 2. Operators with binary
alerting must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
181 lines
6.3 KiB
Python
181 lines
6.3 KiB
Python
"""Build & cache a bare git repo that mirrors the user's filtered plugin set.
|
|
|
|
Dulwich writes the on-disk repo; FastAPI's `git_router.py` serves it over
|
|
smart-HTTP through the WSGI bridge. The cache is keyed by the *content* ETag
|
|
(sha256 of the aggregated plugin files), so two users who resolve to the same
|
|
plugin set share one bare repo — and, because commit metadata is fixed, that
|
|
single commit hash is also stable across rebuilds.
|
|
|
|
Cache layout (per agnes-the-ai-analyst conventions):
|
|
${DATA_DIR}/marketplaces/git-cache/<etag>.git/ — bare repo
|
|
${DATA_DIR}/marketplaces/git-cache/.tmp-*.git/ — in-flight builds, atomically renamed
|
|
|
|
Stale entries are never pruned in this iteration — a different content ETag
just materializes a new directory next to the old one. Prune logic is
deferred to a later iteration; see plan "Out of scope".
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Dict
|
|
|
|
import duckdb
|
|
from dulwich.index import commit_tree
|
|
from dulwich.objects import Blob, Commit
|
|
from dulwich.repo import Repo
|
|
|
|
from app.marketplace_server.packager import (
|
|
MARKETPLACE_DESCRIPTION,
|
|
MARKETPLACE_NAME,
|
|
MARKETPLACE_OWNER,
|
|
)
|
|
from app.utils import get_marketplaces_dir
|
|
from src import marketplace_filter
|
|
|
|
logger = logging.getLogger(__name__)

# Commit metadata is pinned to constants so that the same file set always
# produces the same commit hash, no matter when or by whom it is rebuilt.
FIXED_AUTHOR: bytes = b"agnes-marketplace <noreply@agnes.local>"  # author and committer
FIXED_TIMESTAMP: int = 0  # author and commit time
FIXED_TZ: int = 0  # author and commit timezone offset
FIXED_MESSAGE: bytes = b"agnes marketplace snapshot"
FIXED_ENCODING: bytes = b"UTF-8"
|
|
|
|
|
|
def cache_dir() -> Path:
    """Return the directory under which cached bare repos are stored.

    This is the ``git-cache`` child of whatever ``get_marketplaces_dir()``
    returns. The directory is not created here.
    """
    marketplaces_root = get_marketplaces_dir()
    return marketplaces_root / "git-cache"
|
|
|
|
|
|
def _merged_manifest_bytes(plugins: list[dict], etag: str) -> bytes:
    """Serialize the aggregated marketplace manifest as UTF-8 encoded JSON.

    Produces the same manifest as the ZIP channel; it is assembled inline so
    the hot path does not import packager internals.

    For every plugin, a shallow copy of its ``raw`` mapping is taken and its
    ``name`` / ``source`` keys are overwritten from ``prefixed_name``. A truthy
    plugin-level ``version`` is filled in only when ``raw`` has no ``version``
    key of its own. *etag* is recorded as the manifest's metadata version.
    """
    manifest_plugins = []
    for plugin in plugins:
        prefixed = plugin["prefixed_name"]
        record = dict(plugin["raw"])
        # Keyword order matters: new keys are appended as name, then source.
        record.update(name=prefixed, source=f"./plugins/{prefixed}")
        fallback_version = plugin.get("version")
        if fallback_version:
            # Never override a version declared in the plugin's own manifest.
            record.setdefault("version", fallback_version)
        manifest_plugins.append(record)

    metadata = {"description": MARKETPLACE_DESCRIPTION, "version": etag}
    document = {
        "name": MARKETPLACE_NAME,
        "owner": MARKETPLACE_OWNER,
        "metadata": metadata,
        "plugins": manifest_plugins,
    }
    serialized = json.dumps(document, indent=2, sort_keys=False)
    return serialized.encode("utf-8")
|
|
|
|
|
|
def file_set_for_user(
    conn: duckdb.DuckDBPyConnection,
    user: dict,
    *,
    plugins: list[dict] | None = None,
    etag: str | None = None,
) -> Dict[str, bytes]:
    """Collect the files that make up the bare repo tree for *user*.

    The layout matches the ZIP channel, except that ``.agnes/version.json`` is
    left out: it contains ``generated_at`` and would force a different commit
    SHA on every rebuild.

    Callers that have already resolved *plugins* and/or *etag* (for example
    ``ensure_repo_for_user``) can pass them in to skip the
    ``resolve_allowed_plugins`` / ``compute_etag`` round-trip; whichever one is
    ``None`` is computed here.

    Returns a mapping of repo-relative POSIX path to file content. Plugins
    whose ``plugin_dir`` is not a directory contribute no files.
    """
    allowed = (
        plugins
        if plugins is not None
        else marketplace_filter.resolve_allowed_plugins(conn, user)
    )
    version = etag if etag is not None else marketplace_filter.compute_etag(allowed)

    tree: Dict[str, bytes] = {
        ".claude-plugin/marketplace.json": _merged_manifest_bytes(allowed, version),
    }

    for plugin in allowed:
        source_root: Path = plugin["plugin_dir"]
        if not source_root.is_dir():
            continue
        # Sorted traversal keeps the insertion order of the result stable.
        for candidate in sorted(source_root.rglob("*")):
            if not candidate.is_file():
                continue
            relative = candidate.relative_to(source_root).as_posix()
            tree[f"plugins/{plugin['prefixed_name']}/{relative}"] = candidate.read_bytes()
    return tree
|
|
|
|
|
|
def build_bare_repo(files: Dict[str, bytes], target_path: Path) -> None:
    """Create a new bare repo at *target_path* holding one deterministic commit.

    *target_path* MUST NOT exist yet (``mkdir`` is called with
    ``exist_ok=False``, so an existing path raises ``FileExistsError``). The
    caller is expected to build into a tmp dir and atomically rename it into
    place so concurrent workers never observe a half-written repo.

    Every entry of *files* becomes a blob stored with mode ``0o100644``. The
    single parentless commit uses the module's ``FIXED_*`` metadata and is
    published as ``refs/heads/main``, with ``HEAD`` pointing at that branch.
    """
    target_path.mkdir(parents=True, exist_ok=False)
    repo = Repo.init_bare(str(target_path))
    try:
        store = repo.object_store

        tree_entries = []
        for name in sorted(files):
            blob = Blob.from_string(files[name])
            store.add_object(blob)
            tree_entries.append((name.encode("utf-8"), blob.id, 0o100644))
        root_tree = commit_tree(store, tree_entries)

        snapshot = Commit()
        snapshot.tree = root_tree
        snapshot.parents = []
        # Fixed metadata keeps the commit id identical across rebuilds.
        snapshot.author = FIXED_AUTHOR
        snapshot.committer = FIXED_AUTHOR
        snapshot.author_time = FIXED_TIMESTAMP
        snapshot.commit_time = FIXED_TIMESTAMP
        snapshot.author_timezone = FIXED_TZ
        snapshot.commit_timezone = FIXED_TZ
        snapshot.encoding = FIXED_ENCODING
        snapshot.message = FIXED_MESSAGE
        store.add_object(snapshot)

        main_ref = b"refs/heads/main"
        repo.refs[main_ref] = snapshot.id
        repo.refs.set_symbolic_ref(b"HEAD", main_ref)
    finally:
        repo.close()
|
|
|
|
|
|
def ensure_repo_for_user(conn: duckdb.DuckDBPyConnection, user: dict) -> Path:
    """Return the on-disk bare repo for this user's RBAC view, building it
    lazily if needed.

    The repo lives at ``cache_dir() / "<etag>.git"``, where the ETag is
    computed from the user's allowed plugin set. If that directory already
    exists it is returned as-is.

    Safe under concurrent identical-etag requests: each builder writes into a
    uniquely named tmp dir and renames it into place; a builder that loses the
    race deletes its tmp dir and returns the winner's repo.

    Raises:
        FileExistsError: the rename was refused because the destination
            exists, yet the destination is not a directory.
        RuntimeError: the rename failed with any other ``OSError`` and no
            repo is present at the destination; the original error is
            chained as ``__cause__``.
        Exception: whatever ``build_bare_repo`` raises is propagated after
            the tmp dir has been removed.
    """
    plugins = marketplace_filter.resolve_allowed_plugins(conn, user)
    etag = marketplace_filter.compute_etag(plugins)

    root = cache_dir()
    root.mkdir(parents=True, exist_ok=True)
    target = root / f"{etag}.git"
    if target.is_dir():
        return target

    files = file_set_for_user(conn, user, plugins=plugins, etag=etag)
    tmp = root / f".tmp-{etag}.{uuid.uuid4().hex}.git"
    try:
        build_bare_repo(files, tmp)
    except Exception:
        shutil.rmtree(tmp, ignore_errors=True)
        raise

    try:
        os.rename(str(tmp), str(target))
    except OSError as e:
        # Our copy is useless on every failure path, so always remove it
        # rather than leaking a .tmp-* directory into the cache.
        shutil.rmtree(tmp, ignore_errors=True)
        if target.is_dir():
            # Another worker won the rename race. Windows reports this as
            # FileExistsError; Unix raises a plain OSError when the
            # destination is a non-empty directory. Same outcome either way.
            return target
        if isinstance(e, FileExistsError):
            raise
        raise RuntimeError(f"git-cache rename failed: {e}") from e
    return target
|