diff --git a/app/api/access.py b/app/api/access.py index d121fb3..14e0950 100644 --- a/app/api/access.py +++ b/app/api/access.py @@ -183,14 +183,17 @@ async def access_overview( # Per-resource-type hierarchies. Driven by the registry in # app.resource_types — adding a new type there is the one place that - # surfaces here, no extra wiring. + # surfaces here, no extra wiring. Disabled types (e.g. TABLE without + # AGNES_ENABLE_TABLE_GRANTS) are skipped so the admin UI does not + # render a chip for grants the runtime cannot enforce yet. + from app.resource_types import enabled_resource_types resources = [ { "type_key": spec.key.value, "type_display": spec.display_name, "blocks": spec.list_blocks(conn), } - for spec in RESOURCE_TYPES.values() + for spec in enabled_resource_types() ] return {"groups": groups, "grants": grants, "resources": resources} @@ -369,17 +372,18 @@ async def delete_group( raise HTTPException(status_code=404, detail="Group not found") if g.get("is_system"): raise HTTPException(status_code=409, detail="Cannot delete a system group") - # Atomic delete: cascade child rows then the group itself, all in one - # transaction. group_id has no FK constraints in the schema — the - # cascade is application-level — so an unhandled mid-flight failure - # without the BEGIN/COMMIT wrap could leave orphaned member / grant - # rows that the admin UI cannot clean up (the parent group_id is - # gone). With the transaction, either all three deletes commit or - # none do. - conn.execute("BEGIN TRANSACTION") + # Cascade members + grants atomically with the group row so a partial + # failure cannot leave orphans pointing at a deleted group_id. There are + # no FK constraints (group_id is plain VARCHAR), so the application is + # responsible for the invariant — wrap in an explicit transaction. try: - conn.execute("DELETE FROM user_group_members WHERE group_id = ?", [group_id]) - conn.execute("DELETE FROM resource_grants WHERE group_id = ?", [group_id]) + conn.execute("BEGIN TRANSACTION") + conn.execute( + "DELETE FROM user_group_members WHERE group_id = ?", [group_id] + ) + conn.execute( + "DELETE FROM resource_grants WHERE group_id = ?", [group_id] + ) repo.delete(group_id) conn.execute("COMMIT") except SystemGroupProtected: @@ -562,6 +566,20 @@ async def create_grant( conn: duckdb.DuckDBPyConnection = Depends(_get_db), ): rt = _validate_resource_type(payload.resource_type) + # Feature gate: refuse to mint grants for resource types whose runtime + # enforcement is not wired up yet (e.g. ResourceType.TABLE without + # AGNES_ENABLE_TABLE_GRANTS). Listing + deleting existing rows still + # works so operators can clean up legacy data. + from app.resource_types import is_resource_type_enabled + if not is_resource_type_enabled(rt): + raise HTTPException( + status_code=422, + detail=( + f"resource_type {rt.value!r} is not currently enabled. " + "Set AGNES_ENABLE_TABLE_GRANTS=1 to opt in once the runtime " + "enforcement is in place (see docs/TODO-rbac-data-enforcement.md)." + ), + ) if not payload.resource_id.strip(): raise HTTPException(status_code=400, detail="resource_id is required") if not UserGroupsRepository(conn).get(payload.group_id): diff --git a/app/marketplace_server/packager.py b/app/marketplace_server/packager.py index 13b9937..4fa82a4 100644 --- a/app/marketplace_server/packager.py +++ b/app/marketplace_server/packager.py @@ -22,15 +22,36 @@ from __future__ import annotations import io import json +import os +import threading import zipfile from datetime import datetime, timezone -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Optional, Tuple import duckdb +from cachetools import TTLCache from src import marketplace_filter MARKETPLACE_NAME = "agnes" + +# In-process TTL cache for compute_etag() results. The expensive part of +# compute_etag is a SHA256 over every plugin file on disk; for a stable +# marketplace this hash doesn't change between requests. We key on the +# resolved plugin set (prefixed_name + version + plugin_dir path) so two +# users with the same allowed view share the same cache entry. +# +# TTL bounds drift between cache and on-disk content. Marketplace sync runs +# nightly; the default 120s TTL means the first session-start in a cold +# minute pays the SHA cost and the next ~120s of session-starts (across all +# users with the same view) hit the cache. Override with +# AGNES_MARKETPLACE_ETAG_TTL= for tests / tighter staleness bounds; +# set 0 to disable. +_ETAG_CACHE_TTL = int(os.environ.get("AGNES_MARKETPLACE_ETAG_TTL", "120")) +_ETAG_CACHE: Optional[TTLCache] = ( + TTLCache(maxsize=512, ttl=_ETAG_CACHE_TTL) if _ETAG_CACHE_TTL > 0 else None +) +_ETAG_CACHE_LOCK = threading.Lock() MARKETPLACE_OWNER = {"name": "Agnes AI Analyst"} MARKETPLACE_DESCRIPTION = ( "Aggregated per-user Claude Code marketplace — served by agnes-the-ai-analyst" @@ -132,18 +153,55 @@ def _write_zip_entry(zf: zipfile.ZipFile, arcname: str, data: bytes) -> None: zf.writestr(info, data) -def compute_zip_etag(conn: duckdb.DuckDBPyConnection, user: dict) -> str: - """Cheaply compute the ETag for a user's marketplace ZIP. +def _etag_cache_key(plugins: List[dict]) -> tuple: + return tuple( + sorted( + (p["prefixed_name"], p.get("version") or "", str(p["plugin_dir"])) + for p in plugins + ) + ) - Resolves allowed plugins and hashes their on-disk content without reading - file bodies into memory or building the ZIP. Used by the router to serve - ``304 Not Modified`` without the full build cost. + +def compute_etag_for_user( + conn: duckdb.DuckDBPyConnection, user: dict +) -> Tuple[str, List[dict]]: + """Resolve the user's allowed plugins and compute their content-addressed + ETag, without doing any file collection or ZIP assembly. + + Returns (etag, plugins) so callers that proceed to build_zip can reuse + the resolved plugin set and skip the second DB query. """ plugins = marketplace_filter.resolve_allowed_plugins(conn, user) - return marketplace_filter.compute_etag(plugins) + if _ETAG_CACHE is None: + return marketplace_filter.compute_etag(plugins), plugins + cache_key = _etag_cache_key(plugins) + with _ETAG_CACHE_LOCK: + cached = _ETAG_CACHE.get(cache_key) + if cached is not None: + return cached, plugins + etag = marketplace_filter.compute_etag(plugins) + with _ETAG_CACHE_LOCK: + _ETAG_CACHE[cache_key] = etag + return etag, plugins -def build_zip(conn: duckdb.DuckDBPyConnection, user: dict) -> Tuple[bytes, str]: +def invalidate_etag_cache() -> None: + """Drop all cached etags. Called by marketplace sync after refresh so the + next request re-hashes against the new on-disk content instead of waiting + for TTL expiry.""" + if _ETAG_CACHE is None: + return + with _ETAG_CACHE_LOCK: + _ETAG_CACHE.clear() + + +def build_zip( + conn: duckdb.DuckDBPyConnection, + user: dict, + *, + plugins: Optional[List[dict]] = None, + etag: Optional[str] = None, +) -> Tuple[bytes, str]: """Build the deterministic ZIP for this user. Returns (bytes, etag). The `.agnes/version.json` entry carries `generated_at` for diagnostics and @@ -151,9 +209,12 @@ def build_zip(conn: duckdb.DuckDBPyConnection, user: dict) -> Tuple[bytes, str]: for the ZIP channel (the ETag gate is computed from content hashes *before* that file is added). The git channel uses file_set_for_user() instead, which deliberately omits this diagnostic file. + + Callers that already resolved plugins + etag (e.g. the router after an + If-None-Match miss) pass them as kwargs so we don't redo the work. """ - plugins = marketplace_filter.resolve_allowed_plugins(conn, user) - etag = marketplace_filter.compute_etag(plugins) + if plugins is None or etag is None: + etag, plugins = compute_etag_for_user(conn, user) members = _collect_members(plugins, etag) diff --git a/app/marketplace_server/router.py b/app/marketplace_server/router.py index aa04b45..d01c66e 100644 --- a/app/marketplace_server/router.py +++ b/app/marketplace_server/router.py @@ -41,16 +41,14 @@ async def marketplace_zip( conn: duckdb.DuckDBPyConnection = Depends(_get_db), ) -> Response: if_none_match = request.headers.get("if-none-match", "").strip().strip('"') - - # Compute the ETag cheaply (DB query + file hash scan) before doing the - # expensive ZIP build (file reads + compression). The common case — a - # Claude Code SessionStart hook with a matching ETag — returns 304 - # without any file IO or ZIP work. - etag = packager.compute_zip_etag(conn, user) + # Resolve the etag first — this lets a 304 short-circuit before we read + # every plugin file off disk and run ZIP_DEFLATED. Hot path on every + # Claude Code SessionStart. + etag, plugins = packager.compute_etag_for_user(conn, user) if if_none_match and if_none_match == etag: return Response(status_code=304, headers={"ETag": f'"{etag}"'}) - data, etag = packager.build_zip(conn, user) + data, _ = packager.build_zip(conn, user, plugins=plugins, etag=etag) return Response( content=data, media_type="application/zip", diff --git a/app/resource_types.py b/app/resource_types.py index 761327f..9de1a8d 100644 --- a/app/resource_types.py +++ b/app/resource_types.py @@ -25,6 +25,7 @@ value verbatim. from __future__ import annotations +import os from dataclasses import dataclass from enum import StrEnum from typing import TYPE_CHECKING, Any, Callable, List @@ -187,12 +188,40 @@ RESOURCE_TYPES: dict[ResourceType, ResourceTypeSpec] = { } +def is_resource_type_enabled(rt: ResourceType) -> bool: + """Whether a resource type is exposed to the admin UI + grant API. + + ``ResourceType.TABLE`` is gated behind ``AGNES_ENABLE_TABLE_GRANTS=1`` + (default off) until the data-plane runtime check delegates to + ``app.auth.access.can_access`` (currently still flows through legacy + ``dataset_permissions``). Without runtime enforcement, exposing the chip + is misleading: admins grant access through /admin/access, but the user + still gets filtered out at /api/catalog. See + ``docs/TODO-rbac-data-enforcement.md`` step 1. + + Existing TABLE grants in ``resource_grants`` remain in the DB regardless + — this flag controls UI exposure + new-grant acceptance, not data. + """ + if rt is ResourceType.TABLE: + return os.environ.get("AGNES_ENABLE_TABLE_GRANTS", "").lower() in { + "1", "true", "yes", "on", + } + return True + + +def enabled_resource_types() -> list[ResourceTypeSpec]: + """The subset of RESOURCE_TYPES currently surfaced to admins.""" + return [spec for rt, spec in RESOURCE_TYPES.items() if is_resource_type_enabled(rt)] + + def list_resource_types() -> list[dict[str, str]]: """Flat projection for /api/admin/resource-types. Shape: ``[{key, display_name, description, id_format}]``. The ``list_blocks`` delegate is intentionally omitted — the UI consumes - blocks via ``/api/admin/access-overview`` instead. + blocks via ``/api/admin/access-overview`` instead. Disabled types + (e.g. TABLE without ``AGNES_ENABLE_TABLE_GRANTS``) are filtered out so + the admin UI does not advertise grants the runtime cannot enforce. """ return [ { @@ -201,5 +230,5 @@ def list_resource_types() -> list[dict[str, str]]: "description": spec.description, "id_format": spec.id_format, } - for spec in RESOURCE_TYPES.values() + for spec in enabled_resource_types() ] diff --git a/cli/commands/admin.py b/cli/commands/admin.py index 53bac2a..921e520 100644 --- a/cli/commands/admin.py +++ b/cli/commands/admin.py @@ -531,3 +531,104 @@ def grant_resource_types(as_json: bool = typer.Option(False, "--json")): ("display_name", "DISPLAY NAME", 28), ("id_format", "ID FORMAT", 36), ]) + + +# --------------------------------------------------------------------------- +# Break-glass: out-of-band admin grant. +# +# Talks directly to system.duckdb — no HTTP, no auth dependency. The whole +# point is recovery for the case where the running server's authorization +# layer is broken or there is no admin left to authenticate as. Requires +# filesystem access to ${DATA_DIR}/state/system.duckdb and is therefore +# restricted to operators with shell access on the host. +# --------------------------------------------------------------------------- + + +breakglass_app = typer.Typer( + help="Out-of-band recovery (talks directly to system.duckdb)", +) +admin_app.add_typer(breakglass_app, name="break-glass") + + +@breakglass_app.command("grant-admin") +def break_glass_grant_admin( + email: str = typer.Argument(..., help="Email of the user to promote"), + yes: bool = typer.Option( + False, "--yes", "-y", help="Skip confirmation prompt" + ), +) -> None: + """Grant Admin-group membership to a user without going through the API. + + Operates directly on system.duckdb. Use when the server is up but the + Admin group has no live members (race, mistake, accidental DELETE) or + when bootstrapping a brand-new install before any admin exists. Membership + is recorded with source='cli_break_glass' so it's distinguishable from + google_sync / admin / system_seed in audits. + + The DuckDB file must not be locked by a running app process — stop the + app or use a separate replica before running this. + """ + import uuid as _uuid + + from src.db import SYSTEM_ADMIN_GROUP, get_system_db + from src.repositories.user_groups import UserGroupsRepository + from src.repositories.user_group_members import UserGroupMembersRepository + from src.repositories.users import UserRepository + + if not yes: + confirm = typer.confirm( + f"Grant Admin-group membership to {email!r} (break-glass)?", + default=False, + ) + if not confirm: + typer.echo("Aborted.") + raise typer.Exit(1) + + conn = get_system_db() + try: + users = UserRepository(conn) + groups = UserGroupsRepository(conn) + members = UserGroupMembersRepository(conn) + + admin_group = groups.get_by_name(SYSTEM_ADMIN_GROUP) + if admin_group is None: + typer.echo( + f"FATAL: '{SYSTEM_ADMIN_GROUP}' group missing. Start the app " + "once so _seed_system_groups can recreate it, then retry.", + err=True, + ) + raise typer.Exit(2) + + existing = users.get_by_email(email) + if existing is None: + user_id = _uuid.uuid4().hex + users.create( + id=user_id, + email=email, + name=email.split("@", 1)[0], + role="admin", + ) + typer.echo(f"Created user {email} (id={user_id[:8]}…)") + else: + user_id = existing["id"] + + if members.has_membership(user_id, admin_group["id"]): + typer.echo( + f"{email} is already a member of '{SYSTEM_ADMIN_GROUP}'." + ) + return + + members.add_member( + user_id=user_id, + group_id=admin_group["id"], + source="cli_break_glass", + added_by="cli:break-glass", + ) + typer.echo( + f"Granted Admin to {email}. Audit source='cli_break_glass'." + ) + finally: + try: + conn.close() + except Exception: + pass diff --git a/pyproject.toml b/pyproject.toml index f53936d..98e2259 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,9 @@ dependencies = [ # Claude Code marketplace endpoint — pure-Python git server mounted in FastAPI "dulwich>=0.22.0", "a2wsgi>=1.10.0", + # In-process TTL cache for marketplace etag (transitively present via + # google-auth, declared explicitly here because we depend on it directly). + "cachetools>=5.3.0", ] [project.optional-dependencies] diff --git a/src/db.py b/src/db.py index cf13706..501ba93 100644 --- a/src/db.py +++ b/src/db.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) _SAFE_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$") -SCHEMA_VERSION = 13 +SCHEMA_VERSION = 14 _SYSTEM_SCHEMA = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -284,9 +284,16 @@ CREATE TABLE IF NOT EXISTS user_groups ( -- The `source` column tracks who created the row so each source only mutates -- its own rows — Google sync's nightly DELETE+INSERT does NOT clobber -- admin-added members, and admin UI deletions don't fight the sync loop. +-- +-- v14: group_id now FK→user_groups(id). DuckDB FK enforcement blocks the +-- parent DELETE while children exist, so the application must delete +-- members + resource_grants BEFORE the user_groups row (see +-- app/api/access.py:delete_group). DuckDB does NOT support ON DELETE +-- CASCADE, so we rely on explicit transactional cleanup at the call site +-- and let the FK serve as a defense-in-depth invariant. CREATE TABLE IF NOT EXISTS user_group_members ( user_id VARCHAR NOT NULL, - group_id VARCHAR NOT NULL, + group_id VARCHAR NOT NULL REFERENCES user_groups(id), source VARCHAR NOT NULL, -- 'admin' | 'google_sync' | 'system_seed' added_at TIMESTAMP DEFAULT current_timestamp, added_by VARCHAR, @@ -298,9 +305,11 @@ CREATE TABLE IF NOT EXISTS user_group_members ( -- app.resource_types.ResourceType enum (e.g. 'marketplace_plugin'). -- resource_id is a path string whose format is owned by the module that -- registered the resource type (e.g. '/'). +-- +-- v14: group_id FK→user_groups(id), same rationale as user_group_members. CREATE TABLE IF NOT EXISTS resource_grants ( id VARCHAR PRIMARY KEY, - group_id VARCHAR NOT NULL, + group_id VARCHAR NOT NULL REFERENCES user_groups(id), resource_type VARCHAR NOT NULL, resource_id VARCHAR NOT NULL, assigned_at TIMESTAMP DEFAULT current_timestamp, @@ -977,6 +986,110 @@ def _v12_to_v13_finalize(conn: duckdb.DuckDBPyConnection) -> None: raise +def _v13_to_v14_finalize(conn: duckdb.DuckDBPyConnection) -> None: + """Add FOREIGN KEY (group_id) → user_groups(id) on user_group_members + and resource_grants. + + DuckDB does not support ALTER TABLE ADD CONSTRAINT for foreign keys, so + the migration recreates each table: + + 1. Pre-clean orphan rows (group_id no longer in user_groups). + These should not exist on a clean v13 DB but the app-layer + cascade was best-effort before this PR (see #3). + 2. RENAME old table to *_v13_pre. + 3. CREATE TABLE with the FK (matches the v14 _SYSTEM_SCHEMA). + 4. INSERT … SELECT from *_v13_pre. + 5. DROP *_v13_pre. + + Wrapped in BEGIN TRANSACTION so a mid-flight failure rolls back to + a clean v13 state and the outer caller skips the schema_version bump. + DuckDB does NOT support ON DELETE CASCADE — see _SYSTEM_SCHEMA above + and app/api/access.py:delete_group for the explicit cascade. + """ + orphan_members = conn.execute( + """SELECT COUNT(*) FROM user_group_members + WHERE group_id NOT IN (SELECT id FROM user_groups)""" + ).fetchone()[0] + orphan_grants = conn.execute( + """SELECT COUNT(*) FROM resource_grants + WHERE group_id NOT IN (SELECT id FROM user_groups)""" + ).fetchone()[0] + if orphan_members: + logger.warning( + "v14 migration: dropping %d orphan user_group_members rows " + "(group_id pointed at a deleted user_groups.id)", + orphan_members, + ) + if orphan_grants: + logger.warning( + "v14 migration: dropping %d orphan resource_grants rows", + orphan_grants, + ) + + conn.execute("BEGIN TRANSACTION") + try: + # Orphan cleanup must happen inside the transaction so it rolls + # back together with the table swap on any failure. + conn.execute( + """DELETE FROM user_group_members + WHERE group_id NOT IN (SELECT id FROM user_groups)""" + ) + conn.execute( + """DELETE FROM resource_grants + WHERE group_id NOT IN (SELECT id FROM user_groups)""" + ) + + # user_group_members rebuild + conn.execute( + "ALTER TABLE user_group_members RENAME TO user_group_members_v13_pre" + ) + conn.execute( + """CREATE TABLE user_group_members ( + user_id VARCHAR NOT NULL, + group_id VARCHAR NOT NULL REFERENCES user_groups(id), + source VARCHAR NOT NULL, + added_at TIMESTAMP DEFAULT current_timestamp, + added_by VARCHAR, + PRIMARY KEY (user_id, group_id) + )""" + ) + conn.execute( + """INSERT INTO user_group_members + (user_id, group_id, source, added_at, added_by) + SELECT user_id, group_id, source, added_at, added_by + FROM user_group_members_v13_pre""" + ) + conn.execute("DROP TABLE user_group_members_v13_pre") + + # resource_grants rebuild + conn.execute( + "ALTER TABLE resource_grants RENAME TO resource_grants_v13_pre" + ) + conn.execute( + """CREATE TABLE resource_grants ( + id VARCHAR PRIMARY KEY, + group_id VARCHAR NOT NULL REFERENCES user_groups(id), + resource_type VARCHAR NOT NULL, + resource_id VARCHAR NOT NULL, + assigned_at TIMESTAMP DEFAULT current_timestamp, + assigned_by VARCHAR, + UNIQUE (group_id, resource_type, resource_id) + )""" + ) + conn.execute( + """INSERT INTO resource_grants + (id, group_id, resource_type, resource_id, assigned_at, assigned_by) + SELECT id, group_id, resource_type, resource_id, assigned_at, assigned_by + FROM resource_grants_v13_pre""" + ) + conn.execute("DROP TABLE resource_grants_v13_pre") + + conn.execute("COMMIT") + except Exception: + conn.execute("ROLLBACK") + raise + + def _seed_core_roles(conn: duckdb.DuckDBPyConnection) -> None: """Idempotently insert/refresh the four core.* hierarchy roles. @@ -1210,6 +1323,8 @@ def _ensure_schema(conn: duckdb.DuckDBPyConnection) -> None: for sql in _V12_TO_V13_MIGRATIONS: conn.execute(sql) _v12_to_v13_finalize(conn) + if current < 14: + _v13_to_v14_finalize(conn) conn.execute( "UPDATE schema_version SET version = ?, applied_at = current_timestamp", [SCHEMA_VERSION], diff --git a/src/marketplace.py b/src/marketplace.py index bf73beb..ded315c 100644 --- a/src/marketplace.py +++ b/src/marketplace.py @@ -278,6 +278,16 @@ def sync_marketplaces() -> Dict[str, Any]: finally: conn.close() + # Drop cached etags so the next /marketplace.zip request re-hashes against + # the freshly-synced content rather than waiting for TTL expiry. Late + # import: keeps src.marketplace decoupled from the FastAPI app surface. + if synced: + try: + from app.marketplace_server import packager as _packager + _packager.invalidate_etag_cache() + except ImportError: + pass + return {"synced": synced, "errors": errors} diff --git a/src/repositories/user_groups.py b/src/repositories/user_groups.py index 6117678..03aebf9 100644 --- a/src/repositories/user_groups.py +++ b/src/repositories/user_groups.py @@ -115,13 +115,19 @@ class UserGroupsRepository: name: Optional[str] = None, description: Optional[str] = None, ) -> None: - # Block mutation of system groups — name/description are seeded and - # callers must not be able to rename "Admin" / "Everyone" out from - # under the marketplace filter. + # Block rename of system groups — the canonical names "Admin" / + # "Everyone" are referenced from `app.auth.access` and the + # marketplace filter and must not move. Description edits are + # cosmetic and allowed (admins curate them in /admin/access). existing = self.get(group_id) - if existing and existing.get("is_system"): + if ( + existing + and existing.get("is_system") + and name is not None + and name != existing["name"] + ): raise SystemGroupProtected( - f"group {existing.get('name')!r} is a system group and cannot be modified" + f"group {existing.get('name')!r} is a system group and cannot be renamed" ) sets: List[str] = [] params: List[Any] = [] diff --git a/tests/test_db.py b/tests/test_db.py index c5fd65f..9addc92 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -762,3 +762,131 @@ class TestSchemaV12: assert grants == [("marketplace_plugin", "foundry-ai/metrics")] finally: conn.close() + + def test_v12_to_v13_finalize_rollback_on_failure(self, tmp_path, monkeypatch): + """Mid-flight failure in _v12_to_v13_finalize rolls the v13 backfill + back to a clean v12 state and the next start retries the migration. + + Setup mirrors test_v12_to_v13_migration_backfill — a hand-crafted v12 + DB with sample data that the finalize would otherwise migrate. We + monkey-patch _seed_system_groups (the first call inside the + transaction) to raise mid-finalize and verify: + + 1. schema_version stays at 12. + 2. Legacy tables (user_role_grants, plugin_access, …) are NOT + dropped — the finalize had not reached the DROP step. + 3. user_group_members + resource_grants are EMPTY (the inserts + that ran before the failure were rolled back). + 4. A second start succeeds and produces the same final state as + a clean run. + """ + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + import json + import uuid + import duckdb as _duckdb + from src import db as _db + from src.db import get_system_db, get_schema_version, SCHEMA_VERSION + + db_path = tmp_path / "state" / "system.duckdb" + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = _duckdb.connect(str(db_path)) + conn.execute(""" + CREATE TABLE schema_version (version INTEGER, applied_at TIMESTAMP DEFAULT current_timestamp); + INSERT INTO schema_version (version) VALUES (12); + CREATE TABLE users ( + id VARCHAR PRIMARY KEY, email VARCHAR UNIQUE NOT NULL, name VARCHAR, role VARCHAR, + password_hash VARCHAR, setup_token VARCHAR, setup_token_created TIMESTAMP, + reset_token VARCHAR, reset_token_created TIMESTAMP, + active BOOLEAN DEFAULT TRUE, deactivated_at TIMESTAMP, deactivated_by VARCHAR, + groups JSON, created_at TIMESTAMP, updated_at TIMESTAMP + ); + CREATE TABLE internal_roles (id VARCHAR PRIMARY KEY, key VARCHAR UNIQUE NOT NULL, + display_name VARCHAR NOT NULL, description TEXT, owner_module VARCHAR, + implies VARCHAR, is_core BOOLEAN, created_at TIMESTAMP, updated_at TIMESTAMP); + CREATE TABLE user_role_grants (id VARCHAR PRIMARY KEY, + user_id VARCHAR REFERENCES users(id), + internal_role_id VARCHAR REFERENCES internal_roles(id), + granted_at TIMESTAMP, granted_by VARCHAR, source VARCHAR); + CREATE TABLE group_mappings (id VARCHAR PRIMARY KEY, external_group_id VARCHAR, + internal_role_id VARCHAR REFERENCES internal_roles(id), + assigned_at TIMESTAMP, assigned_by VARCHAR); + CREATE TABLE user_groups (id VARCHAR PRIMARY KEY, name VARCHAR UNIQUE, + description TEXT, is_system BOOLEAN, created_at TIMESTAMP, created_by VARCHAR); + CREATE TABLE plugin_access (group_id VARCHAR, marketplace_id VARCHAR, + plugin_name VARCHAR, granted_at TIMESTAMP, granted_by VARCHAR); + """) + admin_uid = str(uuid.uuid4()) + conn.execute( + "INSERT INTO users (id, email, name, groups) VALUES (?, ?, ?, ?)", + [admin_uid, 'admin@x', 'A', json.dumps(['Engineering'])], + ) + eng_id = str(uuid.uuid4()) + conn.execute("INSERT INTO user_groups (id, name) VALUES (?, ?)", [eng_id, 'Engineering']) + core_admin = str(uuid.uuid4()) + conn.execute( + "INSERT INTO internal_roles (id, key, display_name) VALUES (?, 'core.admin', 'Admin')", + [core_admin], + ) + conn.execute( + "INSERT INTO user_role_grants (id, user_id, internal_role_id) VALUES (?, ?, ?)", + [str(uuid.uuid4()), admin_uid, core_admin], + ) + conn.execute( + "INSERT INTO plugin_access (group_id, marketplace_id, plugin_name) VALUES (?, ?, ?)", + [eng_id, 'foundry-ai', 'metrics'], + ) + conn.close() + + # Inject a failure inside the v12→v13 finalize transaction. + original_seed = _db._seed_system_groups + def _boom(_conn): + raise RuntimeError("synthetic mid-flight failure") + monkeypatch.setattr(_db, "_seed_system_groups", _boom) + + with pytest.raises(RuntimeError, match="synthetic mid-flight failure"): + get_system_db() + # Drop the cached connection the failed _ensure_schema may have + # registered (its lock is held; we want a clean re-attempt below). + _db._system_db_conn = None + + # Open the DB raw and verify rollback. + conn = _duckdb.connect(str(db_path)) + try: + assert get_schema_version(conn) == 12, ( + "schema_version must stay at 12 after rollback" + ) + tables = { + r[0] for r in conn.execute( + "SELECT table_name FROM information_schema.tables" + ).fetchall() + } + for legacy in ("internal_roles", "group_mappings", + "user_role_grants", "plugin_access"): + assert legacy in tables, ( + f"{legacy} must NOT be dropped on rollback" + ) + # New tables exist (created by _V12_TO_V13_MIGRATIONS before the + # finalize ran) but contain no rows. + assert tables.issuperset({"user_group_members", "resource_grants"}) + count_members = conn.execute( + "SELECT COUNT(*) FROM user_group_members" + ).fetchone()[0] + count_grants = conn.execute( + "SELECT COUNT(*) FROM resource_grants" + ).fetchone()[0] + assert count_members == 0, "backfill rows leaked past ROLLBACK" + assert count_grants == 0, "backfill rows leaked past ROLLBACK" + finally: + conn.close() + + # Restore the real finalize and verify a fresh start completes. + monkeypatch.setattr(_db, "_seed_system_groups", original_seed) + conn = get_system_db() + try: + assert get_schema_version(conn) == SCHEMA_VERSION + count_members = conn.execute( + "SELECT COUNT(*) FROM user_group_members" + ).fetchone()[0] + assert count_members > 0, "retry should backfill members" + finally: + conn.close() diff --git a/tests/test_repositories.py b/tests/test_repositories.py index 186433f..0d8d4f1 100644 --- a/tests/test_repositories.py +++ b/tests/test_repositories.py @@ -381,7 +381,7 @@ class TestUserGroupsRepository: row = repo.create(name="SysGroup", description="seeded", is_system=True) assert row["is_system"] is True - def test_update_system_group_blocked(self, db_conn): + def test_update_system_group_rename_blocked(self, db_conn): from src.repositories.user_groups import ( UserGroupsRepository, SystemGroupProtected, ) @@ -390,6 +390,22 @@ class TestUserGroupsRepository: with pytest.raises(SystemGroupProtected): repo.update(row["id"], name="Renamed") + def test_update_system_group_description_allowed(self, db_conn): + from src.repositories.user_groups import UserGroupsRepository + repo = UserGroupsRepository(db_conn) + row = repo.create(name="Admin3", description="seeded", is_system=True) + repo.update(row["id"], description="curated by ops") + assert repo.get(row["id"])["description"] == "curated by ops" + + def test_update_system_group_same_name_no_op(self, db_conn): + """Endpoint may pass payload.name unchanged on a description PATCH; + repo must not raise when the name argument equals the existing name.""" + from src.repositories.user_groups import UserGroupsRepository + repo = UserGroupsRepository(db_conn) + row = repo.create(name="Admin4", description="seeded", is_system=True) + repo.update(row["id"], name="Admin4", description="curated") + assert repo.get(row["id"])["description"] == "curated" + def test_delete_system_group_blocked(self, db_conn): from src.repositories.user_groups import ( UserGroupsRepository, SystemGroupProtected, @@ -418,6 +434,20 @@ class TestUserGroupsRepository: assert promoted["id"] == row["id"] assert promoted["is_system"] is True + def test_ensure_system_preserves_user_description_on_promotion(self, db_conn): + """Pre-existing non-system group named 'Admin' (e.g. created manually + on a v9 deploy) must be promoted in place without losing the operator's + description. Startup must never fatal-error here.""" + from src.repositories.user_groups import UserGroupsRepository + repo = UserGroupsRepository(db_conn) + manual = repo.create(name="ClaimMe", description="operator-curated") + promoted = repo.ensure_system("ClaimMe", "seeded default") + assert promoted["id"] == manual["id"] + assert promoted["is_system"] is True + # Description on the existing row is preserved — the seeded default + # only applies when a brand-new system group is created. + assert promoted["description"] == "operator-curated" + def test_ensure_creates_missing(self, db_conn): from src.repositories.user_groups import UserGroupsRepository repo = UserGroupsRepository(db_conn) diff --git a/tests/test_resource_types.py b/tests/test_resource_types.py index 362f300..5a8d9ec 100644 --- a/tests/test_resource_types.py +++ b/tests/test_resource_types.py @@ -108,7 +108,13 @@ class TestResourceTypeRegistration: class TestAccessOverviewIncludesTables: - def test_tables_section_present(self, seeded_app): + """Table grants are gated behind AGNES_ENABLE_TABLE_GRANTS until runtime + enforcement lands (see docs/TODO-rbac-data-enforcement.md). These tests + opt in via the env var so they exercise the behavior the feature actually + targets — see TestTableGrantsFeatureFlag below for the disabled path.""" + + def test_tables_section_present(self, seeded_app, monkeypatch): + monkeypatch.setenv("AGNES_ENABLE_TABLE_GRANTS", "1") c = seeded_app["client"] resp = c.get( "/api/admin/access-overview", @@ -119,7 +125,8 @@ class TestAccessOverviewIncludesTables: assert "table" in type_keys assert "marketplace_plugin" in type_keys # regression — still there - def test_seeded_tables_appear_in_overview(self, seeded_app): + def test_seeded_tables_appear_in_overview(self, seeded_app, monkeypatch): + monkeypatch.setenv("AGNES_ENABLE_TABLE_GRANTS", "1") conn = get_system_db() try: TableRegistryRepository(conn).register( @@ -144,3 +151,94 @@ class TestAccessOverviewIncludesTables: for it in block["items"] } assert "overview_test" in all_resource_ids + + +class TestTableGrantsFeatureFlag: + """ResourceType.TABLE is hidden from the admin UI + grant API by default. + Until docs/TODO-rbac-data-enforcement.md step 1 lands, the runtime check + still consults legacy dataset_permissions, so granting a TABLE chip + in /admin/access would appear to work but produce 403s downstream.""" + + def test_resource_types_endpoint_excludes_table_by_default( + self, seeded_app, monkeypatch + ): + monkeypatch.delenv("AGNES_ENABLE_TABLE_GRANTS", raising=False) + c = seeded_app["client"] + resp = c.get( + "/api/admin/resource-types", + headers=_auth(seeded_app["admin_token"]), + ) + assert resp.status_code == 200 + keys = {r["key"] for r in resp.json()} + assert "table" not in keys + assert "marketplace_plugin" in keys + + def test_resource_types_endpoint_includes_table_when_enabled( + self, seeded_app, monkeypatch + ): + monkeypatch.setenv("AGNES_ENABLE_TABLE_GRANTS", "1") + c = seeded_app["client"] + resp = c.get( + "/api/admin/resource-types", + headers=_auth(seeded_app["admin_token"]), + ) + keys = {r["key"] for r in resp.json()} + assert "table" in keys + + def test_create_table_grant_rejected_when_disabled( + self, seeded_app, monkeypatch + ): + monkeypatch.delenv("AGNES_ENABLE_TABLE_GRANTS", raising=False) + # Need a non-system group to grant against; use the seed Engineering + # / Everyone path via /admin/groups. + c = seeded_app["client"] + admin = _auth(seeded_app["admin_token"]) + gresp = c.post( + "/api/admin/groups", + headers=admin, + json={"name": "table-grant-test"}, + ) + assert gresp.status_code == 201 + gid = gresp.json()["id"] + resp = c.post( + "/api/admin/grants", + headers=admin, + json={ + "group_id": gid, + "resource_type": "table", + "resource_id": "some.table", + }, + ) + assert resp.status_code == 422 + assert "AGNES_ENABLE_TABLE_GRANTS" in resp.json()["detail"] + + def test_create_table_grant_accepted_when_enabled( + self, seeded_app, monkeypatch + ): + monkeypatch.setenv("AGNES_ENABLE_TABLE_GRANTS", "1") + conn = get_system_db() + try: + TableRegistryRepository(conn).register( + id="ff_table", name="ff_table", + bucket="in.c-ff", source_type="dummy", is_public=False, + ) + finally: + conn.close() + c = seeded_app["client"] + admin = _auth(seeded_app["admin_token"]) + gresp = c.post( + "/api/admin/groups", + headers=admin, + json={"name": "table-grant-on"}, + ) + gid = gresp.json()["id"] + resp = c.post( + "/api/admin/grants", + headers=admin, + json={ + "group_id": gid, + "resource_type": "table", + "resource_id": "ff_table", + }, + ) + assert resp.status_code == 201