diff --git a/CHANGELOG.md b/CHANGELOG.md index 7063106..ad42c3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,10 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.55.3] — 2026-05-19 + ### Changed +- **BREAKING:** `src/rbac.can_access_table` + `get_accessible_tables` now route through Data Package stack membership instead of per-table `resource_grants`. Per-table grants no longer surface a table to analysts on their own — admins must wrap tables in a Data Package and grant the package (Required or in the user's stack). `manifest.direct_tables` is always `[]` (key kept for older-CLI destructuring). Internal tables (`agnes_sessions/telemetry/audit`) + admin god-mode keep their carve-outs. Standardised 403 detail across every CLI gate (`/api/data/*`, `/api/query`, `/api/v2/sample`, `/api/v2/scan`, `/api/v2/schema`): *"Table 'X' is not in your stack. Ask an admin to add it to a Data Package you have access to (Required or in your stack), then run `agnes pull` to refresh."* New shared test helper `tests.conftest.grant_table_via_package` replaces the legacy `resource_grants(table)` pattern across 8 test files. Closes #356 / #333 follow-up. - `agnes diagnose` is now role-aware. A fresh analyst install no longer reports `Overall: degraded` just because the server has operator-side warnings (stale tables, session-pipeline cadence, BQ billing-project config) that the analyst can't act on. Server (`/api/health/detailed`) tags every check with `audience: "analyst" | "operator"` plus a top-level `caller_role` derived from `user.is_admin` and an `overall_analyst` aggregation. Client excludes operator checks from the headline for analyst callers, surfaces operator warning count on a secondary line so they stay visible, auto-promotes admin/operator callers to the full aggregation, and lets analysts opt in via `--include-operator-checks`. Legacy servers (no `caller_role`) keep the pre-#345-B full aggregation — no silent regression. Closes #345 B. ### Added @@ -44,6 +47,9 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C inside each pair is what was kept (the incoming side held superseded intermediate-commit duplicates). +### Fixed +- **First-demo UX polish on /catalog** (2026-05-19): Browse grid now groups **Required** packages first instead of by `created_at` so the most-relevant adopt-immediately items lead. `.stack-card__desc` line clamp bumped 2 → 4 lines so card descriptions get more room. `/catalog/t/` table-detail page dropped four editorial sections (Sample questions / What's inside / Things to know / Pairs well with) — hero (name + description + parent packages) only. Same Browse-order treatment applied to `/corporate-memory`. + ## [0.55.2] — 2026-05-19 ### Fixed diff --git a/app/api/data.py b/app/api/data.py index 7d7d8f7..996d790 100644 --- a/app/api/data.py +++ b/app/api/data.py @@ -75,7 +75,10 @@ async def check_access( except Exception: logger.exception("audit_log write failed for data.access_check; continuing") if not granted: - raise HTTPException(status_code=403, detail="Access denied to this table") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, detail=table_not_in_stack_message(table_id), + ) return Response(status_code=204) @@ -104,7 +107,10 @@ async def download_table( raise HTTPException(status_code=404, detail="Table not found") # Check access FIRST if not can_access_table(user, table_id, conn): - raise HTTPException(status_code=403, detail="Access denied to this table") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, detail=table_not_in_stack_message(table_id), + ) data_dir = _get_data_dir() diff --git a/app/api/query.py b/app/api/query.py index 7dea202..059f581 100644 --- a/app/api/query.py +++ b/app/api/query.py @@ -400,7 +400,11 @@ def execute_query( for table in forbidden: pattern = r'\b' + re.escape(table.lower()) + r'\b' if re.search(pattern, sql_lower_masked): - raise HTTPException(status_code=403, detail=f"Access denied to table '{table}'") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, + detail=table_not_in_stack_message(table), + ) # ---- #160 BQ remote-row guardrail + RBAC patch ------------------- dry_run_set, name_lookups, blocked_bq_path = _bq_guardrail_inputs( diff --git a/app/api/sync.py b/app/api/sync.py index b585379..6986b99 100644 --- a/app/api/sync.py +++ b/app/api/sync.py @@ -881,43 +881,24 @@ def _build_direct_tables_section( conn, user: dict, registry_by_name: dict, states_by_table_id: dict, packaged_table_ids: set, ) -> list: - """Tables granted via ``TABLE`` resource_type (not DATA_PACKAGE). + """Always returns ``[]`` — per-table grants no longer manifest for + analysts. - A table granted both directly AND via a package only shows up under the - package — Section 5.1's BC story is that ``tables[]`` (legacy) still - lists everything, while ``direct_tables[]`` is the de-duplicated - forward-compatible projection. + The unified-stack design routes all analyst access through data + packages: admins manage RBAC by adding tables to a package and + granting the package. Ad-hoc ``resource_grants(group, 'table', …)`` + rows that aren't wrapped in a package used to ship as + ``direct_tables[]`` here (for backwards-compat with pre-unified + CLIs); that BC is now dropped because it silently leaked + individually-granted tables into ``agnes catalog`` and the + user-facing manifest, contradicting the "stack is the unit of + access" promise of the new design. + + The empty array is kept in the manifest payload (instead of + omitting the key) so older CLIs that destructure + ``manifest["direct_tables"]`` don't KeyError. """ - group_ids = [ - r[0] for r in conn.execute( - "SELECT group_id FROM user_group_members WHERE user_id = ?", - [user["id"]], - ).fetchall() - ] - if not group_ids: - return [] - placeholders = ",".join(["?"] * len(group_ids)) - rows = conn.execute( - f"""SELECT DISTINCT resource_id FROM resource_grants - WHERE group_id IN ({placeholders}) - AND resource_type = 'table'""", - group_ids, - ).fetchall() - direct_ids = {r[0] for r in rows} - packaged_table_ids - out: list = [] - for tid in direct_ids: - # resource_grants.resource_id for ``TABLE`` is canonically the - # registry id; fall back to name lookup if migration left a name. - reg = None - for r in registry_by_name.values(): - if r.get("id") == tid: - reg = r - break - if reg is None: - reg = registry_by_name.get(tid) or {} - state = states_by_table_id.get(reg.get("name") or tid) or {} - out.append(_table_manifest_entry(state, reg)) - return out + return [] def _build_manifest_for_user(conn, user: dict) -> dict: diff --git a/app/api/v2_sample.py b/app/api/v2_sample.py index 1bc7eac..1e96172 100644 --- a/app/api/v2_sample.py +++ b/app/api/v2_sample.py @@ -218,7 +218,11 @@ def sample( if isinstance(exc, FileNotFoundError): raise HTTPException(status_code=404, detail=f"table {table_id!r} not found") if isinstance(exc, PermissionError): - raise HTTPException(status_code=403, detail="not authorized for this table") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, + detail=table_not_in_stack_message(table_id), + ) if isinstance(exc, ValueError): raise HTTPException( status_code=400, diff --git a/app/api/v2_scan.py b/app/api/v2_scan.py index 2caa146..ac523e0 100644 --- a/app/api/v2_scan.py +++ b/app/api/v2_scan.py @@ -280,7 +280,11 @@ def scan_estimate_endpoint( detail={"error": "validator_rejected", "kind": exc.kind, "details": exc.detail or {}}, ) if isinstance(exc, PermissionError): - raise HTTPException(status_code=403, detail="not authorized for this table") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, + detail=table_not_in_stack_message(str(exc) or ""), + ) if isinstance(exc, FileNotFoundError): raise HTTPException(status_code=404, detail=f"table {exc!s} not found") if isinstance(exc, ValueError): @@ -509,7 +513,11 @@ def scan_endpoint( if isinstance(exc, FileNotFoundError): raise HTTPException(status_code=404, detail="table not found") if isinstance(exc, PermissionError): - raise HTTPException(status_code=403, detail="not authorized") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, + detail=table_not_in_stack_message(str(exc) or ""), + ) if isinstance(exc, ValueError): raise HTTPException(status_code=400, detail=str(exc)) raise HTTPException( diff --git a/app/api/v2_schema.py b/app/api/v2_schema.py index 1c272ab..16669e5 100644 --- a/app/api/v2_schema.py +++ b/app/api/v2_schema.py @@ -261,7 +261,11 @@ def schema( if isinstance(exc, NotFound): raise HTTPException(status_code=404, detail=f"table {table_id!r} not found") if isinstance(exc, PermissionError): - raise HTTPException(status_code=403, detail="not authorized for this table") + from src.rbac import table_not_in_stack_message + raise HTTPException( + status_code=403, + detail=table_not_in_stack_message(table_id), + ) if isinstance(exc, ValueError): raise HTTPException( status_code=400, diff --git a/app/web/router.py b/app/web/router.py index 4e76a96..c676601 100644 --- a/app/web/router.py +++ b/app/web/router.py @@ -953,6 +953,16 @@ async def catalog( browse_entries = resolver.browse(user["id"], ResourceType.DATA_PACKAGE) stack_entries = resolver.stack(user["id"], ResourceType.DATA_PACKAGE) + # Group ``required`` packages first so they cluster together at the + # top of the Browse grid instead of being scattered by creation + # order — first-demo feedback (2026-05-19): "bylo by dobre ty + # required mit vzdy nekde seskupene spolu na jedne strane". + # Secondary order falls back to the resolver's name-ordered output. + browse_entries = sorted( + browse_entries, + key=lambda e: (0 if e.requirement == "required" else 1, e.name or ""), + ) + def _adapt(e): slug = None try: @@ -1426,6 +1436,12 @@ async def corporate_memory( browse_entries = resolver.browse(user["id"], ResourceType.MEMORY_DOMAIN) stack_entries = resolver.stack(user["id"], ResourceType.MEMORY_DOMAIN) + # Required-first grouping mirrors /catalog (first-demo feedback). + browse_entries = sorted( + browse_entries, + key=lambda e: (0 if e.requirement == "required" else 1, e.name or ""), + ) + def _adapt(e): meta = dom_meta.get(e.id, {}) slug = meta.get("slug") diff --git a/app/web/static/css/stack_card.css b/app/web/static/css/stack_card.css index 25616dc..3767ce2 100644 --- a/app/web/static/css/stack_card.css +++ b/app/web/static/css/stack_card.css @@ -531,7 +531,12 @@ color: var(--text-secondary, #5f6368); line-height: 1.5; display: -webkit-box; - -webkit-line-clamp: 2; + /* Bumped 2 → 4 per first-demo feedback: 2-line clamp made every + description trail off in "…" before the meaningful second clause, + forcing analysts to click through to read the full sentence. Detail + page (/catalog/p/) still carries the unclamped body for + longer admin-authored content. */ + -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; } diff --git a/app/web/templates/catalog_table_detail.html b/app/web/templates/catalog_table_detail.html index c77f376..accbd2d 100644 --- a/app/web/templates/catalog_table_detail.html +++ b/app/web/templates/catalog_table_detail.html @@ -202,221 +202,18 @@ -{# ── Sample questions ───────────────────────────────────────── #} -
-
-

Sample questions you can ask

- {% if user.is_admin %} - - {% endif %} -
+{# The original layout had four editorial sections — Sample questions, + What's inside (columns), Things to know, Pairs well with — that the + admin could fill in. Per user feedback they read as noise on a page + whose job is "what is this table and where do I find it"; the + structural answer is the hero (name + description + parent + packages). Dropping the four sections keeps the page focused. The + PATCH endpoint + columns query still exist for tools that read + them programmatically; this only changes the user-facing render. #} - {% if sample_questions %} -
    - {% for q in sample_questions %}
  • {{ q }}
  • {% endfor %} -
- {% else %} -

- No sample questions seeded yet. - {% if user.is_admin %}Click + Add to seed some.{% endif %} -

- {% endif %} - - {% if user.is_admin %} -
- -
One question per line. Saved as a JSON array of strings.
-
- - -
-
- {% endif %} -
- -{# ── Columns / What's inside ─────────────────────────────────── #} -
-
-

What's inside

- {% if columns %} - {{ columns|length }} columns - {% endif %} -
- - {% if columns %} - - - - {% for c in columns %} - - - - - - {% endfor %} - -
ColumnTypeNullable
{{ c.name }}{{ c.type or '—' }}{{ 'yes' if c.nullable else 'no' }}
- {% else %} -

- {% if table.source_type|lower == 'internal' %} - No column profile cached yet — Agnes internal tables are populated - by the server itself; columns surface once the table accumulates - its first rows. - {% else %} - No column profile available yet. Run a sync to populate the column - list, types, and nullability. - {% endif %} -

- {# Trigger sync only makes sense for non-internal tables — internal - ones are server-managed (no upstream to pull from). #} - {% if user.is_admin and table.source_type|lower != 'internal' %} -
- -
- {% endif %} - {% endif %} -
- -{# ── Things to know ─────────────────────────────────────────── #} -
-
-

Things to know

- {% if user.is_admin %} - - {% endif %} -
- - {% if things_to_know %} -
{{ things_to_know }}
- {% else %} -

- No caveats or join hints recorded yet. - {% if user.is_admin %}Click + Add to record some.{% endif %} -

- {% endif %} - - {% if user.is_admin %} -
- -
- - -
-
- {% endif %} -
- -{# ── Pairs well with ───────────────────────────────────────── #} -
-
-

Pairs well with

- {% if user.is_admin %} - - {% endif %} -
- - {% if pairs_well_with %} -
- {% for p in pairs_well_with %} - {{ p.name }} - {% endfor %} -
- {% else %} -

- No related tables suggested yet. - {% if user.is_admin %}Click + Add to link some.{% endif %} -

- {% endif %} - - {% if user.is_admin %} -
- -
Comma-separated table_registry.id values. Unknown ids are silently dropped on render.
-
- - -
-
- {% endif %} -
- -{% if user.is_admin %} - -{% endif %} +{# Inline-edit / sync-trigger / docs-PATCH JS was removed alongside the + four editorial sections it drove (sample_questions, columns, + things_to_know, pairs_well_with). The endpoints (`POST + /api/sync/trigger`, `PATCH /api/admin/registry/{id}/docs`) still + exist for direct API callers + admin tooling. #} {% endblock %} diff --git a/pyproject.toml b/pyproject.toml index c0240a4..612a293 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.55.2" +version = "0.55.3" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" diff --git a/src/rbac.py b/src/rbac.py index b9d57c6..3ed6276 100644 --- a/src/rbac.py +++ b/src/rbac.py @@ -10,10 +10,42 @@ shim mapping the table-grain helpers onto the generic resource_grants check. from typing import Optional import duckdb +from fastapi import HTTPException from src.db import get_system_db +def table_not_in_stack_message(table_id: str) -> str: + """Standardized 403 detail string for table-access denial. + + All CLI surfaces (`agnes query`, `agnes snapshot create`, + `agnes data /download`, `/api/v2/schema`, `/api/v2/sample`) + funnel through ``can_access_table`` and return this same string so + the analyst's mental model stays consistent: "the table I asked + about isn't in my stack — admin needs to add it to a Data Package". + """ + return ( + f"Table '{table_id}' is not in your stack. Ask an admin to add it " + f"to a Data Package you have access to (Required or in your stack), " + f"then run `agnes pull` to refresh." + ) + + +def require_table_access( + user: dict, + table_id: str, + conn: Optional[duckdb.DuckDBPyConnection] = None, +) -> None: + """Convenience: ``can_access_table`` or raise 403 with the standard + message. Centralizes the deny path so every CLI surface returns the + same actionable error. + """ + if not can_access_table(user, table_id, conn): + raise HTTPException( + status_code=403, detail=table_not_in_stack_message(table_id), + ) + + def can_access_table( user: dict, table_id: str, @@ -21,22 +53,27 @@ def can_access_table( ) -> bool: """True iff the user can read ``table_id``. - Admin short-circuit (members of the Admin system group) plus - per-(group, table) grants in ``resource_grants``. Nothing else — no - ``is_public`` bypass, no per-user permissions table, no bucket - wildcards. Every non-admin access requires an explicit - ``resource_grants(group, "table", table_id)`` row. + Three sources of access (in precedence order): + 1. Internal data-source tables (``agnes_sessions`` / ``agnes_telemetry`` + / ``agnes_audit``) — implicitly accessible to every authenticated + user. RBAC there is row-level (the per-request view filters to the + caller's rows). Admin gets the unscoped view; non-admin gets their + own rows. + 2. Admin god-mode — members of the Admin system group see every + registered table. + 3. **Stack-gated**: the table must belong to at least one data + package in the user's stack (required ∪ subscribed). Per-table + resource_grants alone NO LONGER grant analyst visibility — the + unified-stack design routes all analyst access through data + packages. Admins manage access by adding tables to a package + + granting the package; ad-hoc per-table grants in + ``resource_grants`` are a no-op for analysts (still consulted + for backwards-compat fallback inside admin-only flows). """ user_id = user.get("id") if not user_id: return False - # Internal data-source tables (agnes_sessions / agnes_usage / agnes_audit) - # are implicitly accessible to every authenticated user — RBAC there is - # row-level (the per-request view filters to the caller's rows) rather - # than table-level. Admin gets the unscoped view; non-admin gets their - # own rows. Both paths are gated downstream; the table-grain check just - # needs to wave them through. from connectors.internal.access import is_internal_table if is_internal_table(table_id): return True @@ -46,9 +83,25 @@ def can_access_table( conn = get_system_db() should_close = True try: - from app.auth.access import can_access + from app.auth.access import is_user_admin + if is_user_admin(user_id, conn): + return True + + from app.services.stack_resolver import StackResolver from app.resource_types import ResourceType - return can_access(user_id, ResourceType.TABLE.value, table_id, conn) + resolver = StackResolver(conn) + pkg_entries = resolver.stack(user_id, ResourceType.DATA_PACKAGE) + if not pkg_entries: + return False + pkg_ids = [e.id for e in pkg_entries] + placeholders = ",".join(["?"] * len(pkg_ids)) + row = conn.execute( + f"""SELECT 1 FROM data_package_tables + WHERE package_id IN ({placeholders}) AND table_id = ? + LIMIT 1""", + [*pkg_ids, table_id], + ).fetchone() + return bool(row) finally: if should_close: conn.close() @@ -58,7 +111,15 @@ def get_accessible_tables( user: dict, conn: Optional[duckdb.DuckDBPyConnection] = None, ) -> Optional[list[str]]: - """List of table IDs the user can read. ``None`` means "all" (admin).""" + """List of table IDs the user can read. ``None`` means "all" (admin). + + Stack-gated for analysts: the set is the union of + * internal tables (row-level RBAC at query time), and + * tables belonging to data packages in the user's stack + (required ∪ subscribed). + Per-table ``resource_grants(group, 'table', …)`` rows are NO LONGER + consulted for analyst visibility — see :func:`can_access_table`. + """ user_id = user.get("id") if not user_id: return [] @@ -72,19 +133,21 @@ def get_accessible_tables( if is_user_admin(user_id, conn): return None # admin sees everything - # Non-admin: list every table_id with a matching grant via any group - # the user belongs to. Single SQL — no Python-side filtering loop. - # Internal tables are auto-granted (see can_access_table) — they're - # always in every authenticated user's accessible set even without - # a resource_grants row. - rows = conn.execute( - """SELECT DISTINCT rg.resource_id - FROM resource_grants rg - JOIN user_group_members m ON m.group_id = rg.group_id - WHERE m.user_id = ? AND rg.resource_type = 'table'""", - [user_id], - ).fetchall() - result = [r[0] for r in rows] + from app.services.stack_resolver import StackResolver + from app.resource_types import ResourceType + resolver = StackResolver(conn) + pkg_entries = resolver.stack(user_id, ResourceType.DATA_PACKAGE) + result: list[str] = [] + if pkg_entries: + pkg_ids = [e.id for e in pkg_entries] + placeholders = ",".join(["?"] * len(pkg_ids)) + rows = conn.execute( + f"""SELECT DISTINCT table_id FROM data_package_tables + WHERE package_id IN ({placeholders})""", + pkg_ids, + ).fetchall() + result = [r[0] for r in rows] + # Internal tables — always accessible (row-level RBAC at query time). from connectors.internal.access import INTERNAL_TABLES for t in INTERNAL_TABLES: if t.registry_id not in result: diff --git a/tests/conftest.py b/tests/conftest.py index bec28ee..0b93d42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -443,3 +443,91 @@ def stub_bq_extractor(monkeypatch): lambda *a, **kw: MagicMock(), ) return rebuild_mock + + +def grant_table_via_package( + conn, + table_id: str, + user_id: str, + *, + group_name: str = "analyst-pkg-grants", + requirement: str = "required", +) -> str: + """Test helper — wrap a single table in an auto-named data_package and + grant the package to a custom group the user belongs to. + + Replaces the legacy "per-table resource_grants" pattern: stack-gated + RBAC routes all analyst visibility through data_packages, so a + standalone TABLE grant no longer surfaces the table to the analyst. + Returns the data_package id so callers can revoke (DELETE package + → tables_in_package + grants cascade) or assert membership. + + Defaults to ``requirement='required'`` so the wrapping package + lands in the user's stack automatically — every existing test that + just asserted "table visible after grant" stays correct without + needing an explicit subscribe step. + """ + import uuid + from src.repositories.user_groups import UserGroupsRepository + from src.repositories.user_group_members import UserGroupMembersRepository + from src.repositories.resource_grants import ResourceGrantsRepository + from src.repositories.data_packages import DataPackagesRepository + + groups = UserGroupsRepository(conn) + grp = groups.get_by_name(group_name) + if not grp: + grp = groups.create( + name=group_name, description="test", created_by="test", + ) + members = UserGroupMembersRepository(conn) + if not members.has_membership(user_id, grp["id"]): + members.add_member( + user_id, grp["id"], source="admin", added_by="test", + ) + + pkgs = DataPackagesRepository(conn) + pkg_slug = f"_test-pkg-{table_id.lower()}"[:63] + existing = pkgs.get_by_slug(pkg_slug) if hasattr(pkgs, "get_by_slug") else None + if existing: + pkg_id = existing["id"] + else: + pkg_id = pkgs.create( + name=f"Test wrap {table_id}", slug=pkg_slug, + description=None, icon=None, color=None, + created_by="test", + ) + pkgs.add_table(pkg_id, table_id, added_by="test") + + grants = ResourceGrantsRepository(conn) + if not grants.has_grant([grp["id"]], "data_package", pkg_id): + grants.create( + group_id=grp["id"], + resource_type="data_package", + resource_id=pkg_id, + assigned_by="test", + requirement=requirement, + ) + return pkg_id + + +def revoke_table_via_package(conn, table_id: str) -> None: + """Mirror of :func:`grant_table_via_package` — drops the wrapping + data_packages (and via FK cascade the junction + grants) for every + auto-package that wraps this table. + """ + rows = conn.execute( + "SELECT DISTINCT package_id FROM data_package_tables WHERE table_id = ?", + [table_id], + ).fetchall() + for r in rows: + # Hard-delete via raw SQL so the test fixture doesn't leak rows + # across tests sharing the seeded_app DB. + conn.execute( + "DELETE FROM resource_grants " + "WHERE resource_type = 'data_package' AND resource_id = ?", + [r[0]], + ) + conn.execute( + "DELETE FROM data_package_tables WHERE package_id = ?", [r[0]], + ) + conn.execute("DELETE FROM data_packages WHERE id = ?", [r[0]]) diff --git a/tests/fixtures/analyst_bootstrap.py b/tests/fixtures/analyst_bootstrap.py index dc4cb0e..ef1be30 100644 --- a/tests/fixtures/analyst_bootstrap.py +++ b/tests/fixtures/analyst_bootstrap.py @@ -285,22 +285,65 @@ def _mint_pat(server_url: str, email: str, password: str, *, name: str) -> str: def _grant_table_access(web_session: httpx.Client, group_id: str, table_id: str) -> None: - """POST /api/admin/grants for `(group, "table", table_id)`. + """Stack-gated RBAC: wrap ``table_id`` in an auto-named data_package + and grant the package to ``group_id`` with ``requirement='required'`` + so it lands in every group-member's stack automatically. - Idempotent: a 409 from the unique constraint is swallowed so the - fixture can be reused with a pre-existing grant. + Per-table grants on ``resource_grants(group, 'table', …)`` are no + longer consulted for analyst visibility — the unified-stack design + routes all analyst access through data_packages. This fixture + creates the wrapping package via admin APIs so the rest of the + bootstrap (manifest fetch, ``agnes pull``) behaves as if the + analyst had been granted the table directly. + + Idempotent — re-running the fixture against an already-wrapped + table reuses the existing package + grant. """ - resp = web_session.post( + pkg_slug = f"_test-pkg-{table_id.lower()}"[:63] + pkg_name = f"Test wrap {table_id}" + + # 1) Find or create the auto data_package. + list_resp = web_session.get("/api/admin/data-packages") + pkg_id: str | None = None + if list_resp.status_code == 200: + for p in list_resp.json(): + if p.get("slug") == pkg_slug: + pkg_id = p["id"] + break + if pkg_id is None: + create_resp = web_session.post( + "/api/admin/data-packages", + json={"name": pkg_name, "slug": pkg_slug}, + ) + if create_resp.status_code not in (200, 201): + raise AssertionError( + f"pkg create failed: {create_resp.status_code} {create_resp.text[:300]}" + ) + pkg_id = create_resp.json()["id"] + + # 2) Attach the table (idempotent — 409 means already attached). + attach_resp = web_session.post( + f"/api/admin/data-packages/{pkg_id}/tables", + json={"table_id": table_id}, + ) + if attach_resp.status_code not in (200, 201, 204, 409): + raise AssertionError( + f"pkg attach failed: {attach_resp.status_code} {attach_resp.text[:300]}" + ) + + # 3) Grant the package to the group as required. + grant_resp = web_session.post( "/api/admin/grants", json={ "group_id": group_id, - "resource_type": "table", - "resource_id": table_id, + "resource_type": "data_package", + "resource_id": pkg_id, + "requirement": "required", }, ) - if resp.status_code not in (201, 409): + if grant_resp.status_code not in (201, 409): raise AssertionError( - f"grant create failed: {resp.status_code} {resp.text[:300]}" + f"grant create failed: {grant_resp.status_code} {grant_resp.text[:300]}" ) diff --git a/tests/test_access_control.py b/tests/test_access_control.py index c03ec32..94e03dc 100644 --- a/tests/test_access_control.py +++ b/tests/test_access_control.py @@ -21,29 +21,26 @@ def _auth(token): def _grant_table_to_analyst(conn, table_id: str, group_name: str = "analyst-grants") -> str: - """Create (or reuse) a custom group, add analyst1 to it, mint a TABLE - grant on `table_id`. Returns the group_id so callers can revoke later.""" - from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository - from src.repositories.resource_grants import ResourceGrantsRepository + """Grant analyst1 access to ``table_id`` via a wrapping data_package. - groups = UserGroupsRepository(conn) - grp = groups.get_by_name(group_name) - if not grp: - grp = groups.create(name=group_name, description="test", created_by="test") - members = UserGroupMembersRepository(conn) - if not members.has_membership("analyst1", grp["id"]): - members.add_member("analyst1", grp["id"], source="admin", added_by="test") - grants = ResourceGrantsRepository(conn) - if not grants.has_grant([grp["id"]], "table", table_id): - grants.create(group_id=grp["id"], resource_type="table", resource_id=table_id, - assigned_by="test") - return grp["id"] + Stack-gated RBAC: per-table resource_grants no longer surface to + analysts — every analyst visibility flows through a data_package in + the user's stack. Delegates to the shared test helper which creates + the wrapping package + required grant. + """ + from tests.conftest import grant_table_via_package + grant_table_via_package( + conn, table_id, "analyst1", group_name=group_name, + ) + from src.repositories.user_groups import UserGroupsRepository + return UserGroupsRepository(conn).get_by_name(group_name)["id"] def _revoke_all_table_grants(conn, table_id: str) -> None: - from src.repositories.resource_grants import ResourceGrantsRepository - ResourceGrantsRepository(conn).delete_by_resource("table", table_id) + """Revoke via dropping the wrapping data_package (cascade clears + junction + grant).""" + from tests.conftest import revoke_table_via_package + revoke_table_via_package(conn, table_id) class TestAdminBypass: diff --git a/tests/test_api_complete.py b/tests/test_api_complete.py index e4d0dc0..adf45f8 100644 --- a/tests/test_api_complete.py +++ b/tests/test_api_complete.py @@ -79,20 +79,12 @@ class TestCatalog: json={"name": "granted_table", "source_type": "keboola"}, headers=_h(client["admin"])) from src.db import get_system_db - from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository - from src.repositories.resource_grants import ResourceGrantsRepository + from tests.conftest import grant_table_via_package conn = get_system_db() try: - grp = UserGroupsRepository(conn).create( - name="api-complete-grant", description="t", created_by="t", - ) - UserGroupMembersRepository(conn).add_member( - "analyst1", grp["id"], source="admin", added_by="t", - ) - ResourceGrantsRepository(conn).create( - group_id=grp["id"], resource_type="table", resource_id="granted_table", - assigned_by="t", + grant_table_via_package( + conn, "granted_table", "analyst1", + group_name="api-complete-grant", ) finally: conn.close() diff --git a/tests/test_audit_gap_data_download.py b/tests/test_audit_gap_data_download.py index c89b961..7b8fedc 100644 --- a/tests/test_audit_gap_data_download.py +++ b/tests/test_audit_gap_data_download.py @@ -3,27 +3,18 @@ from src.db import get_system_db def _grant_table(conn, user_id: str, table_id: str) -> str: + """Stack-gated RBAC: wrap table in auto data_package + grant package.""" + from tests.conftest import grant_table_via_package from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository from src.repositories.resource_grants import ResourceGrantsRepository - + pkg_id = grant_table_via_package( + conn, table_id, user_id, group_name=f"dl-{user_id}", + ) grp = UserGroupsRepository(conn).get_by_name(f"dl-{user_id}") - if not grp: - grp = UserGroupsRepository(conn).create( - name=f"dl-{user_id}", description="download-test", created_by="test", - ) - members = UserGroupMembersRepository(conn) - if not members.has_membership(user_id, grp["id"]): - members.add_member(user_id, grp["id"], source="admin", added_by="test") - grants = ResourceGrantsRepository(conn) - if not grants.has_grant([grp["id"]], "table", table_id): - return grants.create( - group_id=grp["id"], resource_type="table", resource_id=table_id, - assigned_by="test", - ) existing = next( - g for g in grants.list_for_groups([grp["id"]], "table") - if g["resource_id"] == table_id + g for g in ResourceGrantsRepository(conn) + .list_for_groups([grp["id"]], "data_package") + if g["resource_id"] == pkg_id ) return existing["id"] diff --git a/tests/test_audit_gap_phase_e.py b/tests/test_audit_gap_phase_e.py index 1d3d462..d533d7f 100644 --- a/tests/test_audit_gap_phase_e.py +++ b/tests/test_audit_gap_phase_e.py @@ -32,26 +32,10 @@ from src.db import get_system_db # --------------------------------------------------------------------------- def _grant_table(conn, user_id: str, table_id: str) -> None: - """Grant user_id read access to table_id via a dedicated group.""" - from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository - from src.repositories.resource_grants import ResourceGrantsRepository - + """Stack-gated RBAC: wrap table in auto data_package + grant package.""" + from tests.conftest import grant_table_via_package grp_name = f"audit-e-{user_id}-{table_id}"[:60] - grp = UserGroupsRepository(conn).get_by_name(grp_name) - if not grp: - grp = UserGroupsRepository(conn).create( - name=grp_name, description="audit-e-test", created_by="test", - ) - members = UserGroupMembersRepository(conn) - if not members.has_membership(user_id, grp["id"]): - members.add_member(user_id, grp["id"], source="admin", added_by="test") - grants = ResourceGrantsRepository(conn) - if not grants.has_grant([grp["id"]], "table", table_id): - grants.create( - group_id=grp["id"], resource_type="table", resource_id=table_id, - assigned_by="test", - ) + grant_table_via_package(conn, table_id, user_id, group_name=grp_name) def _register_table(client, admin_hdrs, table_id, source_type="keboola", query_mode="local"): diff --git a/tests/test_claude_md_renderer.py b/tests/test_claude_md_renderer.py index 133a2f3..1cc008a 100644 --- a/tests/test_claude_md_renderer.py +++ b/tests/test_claude_md_renderer.py @@ -195,9 +195,25 @@ def _add_member(conn, *, user_id: str, group_id: str) -> None: def _grant_table(conn, *, group_id: str, table_id: str) -> None: + """Stack-gated RBAC: wrap ``table_id`` in an auto data_package and + grant the package to ``group_id`` with ``requirement='required'`` + so every user in the group has the package in their stack.""" from src.repositories.resource_grants import ResourceGrantsRepository + from src.repositories.data_packages import DataPackagesRepository + pkgs = DataPackagesRepository(conn) + pkg_slug = f"_test-pkg-{table_id.lower()}"[:63] + existing = pkgs.get_by_slug(pkg_slug) + if existing: + pkg_id = existing["id"] + else: + pkg_id = pkgs.create( + name=f"Test wrap {table_id}", slug=pkg_slug, + description=None, icon=None, color=None, created_by="test", + ) + pkgs.add_table(pkg_id, table_id, added_by="test") ResourceGrantsRepository(conn).create( - group_id=group_id, resource_type="table", resource_id=table_id + group_id=group_id, resource_type="data_package", resource_id=pkg_id, + requirement="required", ) diff --git a/tests/test_e2e_api.py b/tests/test_e2e_api.py index f5fed05..e99d2b3 100644 --- a/tests/test_e2e_api.py +++ b/tests/test_e2e_api.py @@ -144,22 +144,14 @@ class TestRBACEnforcement: "source_table": "orders", "query_mode": "local", }, headers=_auth(admin_t)) - # Mint a TABLE grant for analyst1 + # Stack-gated RBAC: wrap the table in an auto data_package + grant + # the package to a custom group analyst1 belongs to. from src.db import get_system_db - from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository - from src.repositories.resource_grants import ResourceGrantsRepository + from tests.conftest import grant_table_via_package conn = get_system_db() try: - grp = UserGroupsRepository(conn).create( - name="e2e-analyst", description="t", created_by="t", - ) - UserGroupMembersRepository(conn).add_member( - "analyst1", grp["id"], source="admin", added_by="t", - ) - ResourceGrantsRepository(conn).create( - group_id=grp["id"], resource_type="table", resource_id="orders", - assigned_by="t", + grant_table_via_package( + conn, "orders", "analyst1", group_name="e2e-analyst", ) finally: conn.close() diff --git a/tests/test_journey_rbac.py b/tests/test_journey_rbac.py index 880cecb..c250ac0 100644 --- a/tests/test_journey_rbac.py +++ b/tests/test_journey_rbac.py @@ -15,26 +15,21 @@ def _auth(token: str) -> dict: def _grant_table(conn, user_id: str, table_id: str) -> str: - from src.repositories.user_groups import UserGroupsRepository - from src.repositories.user_group_members import UserGroupMembersRepository + """Stack-gated RBAC: wrap the table in an auto data_package and + grant the package to a custom group the user is in. Returns the + grant id so callers that revoke by grant id continue to work. + """ + from tests.conftest import grant_table_via_package from src.repositories.resource_grants import ResourceGrantsRepository + from src.repositories.user_groups import UserGroupsRepository + pkg_id = grant_table_via_package( + conn, table_id, user_id, group_name=f"j-{user_id}", + ) grp = UserGroupsRepository(conn).get_by_name(f"j-{user_id}") - if not grp: - grp = UserGroupsRepository(conn).create( - name=f"j-{user_id}", description="journey", created_by="test", - ) - members = UserGroupMembersRepository(conn) - if not members.has_membership(user_id, grp["id"]): - members.add_member(user_id, grp["id"], source="admin", added_by="test") - grants = ResourceGrantsRepository(conn) - if not grants.has_grant([grp["id"]], "table", table_id): - return grants.create( - group_id=grp["id"], resource_type="table", resource_id=table_id, - assigned_by="test", - ) existing = next( - g for g in grants.list_for_groups([grp["id"]], "table") - if g["resource_id"] == table_id + g for g in ResourceGrantsRepository(conn) + .list_for_groups([grp["id"]], "data_package") + if g["resource_id"] == pkg_id ) return existing["id"] diff --git a/tests/test_rbac.py b/tests/test_rbac.py index 402d4a6..4383be0 100644 --- a/tests/test_rbac.py +++ b/tests/test_rbac.py @@ -45,10 +45,23 @@ def setup_db(tmp_path, monkeypatch): "INSERT INTO table_registry (id, name) VALUES (?, ?)", ["salaries", "salaries"], ) + # Stack-gated RBAC: wrap 'orders' in an auto data_package and grant the + # package to the analysts group with required=true so it lands in the + # user's stack automatically. Per-table grants on resource_grants are + # no longer consulted for analyst visibility. + from src.repositories.data_packages import DataPackagesRepository + pkgs = DataPackagesRepository(conn) + pkg_id = pkgs.create( + name="orders-pkg", slug="orders-pkg", + description=None, icon=None, color=None, + created_by="test", + ) + pkgs.add_table(pkg_id, "orders", added_by="test") conn.execute( - """INSERT INTO resource_grants (id, group_id, resource_type, resource_id) - VALUES (?, ?, 'table', 'orders')""", - [str(uuid.uuid4()), analysts["id"]], + """INSERT INTO resource_grants + (id, group_id, resource_type, resource_id, requirement) + VALUES (?, ?, 'data_package', ?, 'required')""", + [str(uuid.uuid4()), analysts["id"], pkg_id], ) conn.close() diff --git a/tests/test_table_not_in_stack_message.py b/tests/test_table_not_in_stack_message.py new file mode 100644 index 0000000..222abc5 --- /dev/null +++ b/tests/test_table_not_in_stack_message.py @@ -0,0 +1,79 @@ +"""Every CLI surface that gates by ``can_access_table`` returns the +SAME actionable 403 detail string when an analyst hits a table not in +their stack. + +Stack-gated RBAC removed per-table ``resource_grants`` as a visibility +path for analysts. The new failure mode — analyst queries a table that +isn't in any data package they've subscribed to — must surface as a +consistent, copy-able error so the user knows to ask an admin to wrap +the table in a Data Package. + +This test fans out across the four CLI-facing endpoints that all hit +``can_access_table``: + * GET /api/data/{table_id}/download + * POST /api/data/{table_id}/check-access + * POST /api/v2/sample + * POST /api/v2/schema +plus the in-process helper ``src.rbac.table_not_in_stack_message`` that +all of them route through. +""" + +from __future__ import annotations + + +def _auth(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +def _register_table(client, admin_token: str, table_id: str) -> None: + """Admin registers a table WITHOUT wrapping it in any data_package + so every analyst-side gate fires.""" + r = client.post( + "/api/admin/register-table", + json={ + "name": table_id, "source_type": "keboola", + "query_mode": "local", + }, + headers=_auth(admin_token), + ) + assert r.status_code in (200, 201, 409), r.text + + +def _expect_stack_message(detail: object, table_id: str) -> None: + """Assert the 403 detail contains the standard stack-gated copy.""" + if isinstance(detail, dict): + detail = detail.get("detail") or detail.get("message") or "" + detail = str(detail) + assert table_id in detail, f"missing table id in 403 detail: {detail!r}" + assert "stack" in detail.lower() or "data package" in detail.lower(), ( + f"403 detail must mention stack / Data Package — got {detail!r}" + ) + + +class TestTableNotInStackMessage: + def test_helper_message_contains_table_id_and_data_package(self): + """In-process helper — every API route should pipe through this + so the wording stays consistent.""" + from src.rbac import table_not_in_stack_message + msg = table_not_in_stack_message("foo_table") + assert "foo_table" in msg + assert "Data Package" in msg + assert "agnes pull" in msg, "actionable next-step must mention `agnes pull`" + + def test_data_download_returns_stack_gated_403(self, seeded_app): + _register_table(seeded_app["client"], seeded_app["admin_token"], "secret_data") + r = seeded_app["client"].get( + "/api/data/secret_data/download", + headers=_auth(seeded_app["analyst_token"]), + ) + assert r.status_code == 403 + _expect_stack_message(r.json().get("detail"), "secret_data") + + def test_check_access_returns_stack_gated_403(self, seeded_app): + _register_table(seeded_app["client"], seeded_app["admin_token"], "secret_data2") + r = seeded_app["client"].get( + "/api/data/secret_data2/check-access", + headers=_auth(seeded_app["analyst_token"]), + ) + assert r.status_code == 403 + _expect_stack_message(r.json().get("detail"), "secret_data2") diff --git a/tests/test_web_catalog_unified.py b/tests/test_web_catalog_unified.py index a0707eb..8351c61 100644 --- a/tests/test_web_catalog_unified.py +++ b/tests/test_web_catalog_unified.py @@ -134,3 +134,43 @@ class TestCatalogUnifiedPage: body = resp.text # Available + not subscribed → Add button with data-action="add". assert 'data-action="add"' in body + + def test_required_packages_render_before_available_ones(self, seeded_app): + """Browse grid groups Required cards first (first-demo feedback). + + Three packages: two available + one required. The required card + must come BEFORE the available ones in the rendered HTML so it + clusters at the top of the grid instead of being interleaved by + creation order. + """ + # Seed in deliberately-wrong order (available first) so the sort + # has something to undo. + avail_pkg = _make_pkg("a-avail", "AAA Available") + req_pkg = _make_pkg("z-req", "ZZZ Required") + avail_pkg_2 = _make_pkg("m-avail", "MMM Available") + _grant("Everyone", "data_package", avail_pkg, + requirement="available", users=["analyst1"]) + _grant("Everyone", "data_package", req_pkg, + requirement="required", users=["analyst1"]) + _grant("Everyone", "data_package", avail_pkg_2, + requirement="available", users=["analyst1"]) + + resp = seeded_app["client"].get( + "/catalog", headers=_auth(seeded_app["analyst_token"]), + ) + body = resp.text + # The required-grant card must appear earlier in the document + # than either available card — independent of creation order or + # alphabetical name ordering. + i_req = body.find('data-id="' + req_pkg + '"') + i_a1 = body.find('data-id="' + avail_pkg + '"') + i_a2 = body.find('data-id="' + avail_pkg_2 + '"') + assert i_req != -1 and i_a1 != -1 and i_a2 != -1 + assert i_req < i_a1, ( + "Required card must render before available card 'AAA' " + f"(req@{i_req}, avail@{i_a1})" + ) + assert i_req < i_a2, ( + "Required card must render before available card 'MMM' " + f"(req@{i_req}, avail@{i_a2})" + )