feat: add centralized RBAC module — replace Linux group auth

New src/rbac.py: Role enum, hierarchy, get_user_role(), has_role(),
is_admin(), is_km_admin(), is_analyst(), has_dataset_access(), set_user_role().

webapp/auth.py: admin_required + km_admin_required now use DuckDB
roles instead of Linux groups (pwd.getpwnam + sudo/data-ops check).

app/auth/dependencies.py: imports Role from src/rbac.py (single source).

11 RBAC tests passing.
This commit is contained in:
ZdenekSrotyr 2026-03-31 08:04:35 +02:00
parent 9fef90a729
commit caa60a507d
5 changed files with 225 additions and 39 deletions

View file

@ -1,6 +1,5 @@
"""FastAPI auth dependencies — current user, role checking."""
from enum import Enum
from typing import Optional
import duckdb
@ -8,24 +7,10 @@ from fastapi import Depends, HTTPException, Header, Request, status
from app.auth.jwt import verify_token
from src.db import get_system_db
from src.rbac import Role, ROLE_HIERARCHY
from src.repositories.users import UserRepository
class Role(str, Enum):
    """User roles, stored as lowercase string values.

    The ``str`` mixin makes every member compare equal to its raw value.
    """

    VIEWER = "viewer"
    ANALYST = "analyst"
    ADMIN = "admin"
    KM_ADMIN = "km_admin"
# Numeric rank for each role, from lowest (viewer = 0) to highest (admin = 3).
ROLE_HIERARCHY = {
    role: rank
    for rank, role in enumerate(
        (Role.VIEWER, Role.ANALYST, Role.KM_ADMIN, Role.ADMIN)
    )
}
def _get_db():
conn = get_system_db()
try:

View file

@ -1,30 +1,53 @@
# Connectors — How to add a new data source
## Existing Connectors
- **Keboola** (`connectors/keboola/`) — Keboola Storage API
- **BigQuery** (`connectors/bigquery/`) — Google BigQuery
- **Jira** (`connectors/jira/`) — Jira webhook + API
- **Keboola** (`connectors/keboola/extractor.py`) — DuckDB Keboola extension, batch pull
- **BigQuery** (`connectors/bigquery/extractor.py`) — DuckDB BQ extension, remote-only
- **Jira** (`connectors/jira/`) — Webhook + incremental parquet transform
## extract.duckdb Contract
Every connector produces the same output:
```
/data/extracts/{source_name}/
├── extract.duckdb ← _meta table + views
└── data/ ← parquet files (local sources only)
```
The `_meta` table must have columns:
- `table_name VARCHAR` — view name
- `description VARCHAR`
- `rows BIGINT`
- `size_bytes BIGINT`
- `extracted_at TIMESTAMP`
- `query_mode VARCHAR` — 'local' (data here) or 'remote' (query on demand)
## Adding a New Connector
1. Create `connectors/<name>/adapter.py` implementing the `DataSource` ABC:
1. Create `connectors/<name>/extractor.py`:
```python
from src.data_sync import DataSource
import duckdb
from pathlib import Path
class MyDataSource(DataSource):
def sync_table(self, table_config, sync_state): ...
def discover_tables(self): ...
def get_column_metadata(self, table_id): ...
def get_source_name(self): ...
def run(output_dir: str, table_configs: list[dict], **kwargs):
output = Path(output_dir)
data_dir = output / "data"
data_dir.mkdir(parents=True, exist_ok=True)
conn = duckdb.connect(str(output / "extract.duckdb"))
# Create _meta table
# For each table: COPY TO parquet, create view, insert _meta row
conn.close()
```
2. The factory in `src/data_sync.py:create_data_source()` auto-discovers connectors.
Set `DATA_SOURCE=<name>` in instance.yaml or .env.
2. Register tables in DuckDB `table_registry` via admin API or migration script.
Set `source_type` to your connector name.
3. Add required env vars to `.env` and `config/.env.template`.
4. Add tests to `tests/test_<name>_adapter.py`.
4. The SyncOrchestrator (`src/orchestrator.py`) will auto-discover your extract.duckdb.
## Configuration
Each connector reads credentials from environment variables.
Table definitions are in `docs/data_description.md` (YAML blocks).
- Instance-level config: `config/instance.yaml` (connection details)
- Table definitions: DuckDB `table_registry` table
- Credentials: environment variables

97
src/rbac.py Normal file
View file

@ -0,0 +1,97 @@
"""Role-based access control — centralized permission checks using DuckDB.
Replaces Linux group-based auth (sudo/data-ops admin, dataread analyst).
Used by both FastAPI (app/auth/dependencies.py) and Flask webapp (webapp/auth.py).
"""
from enum import Enum
from typing import Optional
from src.db import get_system_db
from src.repositories.users import UserRepository
class Role(str, Enum):
    """Roles a user can hold, stored as lowercase string values.

    The ``str`` mixin makes every member compare equal to its raw value,
    so a role string read from storage can be passed straight to ``Role(...)``.
    """

    VIEWER = "viewer"
    ANALYST = "analyst"
    KM_ADMIN = "km_admin"
    ADMIN = "admin"
# Numeric rank for each role, from lowest (viewer = 0) to highest (admin = 3).
ROLE_HIERARCHY = {
    role: rank
    for rank, role in enumerate(
        (Role.VIEWER, Role.ANALYST, Role.KM_ADMIN, Role.ADMIN)
    )
}
def get_user_role(email: str) -> Role:
    """Look up the role stored for *email* in the system database.

    Returns ``Role.VIEWER`` when no user matches the email or when the
    stored role value is not a valid ``Role``. The database connection is
    always closed before returning.
    """
    conn = get_system_db()
    try:
        record = UserRepository(conn).get_by_email(email)
        if not record:
            return Role.VIEWER
        try:
            return Role(record.get("role", "viewer"))
        except ValueError:
            # Unrecognised role string in storage: fall back to the lowest role.
            return Role.VIEWER
    finally:
        conn.close()
def has_role(email: str, minimum_role: Role) -> bool:
    """Check whether the user's role ranks at least as high as *minimum_role*.

    Args:
        email: Email identifying the user; resolved via ``get_user_role``.
        minimum_role: Lowest role that should be allowed through.

    Returns:
        ``True`` when the user's rank in ``ROLE_HIERARCHY`` is greater than or
        equal to the rank of *minimum_role*. ``False`` otherwise, including
        when *minimum_role* has no entry in ``ROLE_HIERARCHY``.
    """
    required_rank = ROLE_HIERARCHY.get(minimum_role)
    if required_rank is None:
        # Deny by default: an unknown requirement must not silently become
        # rank 0, which every user (even an unknown one) would satisfy.
        return False
    user_rank = ROLE_HIERARCHY.get(get_user_role(email), 0)
    return user_rank >= required_rank
def is_admin(email: str) -> bool:
    """Return True when the user identified by *email* holds the admin role."""
    return has_role(email, minimum_role=Role.ADMIN)
def is_km_admin(email: str) -> bool:
    """Return True when the user's role is km_admin or any higher-ranked role."""
    return has_role(email, minimum_role=Role.KM_ADMIN)
def is_analyst(email: str) -> bool:
    """Return True when the user's role is analyst or any higher-ranked role."""
    return has_role(email, minimum_role=Role.ANALYST)
def has_dataset_access(email: str, dataset: str) -> bool:
    """Decide whether the user identified by *email* may use *dataset*.

    Admins are granted access to every dataset. Anyone else must exist in
    the users table and pass ``DatasetPermissionRepository.has_access`` for
    the dataset; an email with no matching user is denied.
    """
    if is_admin(email):
        return True

    conn = get_system_db()
    try:
        account = UserRepository(conn).get_by_email(email)
        if account:
            # Imported lazily, only once a permission lookup is actually needed.
            from src.repositories.sync_settings import DatasetPermissionRepository

            permissions = DatasetPermissionRepository(conn)
            return permissions.has_access(account["id"], dataset)
        return False
    finally:
        conn.close()
def set_user_role(email: str, role: Role) -> bool:
    """Assign *role* to the user registered under *email*.

    Returns ``True`` once the update has been issued, or ``False`` when no
    user with that email exists. The connection is closed in either case.
    """
    conn = get_system_db()
    try:
        users = UserRepository(conn)
        target = users.get_by_email(email)
        if target:
            users.update(target["id"], role=role.value)
            return True
        return False
    finally:
        conn.close()

84
tests/test_rbac.py Normal file
View file

@ -0,0 +1,84 @@
"""Tests for src/rbac.py — role-based access control."""
import os
import pytest
@pytest.fixture
def setup_db(tmp_path, monkeypatch):
    """Seed a throwaway system database with one user per role.

    Points ``DATA_DIR`` at pytest's per-test temp directory (presumably read
    by ``get_system_db`` to locate the database -- confirm in src/db.py),
    then creates admin, analyst, km_admin and viewer users.
    """
    # monkeypatch restores DATA_DIR when the test ends; writing os.environ
    # directly would leak the temp path into every later test in the session.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))

    from src.db import get_system_db
    from src.repositories.users import UserRepository

    conn = get_system_db()
    try:
        repo = UserRepository(conn)
        repo.create(id="admin1", email="admin@test.com", name="Admin", role="admin")
        repo.create(id="analyst1", email="analyst@test.com", name="Analyst", role="analyst")
        repo.create(id="km1", email="km@test.com", name="KM Admin", role="km_admin")
        repo.create(id="viewer1", email="viewer@test.com", name="Viewer", role="viewer")
    finally:
        # Close even if seeding fails so the database file is not left open.
        conn.close()
    yield
class TestGetUserRole:
    """get_user_role() returns the stored role and defaults to viewer."""

    def test_admin(self, setup_db):
        from src import rbac

        assert rbac.get_user_role("admin@test.com") == rbac.Role.ADMIN

    def test_analyst(self, setup_db):
        from src import rbac

        assert rbac.get_user_role("analyst@test.com") == rbac.Role.ANALYST

    def test_unknown_user(self, setup_db):
        from src import rbac

        # An email with no user row falls back to the lowest role.
        assert rbac.get_user_role("nobody@test.com") == rbac.Role.VIEWER
class TestHasRole:
    """has_role() honours the role ranking for each seeded user."""

    def test_admin_has_all_roles(self, setup_db):
        from src import rbac

        every_role = (
            rbac.Role.VIEWER,
            rbac.Role.ANALYST,
            rbac.Role.KM_ADMIN,
            rbac.Role.ADMIN,
        )
        for required in every_role:
            assert rbac.has_role("admin@test.com", required)

    def test_analyst_cant_admin(self, setup_db):
        from src import rbac

        email = "analyst@test.com"
        assert rbac.has_role(email, rbac.Role.ANALYST)
        assert not rbac.has_role(email, rbac.Role.ADMIN)

    def test_viewer_is_minimal(self, setup_db):
        from src import rbac

        email = "viewer@test.com"
        assert rbac.has_role(email, rbac.Role.VIEWER)
        assert not rbac.has_role(email, rbac.Role.ANALYST)
class TestConvenienceFunctions:
    """is_admin / is_km_admin / is_analyst each apply a minimum-role check."""

    def test_is_admin(self, setup_db):
        from src import rbac

        assert rbac.is_admin("admin@test.com")
        assert not rbac.is_admin("analyst@test.com")

    def test_is_km_admin(self, setup_db):
        from src import rbac

        assert rbac.is_km_admin("km@test.com")
        assert rbac.is_km_admin("admin@test.com")  # admin >= km_admin
        assert not rbac.is_km_admin("analyst@test.com")

    def test_is_analyst(self, setup_db):
        from src import rbac

        assert rbac.is_analyst("analyst@test.com")
        assert rbac.is_analyst("admin@test.com")
        assert not rbac.is_analyst("viewer@test.com")
class TestSetUserRole:
    """set_user_role() updates existing users and reports missing ones."""

    def test_set_role(self, setup_db):
        from src import rbac

        email = "viewer@test.com"
        assert rbac.get_user_role(email) == rbac.Role.VIEWER
        assert rbac.set_user_role(email, rbac.Role.ADMIN)
        # The change must be visible to a subsequent lookup.
        assert rbac.get_user_role(email) == rbac.Role.ADMIN

    def test_set_role_nonexistent(self, setup_db):
        from src import rbac

        assert not rbac.set_user_role("nobody@test.com", rbac.Role.ADMIN)

View file

@ -37,7 +37,7 @@ def login_required(f):
def admin_required(f):
"""Decorator to require admin privileges for a route.
Recomputes admin status server-side on every request.
Checks role in DuckDB users table via src/rbac.py.
Returns 403 JSON for API routes, redirect for HTML routes.
"""
@ -48,13 +48,10 @@ def admin_required(f):
return jsonify({"error": "Authentication required"}), 401
return redirect(url_for("auth.login"))
from .user_service import check_user_exists, get_webapp_username
from src.rbac import is_admin
email = session.get("user", {}).get("email", "")
username = get_webapp_username(email)
user_info = check_user_exists(username)
if not user_info.is_admin:
if not is_admin(email):
if request.path.startswith("/api/"):
return jsonify({"error": "Admin access required"}), 403
flash("Admin access required.", "error")
@ -68,7 +65,7 @@ def admin_required(f):
def km_admin_required(f):
"""Decorator to require Corporate Memory admin privileges for a route.
Checks km_admin flag via corporate_memory_service.is_km_admin().
Checks role in DuckDB users table via src/rbac.py.
Returns 403 JSON for API routes, redirect for HTML routes.
"""
@ -79,7 +76,7 @@ def km_admin_required(f):
return jsonify({"error": "Authentication required"}), 401
return redirect(url_for("auth.login"))
from .corporate_memory_service import is_km_admin
from src.rbac import is_km_admin
email = session.get("user", {}).get("email", "")
if not is_km_admin(email):