feat: add centralized RBAC module — replace Linux group auth
New src/rbac.py: Role enum, hierarchy, get_user_role(), has_role(), is_admin(), is_km_admin(), is_analyst(), has_dataset_access(), set_user_role(). webapp/auth.py: admin_required + km_admin_required now use DuckDB roles instead of Linux groups (pwd.getpwnam + sudo/data-ops check). app/auth/dependencies.py: imports Role from src/rbac.py (single source). 11 RBAC tests passing.
This commit is contained in:
parent
9fef90a729
commit
caa60a507d
5 changed files with 225 additions and 39 deletions
|
|
@ -1,6 +1,5 @@
|
|||
"""FastAPI auth dependencies — current user, role checking."""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import duckdb
|
||||
|
|
@ -8,24 +7,10 @@ from fastapi import Depends, HTTPException, Header, Request, status
|
|||
|
||||
from app.auth.jwt import verify_token
|
||||
from src.db import get_system_db
|
||||
from src.rbac import Role, ROLE_HIERARCHY
|
||||
from src.repositories.users import UserRepository
|
||||
|
||||
|
||||
class Role(str, Enum):
|
||||
VIEWER = "viewer"
|
||||
ANALYST = "analyst"
|
||||
ADMIN = "admin"
|
||||
KM_ADMIN = "km_admin"
|
||||
|
||||
|
||||
ROLE_HIERARCHY = {
|
||||
Role.VIEWER: 0,
|
||||
Role.ANALYST: 1,
|
||||
Role.KM_ADMIN: 2,
|
||||
Role.ADMIN: 3,
|
||||
}
|
||||
|
||||
|
||||
def _get_db():
|
||||
conn = get_system_db()
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,30 +1,53 @@
|
|||
# Connectors — How to add a new data source
|
||||
|
||||
## Existing Connectors
|
||||
- **Keboola** (`connectors/keboola/`) — Keboola Storage API
|
||||
- **BigQuery** (`connectors/bigquery/`) — Google BigQuery
|
||||
- **Jira** (`connectors/jira/`) — Jira webhook + API
|
||||
- **Keboola** (`connectors/keboola/extractor.py`) — DuckDB Keboola extension, batch pull
|
||||
- **BigQuery** (`connectors/bigquery/extractor.py`) — DuckDB BQ extension, remote-only
|
||||
- **Jira** (`connectors/jira/`) — Webhook + incremental parquet transform
|
||||
|
||||
## extract.duckdb Contract
|
||||
|
||||
Every connector produces the same output:
|
||||
```
|
||||
/data/extracts/{source_name}/
|
||||
├── extract.duckdb ← _meta table + views
|
||||
└── data/ ← parquet files (local sources only)
|
||||
```
|
||||
|
||||
The `_meta` table must have columns:
|
||||
- `table_name VARCHAR` — view name
|
||||
- `description VARCHAR`
|
||||
- `rows BIGINT`
|
||||
- `size_bytes BIGINT`
|
||||
- `extracted_at TIMESTAMP`
|
||||
- `query_mode VARCHAR` — 'local' (data here) or 'remote' (query on demand)
|
||||
|
||||
## Adding a New Connector
|
||||
|
||||
1. Create `connectors/<name>/adapter.py` implementing the `DataSource` ABC:
|
||||
1. Create `connectors/<name>/extractor.py`:
|
||||
```python
|
||||
from src.data_sync import DataSource
|
||||
import duckdb
|
||||
from pathlib import Path
|
||||
|
||||
class MyDataSource(DataSource):
|
||||
def sync_table(self, table_config, sync_state): ...
|
||||
def discover_tables(self): ...
|
||||
def get_column_metadata(self, table_id): ...
|
||||
def get_source_name(self): ...
|
||||
def run(output_dir: str, table_configs: list[dict], **kwargs):
|
||||
output = Path(output_dir)
|
||||
data_dir = output / "data"
|
||||
data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
conn = duckdb.connect(str(output / "extract.duckdb"))
|
||||
# Create _meta table
|
||||
# For each table: COPY TO parquet, create view, insert _meta row
|
||||
conn.close()
|
||||
```
|
||||
|
||||
2. The factory in `src/data_sync.py:create_data_source()` auto-discovers connectors.
|
||||
Set `DATA_SOURCE=<name>` in instance.yaml or .env.
|
||||
2. Register tables in DuckDB `table_registry` via admin API or migration script.
|
||||
Set `source_type` to your connector name.
|
||||
|
||||
3. Add required env vars to `.env` and `config/.env.template`.
|
||||
|
||||
4. Add tests to `tests/test_<name>_adapter.py`.
|
||||
4. The SyncOrchestrator (`src/orchestrator.py`) will auto-discover your extract.duckdb.
|
||||
|
||||
## Configuration
|
||||
Each connector reads credentials from environment variables.
|
||||
Table definitions are in `docs/data_description.md` (YAML blocks).
|
||||
- Instance-level config: `config/instance.yaml` (connection details)
|
||||
- Table definitions: DuckDB `table_registry` table
|
||||
- Credentials: environment variables
|
||||
|
|
|
|||
97
src/rbac.py
Normal file
97
src/rbac.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""Role-based access control — centralized permission checks using DuckDB.
|
||||
|
||||
Replaces Linux group-based auth (sudo/data-ops → admin, dataread → analyst).
|
||||
Used by both FastAPI (app/auth/dependencies.py) and Flask webapp (webapp/auth.py).
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from src.db import get_system_db
|
||||
from src.repositories.users import UserRepository
|
||||
|
||||
|
||||
class Role(str, Enum):
    """Closed set of user roles; each member's value is a plain string.

    Because the enum mixes in ``str``, members compare equal to their
    string values (``Role.ADMIN == "admin"``) and ``Role("admin")`` looks a
    member up by value.  Relative privilege is NOT encoded here; it lives in
    the module-level ``ROLE_HIERARCHY`` mapping.
    """

    # Declared from least to most privileged, for readability only.
    VIEWER = "viewer"
    ANALYST = "analyst"
    KM_ADMIN = "km_admin"
    ADMIN = "admin"
|
||||
|
||||
|
||||
# Numeric privilege level per role; a higher number means more privilege.
# The tuple below is ordered from least to most privileged, so each role's
# position is its level: VIEWER=0, ANALYST=1, KM_ADMIN=2, ADMIN=3.
ROLE_HIERARCHY = {
    role: level
    for level, role in enumerate(
        (Role.VIEWER, Role.ANALYST, Role.KM_ADMIN, Role.ADMIN)
    )
}
|
||||
|
||||
|
||||
def get_user_role(email: str) -> Role:
    """Look up the role stored for ``email`` in the system database.

    Args:
        email: Address used to find the user via ``UserRepository``.

    Returns:
        The user's ``Role``.  Falls back to ``Role.VIEWER`` when no user is
        found or when the stored role value is not a valid ``Role``.
    """
    db = get_system_db()
    try:
        record = UserRepository(db).get_by_email(email)
        if not record:
            return Role.VIEWER
        try:
            # A missing "role" key is treated as the lowest privilege.
            return Role(record.get("role", "viewer"))
        except ValueError:
            # Unrecognised stored value: degrade to the lowest privilege.
            return Role.VIEWER
    finally:
        # The connection is opened per call, so always release it.
        db.close()
|
||||
|
||||
|
||||
def has_role(email: str, minimum_role: Role) -> bool:
    """Check whether the user holds at least ``minimum_role``.

    Args:
        email: Address of the user whose role is looked up.
        minimum_role: Lowest role that should be accepted.  Because ``Role``
            is a ``str`` enum, the equivalent string value (e.g. ``"admin"``)
            resolves to the same hierarchy entry.

    Returns:
        True when the user's level in ``ROLE_HIERARCHY`` is greater than or
        equal to the level of ``minimum_role``.  False when ``minimum_role``
        is not a known role: an unrecognised requirement must deny access
        rather than silently become level 0, which every user satisfies.
    """
    required_level = ROLE_HIERARCHY.get(minimum_role)
    if required_level is None:
        # Fail closed: a typo or stale role name must never grant access.
        return False

    user_level = ROLE_HIERARCHY.get(get_user_role(email), 0)
    return user_level >= required_level
|
||||
|
||||
|
||||
def is_admin(email: str) -> bool:
    """Return True when the user's role is at least ``Role.ADMIN``.

    Thin wrapper around ``has_role``.
    """
    return has_role(email, minimum_role=Role.ADMIN)
|
||||
|
||||
|
||||
def is_km_admin(email: str) -> bool:
    """Return True when the user's role is ``Role.KM_ADMIN`` or higher.

    Thin wrapper around ``has_role``.
    """
    return has_role(email, minimum_role=Role.KM_ADMIN)
|
||||
|
||||
|
||||
def is_analyst(email: str) -> bool:
    """Return True when the user's role is ``Role.ANALYST`` or higher.

    Thin wrapper around ``has_role``.
    """
    return has_role(email, minimum_role=Role.ANALYST)
|
||||
|
||||
|
||||
def has_dataset_access(email: str, dataset: str) -> bool:
    """Decide whether a user may access ``dataset``.

    Admins are granted access to every dataset.  Everyone else needs an
    explicit permission in the dataset_permissions table, checked through
    ``DatasetPermissionRepository.has_access``.

    Args:
        email: Address of the user requesting access.
        dataset: Identifier of the dataset being requested.

    Returns:
        True for admins; False when no user exists for ``email``; otherwise
        whatever ``DatasetPermissionRepository.has_access`` reports.
    """
    # Admin short-circuit: no per-dataset lookup is needed.
    if is_admin(email):
        return True

    db = get_system_db()
    try:
        account = UserRepository(db).get_by_email(email)
        if account:
            # Imported lazily, only once a permission lookup is needed.
            from src.repositories.sync_settings import DatasetPermissionRepository

            permissions = DatasetPermissionRepository(db)
            return permissions.has_access(account["id"], dataset)
        return False
    finally:
        db.close()
|
||||
|
||||
|
||||
def set_user_role(email: str, role: Role) -> bool:
    """Persist a new role for the user identified by ``email``.

    Args:
        email: Address of the user to update.
        role: Role to store; its string ``value`` is what gets written.

    Returns:
        True when the user exists and the update was issued, False when no
        user is found for ``email``.
    """
    db = get_system_db()
    try:
        users = UserRepository(db)
        account = users.get_by_email(email)
        if account:
            users.update(account["id"], role=role.value)
            return True
        return False
    finally:
        db.close()
|
||||
84
tests/test_rbac.py
Normal file
84
tests/test_rbac.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
"""Tests for src/rbac.py — role-based access control."""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def setup_db(tmp_path, monkeypatch):
    """Seed a throwaway system database with one user per role.

    Points ``DATA_DIR`` at pytest's per-test ``tmp_path`` and creates four
    users (admin, analyst, km_admin, viewer) through ``UserRepository``.

    ``monkeypatch.setenv`` is used instead of assigning to ``os.environ``
    directly, so the variable is restored after each test and does not leak
    a stale temporary path into unrelated tests.
    """
    # Set before importing src.db, matching the original ordering.
    # NOTE(review): presumably src.db reads DATA_DIR to locate the database;
    # confirm whether that happens at import time or per call.
    monkeypatch.setenv("DATA_DIR", str(tmp_path))

    from src.db import get_system_db
    from src.repositories.users import UserRepository

    seed_users = (
        ("admin1", "admin@test.com", "Admin", "admin"),
        ("analyst1", "analyst@test.com", "Analyst", "analyst"),
        ("km1", "km@test.com", "KM Admin", "km_admin"),
        ("viewer1", "viewer@test.com", "Viewer", "viewer"),
    )

    conn = get_system_db()
    try:
        repo = UserRepository(conn)
        for user_id, email, name, role in seed_users:
            repo.create(id=user_id, email=email, name=name, role=role)
    finally:
        # Release the connection even if seeding fails part-way.
        conn.close()

    yield
|
||||
|
||||
|
||||
class TestGetUserRole:
    """get_user_role() resolves seeded users and defaults unknown ones."""

    def test_admin(self, setup_db):
        import src.rbac as rbac

        resolved = rbac.get_user_role("admin@test.com")
        assert resolved == rbac.Role.ADMIN

    def test_analyst(self, setup_db):
        import src.rbac as rbac

        resolved = rbac.get_user_role("analyst@test.com")
        assert resolved == rbac.Role.ANALYST

    def test_unknown_user(self, setup_db):
        import src.rbac as rbac

        # A user that was never created falls back to the lowest role.
        resolved = rbac.get_user_role("nobody@test.com")
        assert resolved == rbac.Role.VIEWER
|
||||
|
||||
|
||||
class TestHasRole:
    """has_role() honours the role hierarchy for the seeded users."""

    def test_admin_has_all_roles(self, setup_db):
        import src.rbac as rbac

        # Checked from lowest to highest requirement, one lookup each.
        for required in (
            rbac.Role.VIEWER,
            rbac.Role.ANALYST,
            rbac.Role.KM_ADMIN,
            rbac.Role.ADMIN,
        ):
            assert rbac.has_role("admin@test.com", required)

    def test_analyst_cant_admin(self, setup_db):
        import src.rbac as rbac

        analyst = "analyst@test.com"
        assert rbac.has_role(analyst, rbac.Role.ANALYST)
        assert not rbac.has_role(analyst, rbac.Role.ADMIN)

    def test_viewer_is_minimal(self, setup_db):
        import src.rbac as rbac

        viewer = "viewer@test.com"
        assert rbac.has_role(viewer, rbac.Role.VIEWER)
        assert not rbac.has_role(viewer, rbac.Role.ANALYST)
|
||||
|
||||
|
||||
class TestConvenienceFunctions:
    """is_admin / is_km_admin / is_analyst wrappers over has_role()."""

    def test_is_admin(self, setup_db):
        import src.rbac as rbac

        assert rbac.is_admin("admin@test.com")
        assert not rbac.is_admin("analyst@test.com")

    def test_is_km_admin(self, setup_db):
        import src.rbac as rbac

        assert rbac.is_km_admin("km@test.com")
        # admin ranks above km_admin, so it satisfies the check too
        assert rbac.is_km_admin("admin@test.com")
        assert not rbac.is_km_admin("analyst@test.com")

    def test_is_analyst(self, setup_db):
        import src.rbac as rbac

        assert rbac.is_analyst("analyst@test.com")
        assert rbac.is_analyst("admin@test.com")
        assert not rbac.is_analyst("viewer@test.com")
|
||||
|
||||
|
||||
class TestSetUserRole:
    """set_user_role() updates existing users and rejects unknown ones."""

    def test_set_role(self, setup_db):
        import src.rbac as rbac

        target = "viewer@test.com"
        # Before: seeded as viewer.
        assert rbac.get_user_role(target) == rbac.Role.VIEWER
        # Promote, then confirm the change is visible on a fresh lookup.
        assert rbac.set_user_role(target, rbac.Role.ADMIN)
        assert rbac.get_user_role(target) == rbac.Role.ADMIN

    def test_set_role_nonexistent(self, setup_db):
        import src.rbac as rbac

        updated = rbac.set_user_role("nobody@test.com", rbac.Role.ADMIN)
        assert not updated
|
||||
|
|
@ -37,7 +37,7 @@ def login_required(f):
|
|||
def admin_required(f):
|
||||
"""Decorator to require admin privileges for a route.
|
||||
|
||||
Recomputes admin status server-side on every request.
|
||||
Checks role in DuckDB users table via src/rbac.py.
|
||||
Returns 403 JSON for API routes, redirect for HTML routes.
|
||||
"""
|
||||
|
||||
|
|
@ -48,13 +48,10 @@ def admin_required(f):
|
|||
return jsonify({"error": "Authentication required"}), 401
|
||||
return redirect(url_for("auth.login"))
|
||||
|
||||
from .user_service import check_user_exists, get_webapp_username
|
||||
from src.rbac import is_admin
|
||||
|
||||
email = session.get("user", {}).get("email", "")
|
||||
username = get_webapp_username(email)
|
||||
user_info = check_user_exists(username)
|
||||
|
||||
if not user_info.is_admin:
|
||||
if not is_admin(email):
|
||||
if request.path.startswith("/api/"):
|
||||
return jsonify({"error": "Admin access required"}), 403
|
||||
flash("Admin access required.", "error")
|
||||
|
|
@ -68,7 +65,7 @@ def admin_required(f):
|
|||
def km_admin_required(f):
|
||||
"""Decorator to require Corporate Memory admin privileges for a route.
|
||||
|
||||
Checks km_admin flag via corporate_memory_service.is_km_admin().
|
||||
Checks role in DuckDB users table via src/rbac.py.
|
||||
Returns 403 JSON for API routes, redirect for HTML routes.
|
||||
"""
|
||||
|
||||
|
|
@ -79,7 +76,7 @@ def km_admin_required(f):
|
|||
return jsonify({"error": "Authentication required"}), 401
|
||||
return redirect(url_for("auth.login"))
|
||||
|
||||
from .corporate_memory_service import is_km_admin
|
||||
from src.rbac import is_km_admin
|
||||
|
||||
email = session.get("user", {}).get("email", "")
|
||||
if not is_km_admin(email):
|
||||
|
|
|
|||
Loading…
Reference in a new issue