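This squashes 13 commits from ma/staging, plus a small docstring translation,
into a single coherent unit. Three main workstreams (RBAC redesign, Claude Code
marketplace, security/ops hardening), followed by smaller UI, vendor-neutralization,
and translation changes.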
== RBAC v13 redesign ==
- Drops core.viewer/analyst/km_admin/admin hierarchy and the
internal_roles / group_mappings / user_role_grants / plugin_access tables.
- Replaced by user_group_members + resource_grants. Atomic v12→v13 backfill
wrapped in BEGIN/COMMIT; ROLLBACK leaves schema_version at 12 for retry.
- Two authorization primitives in app.auth.access:
require_admin — Admin-group god-mode
require_resource_access(rt, "{path}") — entity-scoped grants
Single DB lookup per request; no session cache; no implies BFS.
- /admin/access UI (single page) replaces /admin/role-mapping +
/admin/plugin-access. CLI `da admin group/grant *` replaces
`da admin role/mapping/grant-role/revoke-role/effective-roles`.
- ResourceType.TABLE is listing-only — admins can record table grants, but
runtime enforcement still flows through legacy dataset_permissions
(migration plan in docs/TODO-rbac-data-enforcement.md).
== Claude Code marketplace ==
- Aggregated /marketplace.zip + /marketplace.git/* (PAT-gated,
RBAC-filtered, content-addressed cache via dulwich).
- Admin god-mode dropped on the marketplace surface — admins curate
their own view via grants like everyone else.
- Bare-repo cache materializes per RBAC-filtered ETag; stale entries
not pruned in this iteration (disclaimed in git_backend.py docstring).
== #81 #83 #44 security/ops hardening ==
- #81 Group A — orchestrator ATTACH allow-listing (extension/url/alias).
- #81 Group B — Keboola extractor 3-state exit codes:
0 success / 1 total fail / 2 PARTIAL fail
The sync API logs a PARTIAL FAILURE alert on exit 2. Operators whose alerting
is binary (success/failure only) must teach it the new partial signal.
- #81 Group C — schema v10 view_ownership; rejects silent overwrite
of a prior connector's view name on collision.
- #81 Group D — extractor-side identifier validation.
- #83 — Jira webhook fail-closed when JIRA_WEBHOOK_SECRET unset
+ path-traversal fix.
- #44 — entire /api/scripts/* surface is admin-only (planted-script +
sandbox-bypass risk closed).
== Web UI polish + deploy fix ==
- /admin/access: live grant-count badges (no stale snapshot revert),
shared-header CSS link added to /catalog and /admin/{tables,permissions},
per-resource-type colored stripes.
- docker-compose.host-mount.yml: bind,rbind so dual-disk hosts don't
silently shadow sub-mounts and write state to the wrong disk.
== OSS vendor-neutralization (waves 1+2) ==
- scripts/grpn/ → scripts/ops/. Customer-specific identifiers
(project IDs, internal hostnames, dev/prod VM IPs, brand names)
replaced with placeholders across code, docs, Terraform, Caddyfile,
OAuth probe, and planning docs. Downstream infra repos that copied
scripts/grpn/agnes-tls-rotate.sh or agnes-auto-upgrade.sh must
update the path.
== Translation ==
- src/repositories/user_groups.py::ensure_system docstring translated
from Czech to English for codebase consistency.
Co-authored-by: Mina Rustamyan <mina@keboola.com>
165 lines
5.5 KiB
Python
165 lines
5.5 KiB
Python
"""Scheduler service — replaces systemd timers.
|
|
|
|
Lightweight sidecar that fires scheduled jobs. Two job kinds:
|
|
- "http": POST/GET an endpoint on the main app (e.g. data-refresh).
|
|
- "fn": call a Python function in-process (e.g. marketplaces sync).
|
|
|
|
Schedules are strings parsed by src.scheduler.is_table_due — accepts
|
|
"every 15m", "every 1h", "daily 03:00", "daily 07:00,13:00".
|
|
|
|
Usage: python -m services.scheduler
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import signal
|
|
import time
|
|
from datetime import datetime, timezone
|
|
|
|
import httpx
|
|
|
|
from src.scheduler import is_table_due
|
|
|
|
# Root logging setup: the level is taken from LOG_LEVEL (default INFO) and
# every record is tagged "[scheduler]".
logging.basicConfig(
    format="%(asctime)s %(levelname)s [scheduler] %(message)s",
    level=os.getenv("LOG_LEVEL", "INFO").upper(),
)
logger = logging.getLogger(__name__)

# Base URL that job endpoints are appended to when calling the main app.
API_URL = os.getenv("API_URL", "http://localhost:8000")
# Bearer token for API calls; the empty string means "no token configured".
SCHEDULER_API_TOKEN = os.getenv("SCHEDULER_API_TOKEN", "")

# Set to True after the "token not set" warning has been logged, so the
# warning is emitted only once per process.
_token_warning_emitted = False
|
|
|
|
|
|
|
|
def _get_auth_token() -> str:
    """Return the bearer token to send with API calls, or ``""`` for none.

    Production: ``SCHEDULER_API_TOKEN`` should carry a long-lived PAT minted
    via ``/tokens`` for a service-account user that is allowed to run the
    scheduled jobs. When the variable is non-empty it is returned unchanged.
    NOTE(review): this docstring previously named ``core.admin`` as the
    typical role for sync triggers — confirm the required access against
    the current RBAC model.

    Dev / LOCAL_DEV_MODE: leave the variable unset. The empty string is
    returned, the API is called without an ``Authorization`` header, and
    the API's dev-bypass authenticates the request as the dev user. A
    warning about the missing token is logged the first time this happens
    and is suppressed on later calls.

    History: an earlier implementation tried to fetch a token by POSTing
    only the seed admin's email to ``/auth/token``. That endpoint requires
    email + password (or rejects external-auth accounts that have no local
    password), so the call always 401-ed and produced one access-log line
    per cron tick. It was removed in favor of explicit configuration:
    either set the PAT or rely on LOCAL_DEV_MODE.
    """
    global _token_warning_emitted

    if not SCHEDULER_API_TOKEN:
        # Warn once only; this function is called for every HTTP job.
        if not _token_warning_emitted:
            logger.warning(
                "SCHEDULER_API_TOKEN is not set — calling the API "
                "without Authorization. Required in production; "
                "in LOCAL_DEV_MODE the dev-bypass auto-authenticates "
                "and this is fine."
            )
            _token_warning_emitted = True
        return ""

    return SCHEDULER_API_TOKEN
|
|
|
|
|
|
def _marketplaces_job():
    """Run the marketplaces sync and return whatever it returns.

    The import is deliberately deferred to call time: an import-time
    problem in ``src.marketplace`` then shows up as a failure of this one
    job instead of stopping the scheduler module from loading.
    """
    from src.marketplace import sync_marketplaces as sync_all

    outcome = sync_all()
    return outcome
|
|
|
|
|
|
# Job table. Every entry is a 4-tuple: (name, schedule_string, kind, target).
#   kind "http" -> target is an (endpoint, method) pair
#   kind "fn"   -> target is a zero-argument callable (its result is only logged)
JOBS = [
    (
        "data-refresh",
        "every 15m",
        "http",
        ("/api/sync/trigger", "POST"),
    ),
    (
        "health-check",
        "every 5m",
        "http",
        ("/api/health", "GET"),
    ),
    (
        "marketplaces",
        "daily 03:00",
        "fn",
        _marketplaces_job,
    ),
]

# Main-loop flag; the signal handler clears it to request shutdown.
_running = True
|
|
|
|
|
|
def _signal_handler(sig, frame) -> None:
    """Signal callback: log the signal and ask the main loop to exit.

    Shutdown is requested by clearing the module-level ``_running`` flag.
    ``frame`` is part of the handler signature and is not used.
    """
    global _running

    logger.info(f"Received signal {sig}, shutting down...")
    _running = False
|
|
|
|
|
|
def _call_api(endpoint: str, method: str = "POST") -> bool:
    """Send one request to the main app and report whether it succeeded.

    Args:
        endpoint: Path appended to ``API_URL`` to form the request URL.
        method: ``"POST"`` sends a POST with a 120 s timeout; any other
            value sends a GET with a 30 s timeout.

    Returns:
        True when the response status code is below 400, False for a
        4xx/5xx response or when the request raised. Failures are logged
        and never propagated to the caller.
    """
    target_url = f"{API_URL}{endpoint}"
    bearer = _get_auth_token()
    # No token configured -> send the request without an Authorization header.
    request_headers = {"Authorization": f"Bearer {bearer}"} if bearer else {}

    try:
        send, limit = (httpx.post, 120) if method == "POST" else (httpx.get, 30)
        response = send(target_url, headers=request_headers, timeout=limit)

        succeeded = response.status_code < 400
        if succeeded:
            logger.info(f"Job {endpoint}: {response.status_code}")
        else:
            # Only the first 200 characters of the body are logged.
            logger.warning(f"Job {endpoint}: HTTP {response.status_code} - {response.text[:200]}")
        return succeeded
    except Exception as e:
        logger.error(f"Job {endpoint} failed: {e}")
        return False
|
|
|
|
|
|
def _call_fn(label: str, fn) -> bool:
    """Invoke an in-process job callable and report whether it succeeded.

    Args:
        label: Job name used in the log lines.
        fn: Zero-argument callable; its return value is logged on success.

    Returns:
        True if ``fn`` returned normally, False if it raised. The
        exception is logged and not propagated.
    """
    try:
        outcome = fn()
        logger.info("Job %s OK: %s", label, outcome)
    except Exception as exc:
        logger.error("Job %s failed: %s", label, exc)
        return False
    return True
|
|
|
|
|
|
def run():
    """Run the scheduler loop until SIGTERM or SIGINT is received.

    Installs ``_signal_handler`` for both signals, then wakes up every
    30 seconds and walks ``JOBS``. A job runs when
    ``is_table_due(schedule, last_run[name])`` is true. ``last_run`` maps
    each job name to the ISO-8601 UTC timestamp of the tick in which it
    last succeeded, or ``None`` if it has not succeeded in this process.
    A failed job keeps its previous timestamp, so it is considered again
    on the next tick.

    Shutdown is cooperative: the signal handler clears ``_running``. The
    wait between ticks is split into short slices that re-check the flag,
    because ``time.sleep`` resumes sleeping after a signal handler returns;
    a single 30 s sleep would delay shutdown by up to 30 s, which can
    exceed the grace period a process supervisor allows before
    force-killing. Once shutdown has been requested no further jobs are
    started in the current tick.
    """
    signal.signal(signal.SIGTERM, _signal_handler)
    signal.signal(signal.SIGINT, _signal_handler)

    logger.info(f"Scheduler started. API_URL={API_URL}, {len(JOBS)} jobs configured.")

    # 30s tick is plenty: interval jobs have minute-level resolution,
    # daily jobs have a ~24 h retry window.
    tick_seconds = 30
    # Upper bound on how long a shutdown request waits while idle.
    poll_seconds = 1

    # Track last successful run per job as ISO string — matches what
    # src.scheduler.is_table_due expects.
    last_run: dict[str, str | None] = {name: None for name, *_ in JOBS}

    while _running:
        # Captured once per tick, before any job runs, so the recorded
        # time is the tick start rather than the job's completion time.
        now_iso = datetime.now(timezone.utc).isoformat()
        for name, schedule, kind, target in JOBS:
            if not _running:
                # A signal arrived while an earlier job was running; do not
                # start more (possibly long) work during shutdown.
                break
            if not is_table_due(schedule, last_run[name]):
                continue
            logger.info("Running job: %s (%s)", name, schedule)
            if kind == "http":
                endpoint, method = target
                ok = _call_api(endpoint, method)
            elif kind == "fn":
                ok = _call_fn(name, target)
            else:
                logger.error("Unknown job kind %r for %s", kind, name)
                ok = False
            if ok:
                last_run[name] = now_iso

        # Interruptible wait: sleep in short slices until the next tick or
        # until shutdown is requested, whichever comes first.
        wake_at = time.monotonic() + tick_seconds
        while _running:
            remaining = wake_at - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(poll_seconds, remaining))

    logger.info("Scheduler stopped.")
|
|
|
|
|
|
# Script entry point (``python -m services.scheduler``): start the loop.
if __name__ == "__main__":
    run()
|