"""Shared-secret auth path for the in-cluster scheduler service.

The scheduler container ships every cron tick to the FastAPI app over HTTP
(see ``services.scheduler.__main__``). It needs a long-lived credential to
authenticate itself, but minting a real PAT for it requires a logged-in
session — chicken-and-egg at first boot.

The pragmatic solution: both the ``app`` and ``scheduler`` containers source
the same ``.env`` (via Docker Compose ``env_file: .env``). The
``infra/modules/customer-instance/startup-script.sh.tpl`` generates a random
``SCHEDULER_API_TOKEN`` once at VM provisioning and writes it there. When a
caller presents that exact secret as ``Authorization: Bearer <token>``, the
app loads (or seeds on demand) a synthetic ``scheduler@system.local`` user
that is a member of the ``Admin`` system group — so existing RBAC paths
continue to work without special-casing.

Constraints on the secret (enforced here, not parsed):

- Empty / unset → this auth path is **disabled**. Production deploys should
  set it; dev / LOCAL_DEV_MODE typically doesn't, since the scheduler rides
  the dev-bypass instead.
- Length < 32 → treated as misconfiguration and disabled. Prevents an
  operator typo that sets ``SCHEDULER_API_TOKEN=todo`` from accidentally
  granting admin to a 4-character bearer.
- Comparison uses :func:`hmac.compare_digest` — constant-time with respect
  to the secret's content, so a remote caller cannot recover the secret
  byte by byte through a timing attack. (The length of the secret may still
  be observable; its value is not.)

Audit: every action by this user is attributed to
``scheduler@system.local``, visible in ``audit_log`` as a normal admin
actor. Rotating the secret is ``edit .env → docker compose restart app
scheduler``; no DB write needed.
"""

from __future__ import annotations

import hmac
import logging
import os
import uuid
from typing import Optional

import duckdb

logger = logging.getLogger(__name__)

# Identity of the synthetic user that backs the shared-secret auth path.
# Kept stable so audit-log entries from the scheduler are easy to filter.
SCHEDULER_USER_EMAIL = "scheduler@system.local"
SCHEDULER_USER_NAME = "Scheduler"

# Floor on the secret length, counted in characters. A randomly generated
# 32-character secret carries at least 128 bits of entropy even when it is
# hex-encoded; well above the brute-force frontier and well above any typo
# a human is plausibly going to make.
SCHEDULER_TOKEN_MIN_LENGTH = 32


def get_scheduler_secret() -> str:
    """Return the configured shared secret, stripped. Empty when disabled.

    Reads ``SCHEDULER_API_TOKEN`` from the process environment on every
    call, so a changed environment is picked up without caching concerns.
    """
    return os.environ.get("SCHEDULER_API_TOKEN", "").strip()


def is_scheduler_token(token: str) -> bool:
    """True iff ``token`` exactly matches the configured shared secret.

    Args:
        token: The bearer credential presented by the caller (untrusted).

    Returns:
        ``False`` when ``token`` is empty or not a string, or when the
        configured secret is empty or shorter than
        ``SCHEDULER_TOKEN_MIN_LENGTH`` (auth path disabled). Otherwise the
        result of a constant-time comparison of token and secret.
    """
    if not token or not isinstance(token, str):
        return False

    secret = get_scheduler_secret()
    # An empty secret has length 0, so this single check covers both the
    # "unset" and the "too short" cases.
    if len(secret) < SCHEDULER_TOKEN_MIN_LENGTH:
        return False

    # hmac.compare_digest() only accepts ASCII-only ``str`` arguments and
    # raises TypeError otherwise. The token is caller-controlled, so compare
    # bytes instead: a non-ASCII bearer must be rejected, not blow up.
    # ``surrogatepass`` keeps the encoding total (lone surrogates can show
    # up in values read from the environment) and injective.
    return hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        secret.encode("utf-8", "surrogatepass"),
    )


def ensure_scheduler_user(conn: duckdb.DuckDBPyConnection) -> dict:
    """Idempotently provision the scheduler user + Admin group membership.

    Called both from the app's startup hook (so the user exists from the
    very first boot) and lazily from :func:`get_scheduler_user` so a token
    presented before the next restart of the app still resolves.

    Args:
        conn: Open DuckDB connection used for all repository access.

    Returns:
        The user dict in the same shape ``UserRepository.get_by_email``
        yields elsewhere — the caller treats it as any other authenticated
        user.

    Raises:
        Whatever the repositories or ``conn.execute`` raise; only the
        system-plugin fanout is best-effort and swallowed (logged).
    """
    # Project imports are kept function-local, as in the original module.
    from src.db import SYSTEM_ADMIN_GROUP
    from src.repositories.user_group_members import UserGroupMembersRepository
    from src.repositories.users import UserRepository

    repo = UserRepository(conn)
    user = repo.get_by_email(SCHEDULER_USER_EMAIL)
    if not user:
        user_id = str(uuid.uuid4())
        repo.create(
            id=user_id,
            email=SCHEDULER_USER_EMAIL,
            name=SCHEDULER_USER_NAME,
            # No password_hash — this user authenticates via the shared
            # secret only, never via /auth/login. Keeps the bootstrap
            # check ("any user has a password?") accurate.
            password_hash=None,
        )
        # v39: scheduler service user gets the same mandatory tier as
        # human users. The scheduler's plugin set is rarely consumed
        # interactively, but keeping the fanout symmetric prevents
        # surprising drift when an operator inspects this user's stack.
        try:
            from src.repositories.user_curated_subscriptions import (
                UserCuratedSubscriptionsRepository,
            )

            UserCuratedSubscriptionsRepository(
                conn
            ).fanout_system_for_user(user_id)
        except Exception:
            # Deliberately best-effort: a fanout failure must not prevent
            # the scheduler user from being provisioned.
            logger.exception(
                "system-plugin fanout failed for scheduler user",
            )
        # Re-read so the returned dict has the repository's canonical shape.
        user = repo.get_by_email(SCHEDULER_USER_EMAIL)
        logger.info("Seeded scheduler service user: %s", SCHEDULER_USER_EMAIL)

    # Membership is (re-)asserted on every call, not only on first seed;
    # presumably ``add_member`` tolerates an existing row — verify against
    # UserGroupMembersRepository.
    admin_group = conn.execute(
        "SELECT id FROM user_groups WHERE name = ?",
        [SYSTEM_ADMIN_GROUP],
    ).fetchone()
    if admin_group:
        UserGroupMembersRepository(conn).add_member(
            user_id=user["id"],
            group_id=admin_group[0],
            source="system_seed",
            added_by="app.auth.scheduler_token:ensure_scheduler_user",
        )
    else:
        # Previously a silent skip: without the group row the scheduler
        # authenticates but is not added to the Admin group. Surface it.
        logger.warning(
            "System group %r not found; skipping Admin membership for "
            "scheduler user",
            SYSTEM_ADMIN_GROUP,
        )
    return user


def get_scheduler_user(conn: duckdb.DuckDBPyConnection) -> Optional[dict]:
    """Look up the scheduler user, seeding it on demand if absent.

    Args:
        conn: Open DuckDB connection used for the lookup and, if needed,
            for provisioning via :func:`ensure_scheduler_user`.

    Returns:
        The scheduler user dict, or ``None`` only when seeding fails —
        typically a malformed schema or an out-of-band DB error. The caller
        (``get_current_user``) maps ``None`` to a normal 401 so the failure
        is observable but does not crash.
    """
    from src.repositories.users import UserRepository

    user = UserRepository(conn).get_by_email(SCHEDULER_USER_EMAIL)
    if user:
        return user
    try:
        return ensure_scheduler_user(conn)
    except Exception as e:  # noqa: BLE001
        # logger.exception keeps the traceback; the message alone is rarely
        # enough to diagnose a schema or DB failure after the fact.
        logger.exception(
            "Failed to provision scheduler user on demand: %s", e
        )
        return None