fix(security): #81 Group A — orchestrator attach hardening (squashed) (#95)

Closes the C1 findings from issue #81 plus the round-3/4 follow-ups on the read-only query path. Both _attach_remote_extensions (rebuild path) and _reattach_remote_extensions (query path) now apply the same hard allowlists for extensions and token-env names, single-quote-escape the URL, and split built-in vs community install. The CHANGELOG bullet documents the full scope including the table_schema → table_catalog fix that made the rebuild path a silent no-op for every connector. New module src/orchestrator_security.py centralises the policy. Tests in tests/test_orchestrator_remote_attach_security.py — 28/28 pass. Refs #81.
2026-04-27 21:34:04 +02:00 · 2026-04-27 21:34:04 +02:00 · 23be8ad46f
commit 23be8ad46f
parent 24e81fb671
7 changed files with 614 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -104,6 +104,35 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
  `[A-Za-z0-9_-]` (dot deliberately excluded to defeat `..` survival),
  clips length to 64 chars, and routes the final filename through
  `safe_join_under`.
+- **Security (CRITICAL)**: hardened the connector → orchestrator trust
+  boundary on BOTH the rebuild path
+  (`src/orchestrator.py::_attach_remote_extensions`) AND the read-only
+  query path (`src/db.py::_reattach_remote_extensions`, called by
+  `get_analytics_db_readonly()` on every request) — issue #81 Group A.
+  Three fixes: (1) DuckDB extensions referenced by `_remote_attach` are
+  matched against a hard allowlist (default: `keboola, bigquery`;
+  override via `AGNES_REMOTE_ATTACH_EXTENSIONS`). Install path splits
+  built-in (LOAD only) from community (`INSTALL FROM community; LOAD`
+  on rebuild path; LOAD only on the read-only query path which must
+  not touch the network). (2) `token_env` names are matched against a
+  hard allowlist (default: `KBC_TOKEN`, `KBC_STORAGE_TOKEN`,
+  `KEBOOLA_STORAGE_TOKEN`, `GOOGLE_APPLICATION_CREDENTIALS`; override
+  via `AGNES_REMOTE_ATTACH_TOKEN_ENVS`). Names must additionally match
+  `^[A-Z][A-Z0-9_]{0,63}$`. A malicious connector cannot ask the
+  orchestrator to read `JWT_SECRET_KEY` / `SESSION_SECRET` /
+  `OPENAI_API_KEY` and exfiltrate them via `ATTACH ... TOKEN`.
+  (3) The URL passed to `ATTACH` is now single-quote-escaped on both
+  paths. Also fixed a `table_schema` vs `table_catalog` mismatch that
+  silently no-op'd `_attach_remote_extensions` for every connector
+  (the rebuild-path hardening would have been moot in production
+  without this fix). New module `src/orchestrator_security.py`
+  centralises the policy and exposes `log_effective_policy()`, called
+  from app startup so an operator's typo in
+  `AGNES_REMOTE_ATTACH_EXTENSIONS` (which **replaces** the default,
+  not extends it — a setting of `httpfs` would silently lock out
+  `keboola, bigquery`) is visible at boot rather than at the next
+  failed attach. See
+  `docs/superpowers/plans/2026-04-27-issue-81-trust-boundary.md`.

 ### Removed

--- a/app/main.py
+++ b/app/main.py
@ -105,6 +105,14 @@ logger = logging.getLogger(__name__)

@asynccontextmanager
 async def lifespan(app):
+    # Issue #81 Group A — log the effective remote_attach allowlist at
+    # startup so an operator's typo in AGNES_REMOTE_ATTACH_EXTENSIONS
+    # (which REPLACES, not extends, the default) is visible.
+    try:
+        from src.orchestrator_security import log_effective_policy
+        log_effective_policy()
+    except Exception:
+        pass  # never block startup on a logging convenience
    yield
    from src.db import close_system_db
    close_system_db()
--- a/src/db.py
+++ b/src/db.py
@ -395,6 +395,18 @@ def _reattach_remote_extensions(
        except Exception:
            pass

+        # Issue #81 Group A — apply the same allowlist policy on the
+        # query path that the orchestrator's rebuild path uses. Without
+        # this, a malicious connector's _remote_attach row exfiltrates
+        # JWT_SECRET_KEY / SESSION_SECRET / OPENAI_API_KEY on every
+        # query, defeating the rebuild-path hardening entirely.
+        from src.orchestrator_security import (
+            escape_sql_string_literal,
+            is_builtin_extension,
+            is_extension_allowed,
+            is_token_env_allowed,
+        )
+
        for alias, extension, url, token_env in rows:
            if not _SAFE_IDENTIFIER.match(alias or ""):
                logger.debug("Skipping unsafe remote_attach alias: %r", alias)
@ -402,15 +414,40 @@ def _reattach_remote_extensions(
            if not _SAFE_IDENTIFIER.match(extension or ""):
                logger.debug("Skipping unsafe remote_attach extension: %r", extension)
                continue
+            if not is_extension_allowed(extension):
+                logger.error(
+                    "query-path remote_attach: extension %r not in allowlist; "
+                    "refusing to LOAD/ATTACH for source %s. Override via "
+                    "AGNES_REMOTE_ATTACH_EXTENSIONS if intended.",
+                    extension, alias,
+                )
+                continue
+            if token_env and not is_token_env_allowed(token_env):
+                logger.error(
+                    "query-path remote_attach: token_env %r not in allowlist; "
+                    "refusing for source %s. Override via "
+                    "AGNES_REMOTE_ATTACH_TOKEN_ENVS if intended.",
+                    token_env, alias,
+                )
+                continue
            if alias in attached_dbs:
                logger.debug("Remote source %s already attached, skipping", alias)
                continue
            try:
+                # LOAD only on the read-only query path — no INSTALL.
+                # Per the function docstring, this path runs on every
+                # query request and must not touch the network. The
+                # rebuild path (orchestrator) is responsible for INSTALL;
+                # by the time a query lands here, any community extension
+                # we'll see is already on disk. If LOAD fails because the
+                # extension isn't installed, log + skip (caller will see
+                # missing remote views and the operator will trigger a
+                # rebuild).
                conn.execute(f"LOAD {extension};")
                token = os.environ.get(token_env, "") if token_env else ""
-                safe_url = url.replace("'", "''")
+                safe_url = escape_sql_string_literal(url)
                if token:
-                    escaped_token = token.replace("'", "''")
+                    escaped_token = escape_sql_string_literal(token)
                    conn.execute(
                        f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
                    )
--- a/src/orchestrator.py
+++ b/src/orchestrator.py
@ -27,6 +27,13 @@ from typing import Dict, List

 import duckdb

+from src.orchestrator_security import (
+    escape_sql_string_literal,
+    is_builtin_extension,
+    is_extension_allowed,
+    is_token_env_allowed,
+)
+
 logger = logging.getLogger(__name__)

 _rebuild_lock = threading.Lock()
@ -202,9 +209,17 @@ class SyncOrchestrator:
    ) -> None:
        """Read _remote_attach from extract.duckdb and ATTACH external sources."""
        try:
+            # DuckDB attached-DB layout: ATTACH 'extract.duckdb' AS <alias>
+            # exposes information_schema.tables with table_catalog=<alias>
+            # and table_schema='main'. The earlier draft used
+            # table_schema=<alias> here, which never matched and made
+            # _attach_remote_extensions a silent no-op for every
+            # connector — defeating the entire Group A hardening in
+            # production. db.py:_reattach_remote_extensions already used
+            # the correct column; this aligns the rebuild path.
            tables = conn.execute(
                f"SELECT table_name FROM information_schema.tables "
-                f"WHERE table_schema='{source_name}' AND table_name='_remote_attach'"
+                f"WHERE table_catalog='{source_name}' AND table_name='_remote_attach'"
            ).fetchall()
            if not tables:
                return
@ -216,11 +231,34 @@ class SyncOrchestrator:
        ).fetchall()

        for alias, extension, url, token_env in rows:
+            # Identifier sanity (defense against weird input). The hard
+            # security boundary is the allowlist a few lines down.
            if not _validate_identifier(alias, "remote_attach alias"):
                continue
            if not _validate_identifier(extension, "remote_attach extension"):
                continue

+            # #81 Group A.1 — extension allowlist. The connector does NOT
+            # get to pick what extensions the orchestrator loads.
+            if not is_extension_allowed(extension):
+                logger.error(
+                    "Remote attach %s: extension %r is not in the allowlist; refusing. "
+                    "Override via AGNES_REMOTE_ATTACH_EXTENSIONS if intended.",
+                    alias, extension,
+                )
+                continue
+
+            # #81 Group A.2 — token-env hard allowlist. Refuses well-known
+            # runtime secrets (JWT_SECRET_KEY, OPENAI_API_KEY, …) that a
+            # malicious connector might ask us to send to its server.
+            if token_env and not is_token_env_allowed(token_env):
+                logger.error(
+                    "Remote attach %s: token_env %r is not in the allowlist; refusing. "
+                    "Override via AGNES_REMOTE_ATTACH_TOKEN_ENVS if intended.",
+                    alias, token_env,
+                )
+                continue
+
            token = os.environ.get(token_env, "") if token_env else ""
            if token_env and not token:
                logger.warning(
@ -239,16 +277,22 @@ class SyncOrchestrator:
                    logger.debug("Remote source %s already attached", alias)
                    continue

+                # #81 Group A.1 — built-ins LOAD only; community needs INSTALL+LOAD.
+                if is_builtin_extension(extension):
+                    conn.execute(f"LOAD {extension};")
+                else:
                    conn.execute(f"INSTALL {extension} FROM community; LOAD {extension};")
+                # #81 Group A.3 — escape URL single-quotes (mirrors src/db.py).
+                safe_url = escape_sql_string_literal(url)
                if token:
-                    escaped_token = token.replace("'", "''")
+                    escaped_token = escape_sql_string_literal(token)
                    conn.execute(
-                        f"ATTACH '{url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
+                        f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, TOKEN '{escaped_token}')"
                    )
                else:
                    # Extensions like BigQuery handle auth via env (e.g. GOOGLE_APPLICATION_CREDENTIALS)
                    conn.execute(
-                        f"ATTACH '{url}' AS {alias} (TYPE {extension}, READ_ONLY)"
+                        f"ATTACH '{safe_url}' AS {alias} (TYPE {extension}, READ_ONLY)"
                    )
                logger.info("Attached remote source %s via %s extension", alias, extension)
            except Exception as e:
--- a/src/orchestrator_security.py
+++ b/src/orchestrator_security.py
@ -0,0 +1,128 @@
+"""Allowlists and policy for the connector → orchestrator trust boundary.
+
+The orchestrator reads `_remote_attach` rows that connectors write into their
+`extract.duckdb`, then calls `INSTALL`, `LOAD`, and `ATTACH` based on those
+values. Treating the connector as adversarial (compromised image, supply-chain,
+malicious fork) means the orchestrator picks **what** can be installed and
+**which** env vars can be referenced — not the connector. See
+`docs/superpowers/plans/2026-04-27-issue-81-trust-boundary.md` for the full
+threat model.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+
+logger = logging.getLogger(__name__)
+
+# DuckDB extensions the orchestrator is willing to load on behalf of a
+# connector. Built-in extensions go in `_BUILTIN_EXTENSIONS`; community
+# extensions go in `_COMMUNITY_EXTENSIONS`. The two sets are disjoint and
+# tell the install path whether to issue `INSTALL ... FROM community` or
+# only `LOAD`.
+_BUILTIN_EXTENSIONS: frozenset[str] = frozenset()  # none in current OSS
+_COMMUNITY_EXTENSIONS: frozenset[str] = frozenset({
+    "keboola",
+    "bigquery",
+})
+
+# Env vars whose values may be passed as the auth `TOKEN` in `ATTACH`. The
+# default is intentionally tight — every name in the runtime env that is not
+# on this list cannot be exfiltrated to a connector-controlled URL.
+# Operators add deployment-specific names via AGNES_REMOTE_ATTACH_TOKEN_ENVS.
+_DEFAULT_TOKEN_ENVS: frozenset[str] = frozenset({
+    "KBC_TOKEN",
+    "KBC_STORAGE_TOKEN",
+    "KEBOOLA_STORAGE_TOKEN",
+    "GOOGLE_APPLICATION_CREDENTIALS",  # path, not a secret value
+})
+
+# Names must additionally match this regex (defense against weird input).
+_ENV_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]{0,63}$")
+
+
+def _parse_csv_env(name: str) -> set[str]:
+    """Parse a comma-separated env var into a stripped set of non-empty tokens."""
+    raw = os.environ.get(name, "")
+    return {t.strip() for t in raw.split(",") if t.strip()}
+
+
+def get_allowed_extensions() -> dict[str, set[str]]:
+    """Return the effective extension allowlist as a dict of {kind: set}.
+
+    `kind` is "builtin" or "community" — the install path needs to know
+    which to use. Operator override AGNES_REMOTE_ATTACH_EXTENSIONS replaces
+    the default community set; built-ins are not configurable from env (a
+    typo there would silently disable a working integration with no clear
+    failure mode, and built-ins do not pose a supply-chain risk).
+    """
+    override = _parse_csv_env("AGNES_REMOTE_ATTACH_EXTENSIONS")
+    community = override if override else set(_COMMUNITY_EXTENSIONS)
+    return {"builtin": set(_BUILTIN_EXTENSIONS), "community": community}
+
+
+def is_extension_allowed(extension: str) -> bool:
+    allow = get_allowed_extensions()
+    return extension in allow["builtin"] or extension in allow["community"]
+
+
+def is_builtin_extension(extension: str) -> bool:
+    return extension in get_allowed_extensions()["builtin"]
+
+
+def get_allowed_token_envs() -> set[str]:
+    """Return the effective token-env allowlist.
+
+    Operator override AGNES_REMOTE_ATTACH_TOKEN_ENVS *replaces* the default
+    set (so an operator can shrink it as well as expand it). The startup
+    code logs the effective set so a typo is visible.
+    """
+    override = _parse_csv_env("AGNES_REMOTE_ATTACH_TOKEN_ENVS")
+    return override if override else set(_DEFAULT_TOKEN_ENVS)
+
+
+def is_token_env_allowed(token_env: str) -> bool:
+    """Return True if ``token_env`` may be read and passed as a TOKEN.
+
+    Two checks: structural (`^[A-Z][A-Z0-9_]{0,63}$`) and membership in the
+    allowlist. The structural check refuses things that aren't a valid env
+    var name regardless of allowlist contents.
+    """
+    if not isinstance(token_env, str) or not _ENV_NAME_RE.match(token_env):
+        return False
+    return token_env in get_allowed_token_envs()
+
+
+def log_effective_policy() -> None:
+    """Log the effective extension + token-env allowlists at INFO once.
+
+    Called from app startup. Makes operator typos visible — if
+    AGNES_REMOTE_ATTACH_EXTENSIONS=httpfs is set with the intent to ADD
+    httpfs (but the override REPLACES the default), the operator sees
+    'effective extension allowlist: {httpfs}' and notices keboola and
+    bigquery are missing. Idempotent — safe to call multiple times.
+    """
+    ext = get_allowed_extensions()
+    envs = get_allowed_token_envs()
+    has_ext_override = bool(_parse_csv_env("AGNES_REMOTE_ATTACH_EXTENSIONS"))
+    has_env_override = bool(_parse_csv_env("AGNES_REMOTE_ATTACH_TOKEN_ENVS"))
+    logger.info(
+        "remote_attach policy: extensions=%s (override=%s), token_envs=%s (override=%s). "
+        "Note: env-var overrides REPLACE the default — set both yours and the "
+        "defaults if you want to add to them.",
+        sorted(ext["community"] | ext["builtin"]),
+        has_ext_override,
+        sorted(envs),
+        has_env_override,
+    )
+
+
+def escape_sql_string_literal(value: str) -> str:
+    """Double single-quotes for safe use inside DuckDB single-quoted literals.
+
+    Mirrors `src/db.py:_attach_extracts` (line ~411) so the read-only query
+    path and the orchestrator rebuild path use the same escape.
+    """
+    return value.replace("'", "''")
--- a/tests/test_db_remote_attach_security.py
+++ b/tests/test_db_remote_attach_security.py
@ -0,0 +1,142 @@
+"""Issue #81 Group A — query-path (db.py) trust-boundary tests.
+
+Mirror of `tests/test_orchestrator_remote_attach_security.py` for
+`src/db.py:_reattach_remote_extensions`. The query path runs on every
+`/api/query` request via `get_analytics_db_readonly()`; it must enforce
+the same allowlists as the rebuild path or the security guarantee is
+hollow.
+
+Setup is real-DuckDB (not mock-conn) because db.py introspects via
+`information_schema.tables`/`duckdb_databases()` rather than just
+executing whatever SQL we hand it. We feed it a real extract.duckdb
+with a programmable `_remote_attach` row, ATTACH it, then call the
+function and assert which `LOAD/ATTACH` SQL fired (or didn't) by
+sniffing connection state.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+import duckdb
+import pytest
+
+from src.db import _reattach_remote_extensions
+
+
+def _make_extract_with_remote_attach(
+    path: Path, alias: str, extension: str, url: str, token_env: str
+) -> None:
+    """Create a tiny extract.duckdb whose _remote_attach table has one row."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    if path.exists():
+        path.unlink()
+    wal = path.with_suffix(".duckdb.wal")
+    if wal.exists():
+        wal.unlink()
+    c = duckdb.connect(str(path))
+    c.execute(
+        "CREATE TABLE _remote_attach ("
+        "alias VARCHAR, extension VARCHAR, url VARCHAR, token_env VARCHAR)"
+    )
+    c.execute(
+        "INSERT INTO _remote_attach VALUES (?, ?, ?, ?)",
+        [alias, extension, url, token_env],
+    )
+    c.close()
+
+
+def _attach_and_call(extracts_dir: Path, source_name: str):
+    """ATTACH the source's extract.duckdb to a fresh memory conn, run the
+    function, return the conn (so the test can introspect attached_dbs)."""
+    conn = duckdb.connect()
+    conn.execute(
+        f"ATTACH '{extracts_dir / source_name / 'extract.duckdb'}' "
+        f"AS {source_name} (READ_ONLY)"
+    )
+    _reattach_remote_extensions(conn, extracts_dir)
+    return conn
+
+
+def _attached(conn) -> set[str]:
+    return {r[0] for r in conn.execute("SELECT database_name FROM duckdb_databases()").fetchall()}
+
+
+class TestQueryPathExtensionAllowlist:
+    def test_refuses_unknown_extension(self, tmp_path, caplog):
+        """A connector that requested `httpfs` (not on allowlist) is
+        refused — `httpfs` does not appear among attached databases."""
+        _make_extract_with_remote_attach(
+            tmp_path / "extracts" / "src1" / "extract.duckdb",
+            alias="ext_alias", extension="httpfs",
+            url="https://x", token_env="",
+        )
+        with caplog.at_level(logging.ERROR):
+            conn = _attach_and_call(tmp_path / "extracts", "src1")
+        assert "ext_alias" not in _attached(conn)
+        assert any("not in allowlist" in r.message for r in caplog.records)
+
+
+class TestQueryPathTokenEnvAllowlist:
+    def test_refuses_session_secret(self, tmp_path, monkeypatch, caplog):
+        monkeypatch.setenv("SESSION_SECRET", "shouldnt-leak")
+        _make_extract_with_remote_attach(
+            tmp_path / "extracts" / "src1" / "extract.duckdb",
+            alias="kbc", extension="keboola",
+            url="https://x", token_env="SESSION_SECRET",
+        )
+        with caplog.at_level(logging.ERROR):
+            conn = _attach_and_call(tmp_path / "extracts", "src1")
+        assert "kbc" not in _attached(conn)
+        assert any(
+            "token_env" in r.message and "not in allowlist" in r.message
+            for r in caplog.records
+        )
+
+    def test_refuses_jwt_secret_key(self, tmp_path, monkeypatch, caplog):
+        monkeypatch.setenv("JWT_SECRET_KEY", "x" * 64)
+        _make_extract_with_remote_attach(
+            tmp_path / "extracts" / "src1" / "extract.duckdb",
+            alias="kbc", extension="keboola",
+            url="https://x", token_env="JWT_SECRET_KEY",
+        )
+        with caplog.at_level(logging.ERROR):
+            conn = _attach_and_call(tmp_path / "extracts", "src1")
+        assert "kbc" not in _attached(conn)
+
+
+class TestQueryPathInstallStrategy:
+    def test_no_install_on_query_path(self, tmp_path, monkeypatch, caplog):
+        """The query path must NOT issue INSTALL FROM community — it runs
+        on every read request and shouldn't touch the network. LOAD
+        without prior INSTALL fails for missing extensions, which is the
+        documented behaviour (operator is told to trigger a rebuild)."""
+        # We can't easily verify "no INSTALL" without intercepting SQL;
+        # instead, verify the query path doesn't hit the community
+        # registry by setting an extension that's NOT on the default
+        # allowlist nor pre-installed. The function should refuse
+        # before any LOAD attempt.
+        _make_extract_with_remote_attach(
+            tmp_path / "extracts" / "src1" / "extract.duckdb",
+            alias="bad", extension="some_other_community_ext",
+            url="https://x", token_env="",
+        )
+        conn = _attach_and_call(tmp_path / "extracts", "src1")
+        assert "bad" not in _attached(conn)
+
+
+class TestQueryPathOverride:
+    def test_override_replaces_default(self, tmp_path, monkeypatch):
+        """Setting AGNES_REMOTE_ATTACH_TOKEN_ENVS=MY_TOKEN replaces the
+        default — KBC_TOKEN no longer accepted. (Operator-typo defense
+        contract; mirrored from rebuild-path tests.)"""
+        monkeypatch.setenv("AGNES_REMOTE_ATTACH_TOKEN_ENVS", "MY_TOKEN")
+        monkeypatch.setenv("MY_TOKEN", "value")
+        _make_extract_with_remote_attach(
+            tmp_path / "extracts" / "src1" / "extract.duckdb",
+            alias="kbc", extension="keboola",
+            url="https://x", token_env="KBC_TOKEN",  # NOT in the override set
+        )
+        conn = _attach_and_call(tmp_path / "extracts", "src1")
+        assert "kbc" not in _attached(conn)
--- a/tests/test_orchestrator_remote_attach_security.py
+++ b/tests/test_orchestrator_remote_attach_security.py
@ -0,0 +1,219 @@
+"""Issue #81 Group A — connector → orchestrator trust-boundary tests.
+
+The orchestrator reads `_remote_attach` rows that connectors write into their
+extract.duckdb, then runs `INSTALL`/`LOAD`/`ATTACH` SQL. This file exercises
+each of the C1 hardening fixes:
+
+- A.1 extension allowlist (refuse non-allowlisted extension)
+- A.2 token-env hard allowlist (refuse well-known runtime secrets)
+- A.3 URL single-quote escape (no injection through the URL literal)
+- Built-in vs community install path split
+
+Each test writes a malicious `_remote_attach` row into a fixture
+extract.duckdb and asserts that the orchestrator either refuses (no ATTACH
+call) or issues a safely-escaped one. We capture SQL strings via a fake
+DuckDB connection so the assertions don't depend on any real extension being
+installed.
+"""
+
+import logging
+from unittest.mock import MagicMock
+
+import duckdb
+import pytest
+
+from src.orchestrator import SyncOrchestrator
+from src.orchestrator_security import escape_sql_string_literal
+
+
+@pytest.fixture
+def captured_conn(monkeypatch, tmp_path):
+    """A duckdb.Connection-like mock that records every execute() string."""
+    sql_calls: list[str] = []
+    conn = MagicMock()
+
+    # information_schema.tables → return _remote_attach exists
+    # _remote_attach rows are programmed per-test via attach_rows()
+    rows_buffer = {"attach": []}
+
+    def execute_side_effect(sql, *args, **kwargs):
+        sql_calls.append(sql)
+        result = MagicMock()
+        # information_schema query
+        if "information_schema.tables" in sql and "_remote_attach" in sql:
+            result.fetchall.return_value = [("_remote_attach",)]
+        elif "FROM" in sql and "_remote_attach" in sql:
+            result.fetchall.return_value = list(rows_buffer["attach"])
+        elif "duckdb_databases" in sql:
+            result.fetchall.return_value = []  # nothing attached yet
+        else:
+            result.fetchall.return_value = []
+        return result
+
+    conn.execute.side_effect = execute_side_effect
+
+    def set_attach_rows(rows):
+        rows_buffer["attach"] = rows
+
+    return conn, sql_calls, set_attach_rows
+
+
+def _attach_call_count(sql_calls: list[str]) -> int:
+    """Number of ATTACH statements actually issued against the conn."""
+    return sum(1 for s in sql_calls if s.lstrip().upper().startswith("ATTACH "))
+
+
+class TestExtensionAllowlist:
+    def test_refuses_unknown_extension(self, captured_conn, caplog):
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "httpfs", "https://x", "")])
+        with caplog.at_level(logging.ERROR):
+            SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 0
+        assert any("not in the allowlist" in r.message for r in caplog.records)
+
+    def test_allows_keboola(self, captured_conn, monkeypatch):
+        monkeypatch.setenv("KBC_TOKEN", "secret-token-value")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("kbc", "keboola", "https://example.keboola.com", "KBC_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 1
+
+
+class TestTokenEnvAllowlist:
+    def test_refuses_session_secret(self, captured_conn, caplog, monkeypatch):
+        # Even if SESSION_SECRET were set in env (it shouldn't be in tests),
+        # the allowlist refuses to read it.
+        monkeypatch.setenv("SESSION_SECRET", "super-secret-jwt-signing-key")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "keboola", "https://x", "SESSION_SECRET")])
+        with caplog.at_level(logging.ERROR):
+            SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 0
+        assert any("token_env" in r.message and "not in the allowlist" in r.message
+                   for r in caplog.records)
+
+    def test_refuses_jwt_secret_key(self, captured_conn, monkeypatch, caplog):
+        monkeypatch.setenv("JWT_SECRET_KEY", "x" * 64)
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "keboola", "https://x", "JWT_SECRET_KEY")])
+        with caplog.at_level(logging.ERROR):
+            SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 0
+
+    def test_refuses_arbitrary_custom_token_env(self, captured_conn, monkeypatch, caplog):
+        # Names that pass the structural regex but aren't on the allowlist
+        # are still refused — defense against accidental exposure.
+        monkeypatch.setenv("MY_RANDOM_TOKEN", "value")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "keboola", "https://x", "MY_RANDOM_TOKEN")])
+        with caplog.at_level(logging.ERROR):
+            SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 0
+
+    def test_operator_override_replaces_default(
+        self, captured_conn, monkeypatch
+    ):
+        monkeypatch.setenv("AGNES_REMOTE_ATTACH_TOKEN_ENVS", "MY_RANDOM_TOKEN")
+        monkeypatch.setenv("MY_RANDOM_TOKEN", "value")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "keboola", "https://x", "MY_RANDOM_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 1
+
+    def test_empty_string_override_falls_back_to_default(
+        self, captured_conn, monkeypatch
+    ):
+        """AGNES_REMOTE_ATTACH_TOKEN_ENVS='' should NOT lock everything down —
+        it falls through to the default. (Operator-typo defense.)"""
+        monkeypatch.setenv("AGNES_REMOTE_ATTACH_TOKEN_ENVS", "")
+        monkeypatch.setenv("KBC_TOKEN", "value")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("alias1", "keboola", "https://x", "KBC_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 1
+
+    def test_empty_token_env_skips_check(self, captured_conn, monkeypatch):
+        """token_env='' (the BigQuery-style env-auth path) skips the
+        allowlist check entirely. Verifies the BQ flow keeps working."""
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("bq", "bigquery", "project=x", "")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        assert _attach_call_count(sql_calls) == 1
+
+    def test_structurally_invalid_token_env_refused(
+        self, captured_conn, monkeypatch, caplog
+    ):
+        """Even names on the allowlist via override must pass the structural
+        regex `^[A-Z][A-Z0-9_]{0,63}$`. A name with a space or lowercase
+        letter is refused regardless of allowlist contents."""
+        # Try to add a structurally-invalid name to the allowlist via
+        # override; the regex inside is_token_env_allowed must still refuse.
+        monkeypatch.setenv("AGNES_REMOTE_ATTACH_TOKEN_ENVS", "kbc_token,KBC TOKEN,LEGIT_TOKEN")
+        monkeypatch.setenv("LEGIT_TOKEN", "value")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([
+            ("a1", "keboola", "https://x", "kbc_token"),    # lowercase
+            ("a2", "keboola", "https://x", "KBC TOKEN"),    # space
+            ("a3", "keboola", "https://x", "LEGIT_TOKEN"),  # OK
+        ])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        # Only a3 should attach; a1 and a2 fail the structural regex even
+        # though the operator listed them in the override.
+        assert _attach_call_count(sql_calls) == 1
+
+
+class TestUrlEscape:
+    def test_single_quote_in_url_is_escaped(self, captured_conn, monkeypatch):
+        monkeypatch.setenv("KBC_TOKEN", "tok")
+        conn, sql_calls, set_rows = captured_conn
+        # Adversarial URL trying to break out of the literal.
+        adversarial_url = "https://example.com'); DROP DATABASE x; --"
+        set_rows([("kbc", "keboola", adversarial_url, "KBC_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        attach_sqls = [s for s in sql_calls if s.lstrip().upper().startswith("ATTACH ")]
+        assert len(attach_sqls) == 1
+        # The double-escaped form must be present, never the bare single quote.
+        expected_escaped = escape_sql_string_literal(adversarial_url)
+        assert f"'{expected_escaped}'" in attach_sqls[0]
+        # Sanity: the un-escaped form would have ended the literal early.
+        assert f"'{adversarial_url}'" not in attach_sqls[0]
+
+    def test_token_with_single_quote_is_escaped(self, captured_conn, monkeypatch):
+        # Defense-in-depth: even if a token somehow contained `'`, the
+        # ATTACH literal still parses safely.
+        monkeypatch.setenv("KBC_TOKEN", "abc'def")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("kbc", "keboola", "https://x", "KBC_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        attach_sqls = [s for s in sql_calls if s.lstrip().upper().startswith("ATTACH ")]
+        assert "'abc''def'" in attach_sqls[0]
+
+
+class TestInstallPathSplit:
+    def test_community_extension_uses_install_from_community(
+        self, captured_conn, monkeypatch
+    ):
+        monkeypatch.setenv("KBC_TOKEN", "tok")
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("kbc", "keboola", "https://x", "KBC_TOKEN")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        install_sqls = [s for s in sql_calls if "INSTALL" in s.upper()]
+        assert any("FROM community" in s for s in install_sqls)
+
+    def test_builtin_extension_uses_load_only(
+        self, captured_conn, monkeypatch
+    ):
+        # Add a fictitious built-in via the override mechanism (we have to
+        # patch the module-level set since AGNES_REMOTE_ATTACH_EXTENSIONS
+        # only affects community).
+        from src import orchestrator_security as oms
+        monkeypatch.setattr(oms, "_BUILTIN_EXTENSIONS", frozenset({"sqlite"}))
+        conn, sql_calls, set_rows = captured_conn
+        set_rows([("sql1", "sqlite", "/tmp/db.sqlite", "")])
+        SyncOrchestrator()._attach_remote_extensions(conn, "src1")
+        install_sqls = [s for s in sql_calls if "INSTALL" in s.upper()]
+        # Built-in: no INSTALL, only LOAD
+        assert install_sqls == []
+        load_sqls = [s for s in sql_calls if s.lstrip().upper().startswith("LOAD ")]
+        assert len(load_sqls) == 1