Merge pull request #372 from keboola/vr/custom-scripts-integration

feat(web): instance.custom_scripts — operator-injected HTML/JS into base.html
This commit is contained in:
Vojtech 2026-05-21 14:27:37 +04:00 committed by GitHub
commit 9f15af85f0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 501 additions and 1 deletions

View file

@ -11,6 +11,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
### Added
- `instance.custom_scripts`: operator-injected HTML/JS blocks rendered
into every page that extends `base.html`. Each entry takes `name`,
`enabled`, `placement` (`head_start` | `head_end` | `body_end`), and
`html`. Use for feedback widgets (Marker.io), analytics (GTM,
PostHog), error capture (Sentry). Admin-only; rendered with `| safe`
— same trust boundary as `instance.logo_svg` / `instance.overview`.
Empty default keeps the OSS vendor-neutral. Resolved by
`app/instance_config.py::get_custom_scripts()`; surfaced in
`/admin/server-config` via `_KNOWN_FIELDS["instance"]`. Example
Marker.io block in `config/instance.yaml.example`.
- New `marketplace.curators_url` config item (editable via
`/admin/server-config`**Marketplace** section). Drives the
"See all curators →" link on the `/marketplace` curated-tab info

View file

@ -307,6 +307,24 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = {
"mint-green CTAs and eyebrow accents."
),
},
# Operator-injected HTML/JS blocks rendered into base.html.
# `kind: array` renders as a JSON textarea in the admin UI
# (per admin_server_config.html:702-708 — arrays fall back to
# the JSON path); the hint documents the per-item shape so the
# operator knows what to paste. Resolved by
# `app/instance_config.py::get_custom_scripts()`.
"custom_scripts": {
"kind": "array",
"hint": (
"Operator-injected HTML/JS blocks rendered into base.html. "
"Each entry: {name: str, enabled: bool, placement: "
"head_start|head_end|body_end, html: str}. Used for feedback "
"widgets (Marker.io), analytics (GTM, PostHog), error capture "
"(Sentry). Rendered with | safe — admin trust boundary. Review "
"third-party widget privacy posture before enabling (most "
"capture session data). Restart required after save."
),
},
},
"data_source": {
"bigquery": {

View file

@ -365,6 +365,109 @@ def get_instance_overview() -> str:
return (raw or "").strip()
_CUSTOM_SCRIPT_PLACEMENTS = ("head_start", "head_end", "body_end")
def _custom_script_enabled(value) -> bool:
"""Coerce the per-entry ``enabled`` field tolerant of YAML's many
truthiness shapes.
Operators hand-editing YAML (or pasting blocks from another source)
can land ``enabled: "false"`` (quoted string), ``enabled: 0``, or
``enabled: no`` rather than the boolean ``false``. ``bool("false")``
is ``True`` in Python, so a naive truth check silently keeps the
script live a footgun for what's meant to be a kill switch on
admin-injected JS. Missing / ``None`` live (default-on, matches
the registered field shape).
"""
if value is None:
return True
if isinstance(value, bool):
return value
if isinstance(value, (int, float)):
return value != 0
if isinstance(value, str):
return value.strip().lower() not in ("", "false", "no", "0", "off")
return bool(value)
def get_custom_scripts() -> list[dict]:
"""Operator-injected HTML/JS blocks rendered by ``base.html``.
Reads ``instance.custom_scripts`` from instance.yaml a list of
dicts ``{name, enabled, placement, html}``. Each block lands in one
of three template slots:
- ``head_start`` first thing in ``<head>``, before any CSS/JS
(rare; GTM dataLayer init).
- ``head_end`` last thing in ``<head>`` (default; analytics +
feedback widgets like Marker.io, Sentry, Hotjar).
- ``body_end`` just before ``</body>`` (vendors that explicitly
ask for bottom placement).
Trust boundary: admin-only. ``instance.yaml`` is written through
``/api/admin/server-config`` (gated by ``require_admin``) and the
rendered HTML is interpolated with ``| safe``, exactly mirroring
``instance.logo_svg`` / ``instance.overview``.
Normalization:
- Drop entries whose ``enabled`` resolves to false via
:func:`_custom_script_enabled` (handles quoted strings, 0/1, etc.
not just the Python ``False`` singleton).
- Drop entries whose ``html`` strips to empty.
- Default missing ``name`` to "" and missing ``placement`` to
"head_end".
- Drop entries whose ``placement`` isn't in the allowlist, with a
logged warning naming the offending block admin sees the
mistake instead of the server crashing.
No env-var override: the structure is a list of objects, which
doesn't round-trip cleanly through env vars; deployment-time
injection happens by writing the YAML from the deploy script.
Returns ``[]`` when YAML omits the key empty by default keeps the
OSS vendor-neutral.
"""
raw = get_value("instance", "custom_scripts", default=None)
if not raw:
return []
if not isinstance(raw, list):
logger.warning(
"instance.custom_scripts must be a list, got %s — ignoring",
type(raw).__name__,
)
return []
out: list[dict] = []
for idx, entry in enumerate(raw):
if not isinstance(entry, dict):
logger.warning(
"instance.custom_scripts[%d] must be a dict, got %s — skipping",
idx, type(entry).__name__,
)
continue
if not _custom_script_enabled(entry.get("enabled")):
continue
html = (entry.get("html") or "").strip()
if not html:
continue
placement = (entry.get("placement") or "head_end").strip()
if placement not in _CUSTOM_SCRIPT_PLACEMENTS:
logger.warning(
"instance.custom_scripts[%d] (name=%r) has unknown placement "
"%r — must be one of %s — skipping",
idx, entry.get("name", ""), placement,
", ".join(_CUSTOM_SCRIPT_PLACEMENTS),
)
continue
out.append({
"name": str(entry.get("name") or ""),
"enabled": True,
"placement": placement,
"html": html,
})
return out
def get_workspace_dir_name() -> str:
"""Filesystem-safe folder name for the analyst's local workspace
(``~/<workspace_dir_name>``). Defaults to :func:`get_instance_brand`

View file

@ -26,7 +26,7 @@ from app.instance_config import (
get_instance_admin_email, get_atlassian_base_url,
get_instance_brand, get_workspace_dir_name,
get_instance_logo_svg, get_instance_overview,
get_instance_theme,
get_instance_theme, get_custom_scripts,
)
from app.web.connector_prompts import all_connector_prompts
from app.api.me_debug import (
@ -499,6 +499,12 @@ def _build_context(
# install-block. Operator can hide it via AGNES_HOME_SHOW_AUTOMODE=0
# for cautious rollouts; same content stays on /setup-advanced.
"home_automode": {"show": get_home_automode_visibility()},
# Operator-injected HTML/JS blocks rendered into base.html at
# head_start / head_end / body_end. Admin-only (instance.yaml,
# gated by require_admin) — used for feedback widgets
# (Marker.io), analytics, error capture. Empty default keeps
# the OSS vendor-neutral.
"custom_scripts": get_custom_scripts(),
}
# Flex all extra context values for template compatibility
# (but skip ones we just populated — extras with the same key win)

View file

@ -1,8 +1,19 @@
<!DOCTYPE html>
<html lang="en" data-theme="{{ instance_theme | default('blue') }}">
<head>
{# HTML5 requires <meta charset> within the first 1024 bytes; any
operator-injected snippet must come AFTER charset + viewport to
avoid pushing the declaration past that window (which makes
browsers fall back to a locale-default encoding and historically
opened UTF-7 charset-confusion XSS). #}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
{# Operator-injected scripts (placement=head_start) — run before any
CSS/JS so vendors that need to install global hooks first (GTM
dataLayer init, etc.) work. Admin-only, see instance.custom_scripts. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_start' %}
{{ s.html | safe }}
{% endfor %}
<title>{% block title %}Data Analyst Portal{% endblock %}</title>
<link rel="stylesheet" href="{{ static_url('style-custom.css') }}">
{# Design-system tokens (`--ds-*`) — opt-in green/navy palette used
@ -23,6 +34,12 @@
still get the nav-dropdown wiring. #}
{% block head_extra %}{% endblock %}
{% include '_theme.html' %}
{# Operator-injected scripts (placement=head_end, the default) —
analytics + feedback widgets like Marker.io, Sentry, Hotjar.
Admin-only, see instance.custom_scripts. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_end' %}
{{ s.html | safe }}
{% endfor %}
</head>
<body {% block body_attrs %}{% endblock %}>
{% include '_app_header.html' %}
@ -634,5 +651,11 @@
})();
</script>
{% block scripts %}{% endblock %}
{# Operator-injected scripts (placement=body_end) — for vendors that
explicitly want bottom placement. Admin-only, see
instance.custom_scripts. #}
{% for s in custom_scripts | default([]) if s.placement == 'body_end' %}
{{ s.html | safe }}
{% endfor %}
</body>
</html>

View file

@ -1,8 +1,16 @@
<!DOCTYPE html>
<html lang="en" data-theme="{{ instance_theme | default('blue') }}">
<head>
{# HTML5 requires <meta charset> within the first 1024 bytes; any
operator-injected snippet must come AFTER charset + viewport. See
base.html for the full rationale. #}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
{# Operator-injected scripts (placement=head_start). Mirrors base.html
so login/auth pages surface custom_scripts too. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_start' %}
{{ s.html | safe }}
{% endfor %}
<title>{% block title %}Data Analyst Portal{% endblock %}</title>
<link rel="stylesheet" href="{{ static_url('style-custom.css') }}">
{# Design-system tokens (`--ds-*`) — required so `.btn-primary`
@ -11,6 +19,10 @@
card. Same reason base.html loads it globally. #}
<link rel="stylesheet" href="{{ static_url('css/design-tokens.css') }}">
{% include '_theme.html' %}
{# Operator-injected scripts (placement=head_end). Mirrors base.html. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_end' %}
{{ s.html | safe }}
{% endfor %}
</head>
<body>
{% with messages = get_flashed_messages(with_categories=true) %}
@ -27,5 +39,9 @@
{% block content %}{% endblock %}
{% include "_version_badge.html" %}
{# Operator-injected scripts (placement=body_end). Mirrors base.html. #}
{% for s in custom_scripts | default([]) if s.placement == 'body_end' %}
{{ s.html | safe }}
{% endfor %}
</body>
</html>

View file

@ -57,6 +57,27 @@ instance:
# # Prompts, Tokens, Projects). Visible only to onboarded
# # users regardless of this flag. Default true. Env:
# # AGNES_HOME_SHOW_STATUS_FRAME.
# custom_scripts: # Operator-injected HTML/JS blocks rendered into every
# # page that extends base.html. Use for feedback widgets
# # (Marker.io), analytics (GTM, PostHog), error capture
# # (Sentry), etc. Each entry needs name + enabled +
# # placement + html. Admin-only; rendered with `| safe`.
# # Review the widget's privacy posture before enabling —
# # most third-party widgets capture screenshots, console
# # logs, or user actions on submit. Resolved by
# # `app/instance_config.py::get_custom_scripts()`. No
# # env override (structure doesn't fit env vars cleanly).
# - name: "marker-io" # Example: Marker.io feedback widget.
# enabled: true # Kill switch — set false to disable without deleting.
# placement: "head_end" # head_start | head_end | body_end
# html: |
# <script>
# window.markerConfig = {
# project: 'YOUR_MARKER_IO_PROJECT_ID',
# source: 'snippet'
# };
# !function(e,r,a){if(!e.__Marker){e.__Marker={};var t=[],n={__cs:t};["show","hide","isVisible","capture","cancelCapture","unload","reload","isExtensionInstalled","setReporter","clearReporter","setCustomData","on","off"].forEach(function(e){n[e]=function(){var r=Array.prototype.slice.call(arguments);r.unshift(e),t.push(r)}}),e.Marker=n;var s=r.createElement("script");s.async=1,s.src="https://edge.marker.io/latest/shim.js";var i=r.getElementsByTagName("script")[0];i.parentNode.insertBefore(s,i)}}(window,document);
# </script>
# --- Server ---
server:

View file

@ -0,0 +1,123 @@
"""``instance.custom_scripts`` template-render coverage.
Validates that each placement slot in ``base.html`` actually fires:
``head_start`` lands before the first ``<link>`` in ``<head>``,
``head_end`` lands before ``</head>``, and ``body_end`` lands before
``</body>``. Together with ``test_instance_config.py::TestCustomScripts``
(the normalization layer), this covers the yaml-to-rendered-page path
end-to-end.
Hits ``/login`` since it extends ``base.html`` and needs no auth.
"""
from __future__ import annotations
import tempfile
import pytest
@pytest.fixture
def render_client(monkeypatch):
with tempfile.TemporaryDirectory() as tmp:
monkeypatch.setenv("DATA_DIR", tmp)
monkeypatch.setenv("TESTING", "1")
monkeypatch.setenv("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!")
from fastapi.testclient import TestClient
from app.main import app
yield TestClient(app, follow_redirects=False)
def _patch_scripts(monkeypatch, scripts):
"""Replace ``app.web.router.get_custom_scripts`` with a stub returning
``scripts``. router.py binds the import at module load, so patching
here is what _render_ctx actually sees at call time."""
import app.web.router as router_mod
monkeypatch.setattr(router_mod, "get_custom_scripts", lambda: scripts)
def test_no_custom_scripts_renders_no_snippets(render_client, monkeypatch):
_patch_scripts(monkeypatch, [])
resp = render_client.get("/login")
assert resp.status_code == 200
body = resp.text
# Sentinel strings used in the other tests — must be absent here.
assert "AGNES_CUSTOM_SCRIPT_HEAD_START" not in body
assert "AGNES_CUSTOM_SCRIPT_HEAD_END" not in body
assert "AGNES_CUSTOM_SCRIPT_BODY_END" not in body
def test_head_end_snippet_lands_before_head_close(render_client, monkeypatch):
_patch_scripts(monkeypatch, [{
"name": "marker-io",
"enabled": True,
"placement": "head_end",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_HEAD_END=1;</script>",
}])
body = render_client.get("/login").text
sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_END"
assert sentinel in body
snippet_idx = body.index(sentinel)
head_close_idx = body.index("</head>")
assert snippet_idx < head_close_idx, "head_end must render before </head>"
def test_head_start_snippet_lands_after_charset_before_first_link(render_client, monkeypatch):
_patch_scripts(monkeypatch, [{
"name": "gtm-init",
"enabled": True,
"placement": "head_start",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_HEAD_START=1;</script>",
}])
body = render_client.get("/login").text
sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_START"
assert sentinel in body
snippet_idx = body.index(sentinel)
charset_idx = body.index('<meta charset="UTF-8">')
viewport_idx = body.index('<meta name="viewport"')
first_link_idx = body.index("<link")
head_close_idx = body.index("</head>")
# HTML5 spec: <meta charset> must appear within the first 1024 bytes.
# head_start MUST land after both required <meta> tags so a long
# operator snippet can't push the charset declaration past that window
# (which would trigger locale-default encoding fallback + historical
# UTF-7 charset-confusion XSS).
assert charset_idx < snippet_idx, "head_start must render AFTER <meta charset>"
assert viewport_idx < snippet_idx, "head_start must render AFTER <meta viewport>"
# Still before CSS/JS so vendor hooks (e.g. GTM dataLayer init) install
# before any other script can read them.
assert snippet_idx < first_link_idx, "head_start must render before first <link>"
assert snippet_idx < head_close_idx
def test_body_end_snippet_lands_before_body_close(render_client, monkeypatch):
_patch_scripts(monkeypatch, [{
"name": "bottom-tag",
"enabled": True,
"placement": "body_end",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_BODY_END=1;</script>",
}])
body = render_client.get("/login").text
sentinel = "AGNES_CUSTOM_SCRIPT_BODY_END"
assert sentinel in body
snippet_idx = body.index(sentinel)
body_close_idx = body.index("</body>")
head_close_idx = body.index("</head>")
assert snippet_idx > head_close_idx, "body_end must render after </head>"
assert snippet_idx < body_close_idx
def test_all_three_placements_render_in_correct_order(render_client, monkeypatch):
_patch_scripts(monkeypatch, [
{"name": "a", "enabled": True, "placement": "head_start",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_HEAD_START=1;</script>"},
{"name": "b", "enabled": True, "placement": "head_end",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_HEAD_END=1;</script>"},
{"name": "c", "enabled": True, "placement": "body_end",
"html": "<script>window.AGNES_CUSTOM_SCRIPT_BODY_END=1;</script>"},
])
body = render_client.get("/login").text
head_start_idx = body.index("AGNES_CUSTOM_SCRIPT_HEAD_START")
head_end_idx = body.index("AGNES_CUSTOM_SCRIPT_HEAD_END")
body_end_idx = body.index("AGNES_CUSTOM_SCRIPT_BODY_END")
assert head_start_idx < head_end_idx < body_end_idx

View file

@ -160,3 +160,183 @@ class TestInstanceBrand:
assert "Bootstrap your Agnes workspace" in joined
assert "Agnes workspace is ready" in joined
mod._instance_config = None
class TestCustomScripts:
"""instance.custom_scripts — operator-injected HTML/JS blocks rendered
by base.html. Validates the normalization + filtering done by
get_custom_scripts() so the template can iterate over a clean list."""
def _reload(self, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
monkeypatch.setenv("TESTING", "1")
monkeypatch.setenv("JWT_SECRET_KEY", "test-secret-key-minimum-32-characters!!")
import importlib
import app.instance_config as mod
mod._instance_config = None
importlib.reload(mod)
return mod
def _write(self, tmp_path, yaml_body: str):
state_dir = tmp_path / "state"
state_dir.mkdir(exist_ok=True)
(state_dir / "instance.yaml").write_text(yaml_body)
def test_yaml_absent_returns_empty_list(self, tmp_path, monkeypatch):
mod = self._reload(tmp_path, monkeypatch)
assert mod.get_custom_scripts() == []
mod._instance_config = None
def test_valid_entry_normalized(self, tmp_path, monkeypatch):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - name: marker-io\n"
" enabled: true\n"
" placement: head_end\n"
" html: |\n"
" <script>window.markerConfig={project:'abc'};</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
scripts = mod.get_custom_scripts()
assert len(scripts) == 1
s = scripts[0]
assert s["name"] == "marker-io"
assert s["enabled"] is True
assert s["placement"] == "head_end"
assert "markerConfig" in s["html"]
mod._instance_config = None
def test_disabled_entry_dropped(self, tmp_path, monkeypatch):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - name: off\n"
" enabled: false\n"
" placement: head_end\n"
" html: <script>1</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
assert mod.get_custom_scripts() == []
mod._instance_config = None
def test_empty_html_dropped(self, tmp_path, monkeypatch):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - name: noop\n"
" enabled: true\n"
" placement: head_end\n"
" html: ' '\n"
))
mod = self._reload(tmp_path, monkeypatch)
assert mod.get_custom_scripts() == []
mod._instance_config = None
def test_bad_placement_dropped_with_warning(self, tmp_path, monkeypatch, caplog):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - name: typo\n"
" enabled: true\n"
" placement: body_start\n"
" html: <script>1</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
import logging
with caplog.at_level(logging.WARNING, logger="app.instance_config"):
assert mod.get_custom_scripts() == []
assert any("unknown placement" in r.message for r in caplog.records)
mod._instance_config = None
def test_missing_placement_defaults_to_head_end(self, tmp_path, monkeypatch):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - name: defaulting\n"
" enabled: true\n"
" html: <script>x</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
scripts = mod.get_custom_scripts()
assert len(scripts) == 1
assert scripts[0]["placement"] == "head_end"
mod._instance_config = None
def test_three_placements_all_pass_through(self, tmp_path, monkeypatch):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
" - {name: a, enabled: true, placement: head_start, html: '<script>1</script>'}\n"
" - {name: b, enabled: true, placement: head_end, html: '<script>2</script>'}\n"
" - {name: c, enabled: true, placement: body_end, html: '<script>3</script>'}\n"
))
mod = self._reload(tmp_path, monkeypatch)
scripts = mod.get_custom_scripts()
assert [s["placement"] for s in scripts] == ["head_start", "head_end", "body_end"]
assert [s["name"] for s in scripts] == ["a", "b", "c"]
mod._instance_config = None
def test_non_list_value_ignored_with_warning(self, tmp_path, monkeypatch, caplog):
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts: not-a-list\n"
))
mod = self._reload(tmp_path, monkeypatch)
import logging
with caplog.at_level(logging.WARNING, logger="app.instance_config"):
assert mod.get_custom_scripts() == []
assert any("must be a list" in r.message for r in caplog.records)
mod._instance_config = None
@pytest.mark.parametrize("enabled_yaml,expect_dropped", [
# Boolean false in every YAML truthy-shape the operator might use.
# All of these must drop the entry so the kill switch behaves the
# same regardless of whether the operator pasted a quoted block.
("false", True),
("False", True),
('"false"', True), # quoted string — bool("false") == True in Python
('"no"', True),
('"NO"', True),
('"off"', True),
('"0"', True),
("0", True),
# Boolean true / typical live values must keep the entry alive.
("true", False),
("True", False),
('"true"', False),
('"yes"', False),
("1", False),
])
def test_enabled_coercion(self, tmp_path, monkeypatch, enabled_yaml, expect_dropped):
"""Quoted-string + numeric `enabled` values must be coerced the same
way the operator expects from a Boolean field the kill switch is
the whole point of the field, and `bool("false") == True` would
silently leave the snippet live (review PR #372)."""
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
f" - name: probe\n"
f" enabled: {enabled_yaml}\n"
" placement: head_end\n"
" html: <script>1</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
scripts = mod.get_custom_scripts()
if expect_dropped:
assert scripts == [], (
f"enabled={enabled_yaml!r} should drop the entry but it survived"
)
else:
assert len(scripts) == 1, (
f"enabled={enabled_yaml!r} should keep the entry but it was dropped"
)
mod._instance_config = None