diff --git a/CHANGELOG.md b/CHANGELOG.md index 1556390..d6b3276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] ### Added +- `instance.custom_scripts`: operator-injected HTML/JS blocks rendered + into every page that extends `base.html`. Each entry takes `name`, + `enabled`, `placement` (`head_start` | `head_end` | `body_end`), and + `html`. Use for feedback widgets (Marker.io), analytics (GTM, + PostHog), error capture (Sentry). Admin-only; rendered with `| safe` + — same trust boundary as `instance.logo_svg` / `instance.overview`. + Empty default keeps the OSS vendor-neutral. Resolved by + `app/instance_config.py::get_custom_scripts()`; surfaced in + `/admin/server-config` via `_KNOWN_FIELDS["instance"]`. Example + Marker.io block in `config/instance.yaml.example`. - New `marketplace.curators_url` config item (editable via `/admin/server-config` → **Marketplace** section). Drives the "See all curators →" link on the `/marketplace` curated-tab info diff --git a/app/api/admin.py b/app/api/admin.py index d767d72..962b39a 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -307,6 +307,24 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = { "mint-green CTAs and eyebrow accents." ), }, + # Operator-injected HTML/JS blocks rendered into base.html. + # `kind: array` renders as a JSON textarea in the admin UI + # (per admin_server_config.html:702-708 — arrays fall back to + # the JSON path); the hint documents the per-item shape so the + # operator knows what to paste. Resolved by + # `app/instance_config.py::get_custom_scripts()`. + "custom_scripts": { + "kind": "array", + "hint": ( + "Operator-injected HTML/JS blocks rendered into base.html. " + "Each entry: {name: str, enabled: bool, placement: " + "head_start|head_end|body_end, html: str}. Used for feedback " + "widgets (Marker.io), analytics (GTM, PostHog), error capture " + "(Sentry). Rendered with | safe — admin trust boundary. Review " + "third-party widget privacy posture before enabling (most " + "capture session data). Restart required after save." + ), + }, }, "data_source": { "bigquery": { diff --git a/app/instance_config.py b/app/instance_config.py index 44de732..bb0af83 100644 --- a/app/instance_config.py +++ b/app/instance_config.py @@ -365,6 +365,109 @@ def get_instance_overview() -> str: return (raw or "").strip() +_CUSTOM_SCRIPT_PLACEMENTS = ("head_start", "head_end", "body_end") + + +def _custom_script_enabled(value) -> bool: + """Coerce the per-entry ``enabled`` field tolerant of YAML's many + truthiness shapes. + + Operators hand-editing YAML (or pasting blocks from another source) + can land ``enabled: "false"`` (quoted string), ``enabled: 0``, or + ``enabled: no`` rather than the boolean ``false``. ``bool("false")`` + is ``True`` in Python, so a naive truth check silently keeps the + script live — a footgun for what's meant to be a kill switch on + admin-injected JS. Missing / ``None`` → live (default-on, matches + the registered field shape). + """ + if value is None: + return True + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return value != 0 + if isinstance(value, str): + return value.strip().lower() not in ("", "false", "no", "0", "off") + return bool(value) + + +def get_custom_scripts() -> list[dict]: + """Operator-injected HTML/JS blocks rendered by ``base.html``. + + Reads ``instance.custom_scripts`` from instance.yaml — a list of + dicts ``{name, enabled, placement, html}``. Each block lands in one + of three template slots: + + - ``head_start`` — first thing in ````, before any CSS/JS + (rare; GTM dataLayer init). + - ``head_end`` — last thing in ```` (default; analytics + + feedback widgets like Marker.io, Sentry, Hotjar). + - ``body_end`` — just before ```` (vendors that explicitly + ask for bottom placement). + + Trust boundary: admin-only. ``instance.yaml`` is written through + ``/api/admin/server-config`` (gated by ``require_admin``) and the + rendered HTML is interpolated with ``| safe``, exactly mirroring + ``instance.logo_svg`` / ``instance.overview``. + + Normalization: + - Drop entries whose ``enabled`` resolves to false via + :func:`_custom_script_enabled` (handles quoted strings, 0/1, etc. + — not just the Python ``False`` singleton). + - Drop entries whose ``html`` strips to empty. + - Default missing ``name`` to "" and missing ``placement`` to + "head_end". + - Drop entries whose ``placement`` isn't in the allowlist, with a + logged warning naming the offending block — admin sees the + mistake instead of the server crashing. + + No env-var override: the structure is a list of objects, which + doesn't round-trip cleanly through env vars; deployment-time + injection happens by writing the YAML from the deploy script. + + Returns ``[]`` when YAML omits the key — empty by default keeps the + OSS vendor-neutral. + """ + raw = get_value("instance", "custom_scripts", default=None) + if not raw: + return [] + if not isinstance(raw, list): + logger.warning( + "instance.custom_scripts must be a list, got %s — ignoring", + type(raw).__name__, + ) + return [] + out: list[dict] = [] + for idx, entry in enumerate(raw): + if not isinstance(entry, dict): + logger.warning( + "instance.custom_scripts[%d] must be a dict, got %s — skipping", + idx, type(entry).__name__, + ) + continue + if not _custom_script_enabled(entry.get("enabled")): + continue + html = (entry.get("html") or "").strip() + if not html: + continue + placement = (entry.get("placement") or "head_end").strip() + if placement not in _CUSTOM_SCRIPT_PLACEMENTS: + logger.warning( + "instance.custom_scripts[%d] (name=%r) has unknown placement " + "%r — must be one of %s — skipping", + idx, entry.get("name", ""), placement, + ", ".join(_CUSTOM_SCRIPT_PLACEMENTS), + ) + continue + out.append({ + "name": str(entry.get("name") or ""), + "enabled": True, + "placement": placement, + "html": html, + }) + return out + + def get_workspace_dir_name() -> str: """Filesystem-safe folder name for the analyst's local workspace (``~/``). Defaults to :func:`get_instance_brand` diff --git a/app/web/router.py b/app/web/router.py index 38a8b39..b119dbc 100644 --- a/app/web/router.py +++ b/app/web/router.py @@ -26,7 +26,7 @@ from app.instance_config import ( get_instance_admin_email, get_atlassian_base_url, get_instance_brand, get_workspace_dir_name, get_instance_logo_svg, get_instance_overview, - get_instance_theme, + get_instance_theme, get_custom_scripts, ) from app.web.connector_prompts import all_connector_prompts from app.api.me_debug import ( @@ -499,6 +499,12 @@ def _build_context( # install-block. Operator can hide it via AGNES_HOME_SHOW_AUTOMODE=0 # for cautious rollouts; same content stays on /setup-advanced. "home_automode": {"show": get_home_automode_visibility()}, + # Operator-injected HTML/JS blocks rendered into base.html at + # head_start / head_end / body_end. Admin-only (instance.yaml, + # gated by require_admin) — used for feedback widgets + # (Marker.io), analytics, error capture. Empty default keeps + # the OSS vendor-neutral. + "custom_scripts": get_custom_scripts(), } # Flex all extra context values for template compatibility # (but skip ones we just populated — extras with the same key win) diff --git a/app/web/templates/base.html b/app/web/templates/base.html index f86bedf..ce22334 100644 --- a/app/web/templates/base.html +++ b/app/web/templates/base.html @@ -1,8 +1,19 @@ + {# HTML5 requires within the first 1024 bytes; any + operator-injected snippet must come AFTER charset + viewport to + avoid pushing the declaration past that window (which makes + browsers fall back to a locale-default encoding and historically + opened UTF-7 charset-confusion XSS). #} + {# Operator-injected scripts (placement=head_start) — run before any + CSS/JS so vendors that need to install global hooks first (GTM + dataLayer init, etc.) work. Admin-only, see instance.custom_scripts. #} + {% for s in custom_scripts | default([]) if s.placement == 'head_start' %} + {{ s.html | safe }} + {% endfor %} {% block title %}Data Analyst Portal{% endblock %} {# Design-system tokens (`--ds-*`) — opt-in green/navy palette used @@ -23,6 +34,12 @@ still get the nav-dropdown wiring. #} {% block head_extra %}{% endblock %} {% include '_theme.html' %} + {# Operator-injected scripts (placement=head_end, the default) — + analytics + feedback widgets like Marker.io, Sentry, Hotjar. + Admin-only, see instance.custom_scripts. #} + {% for s in custom_scripts | default([]) if s.placement == 'head_end' %} + {{ s.html | safe }} + {% endfor %} {% include '_app_header.html' %} @@ -634,5 +651,11 @@ })(); {% block scripts %}{% endblock %} + {# Operator-injected scripts (placement=body_end) — for vendors that + explicitly want bottom placement. Admin-only, see + instance.custom_scripts. #} + {% for s in custom_scripts | default([]) if s.placement == 'body_end' %} + {{ s.html | safe }} + {% endfor %} diff --git a/app/web/templates/base_login.html b/app/web/templates/base_login.html index 5e51e6c..619eaab 100644 --- a/app/web/templates/base_login.html +++ b/app/web/templates/base_login.html @@ -1,8 +1,16 @@ + {# HTML5 requires within the first 1024 bytes; any + operator-injected snippet must come AFTER charset + viewport. See + base.html for the full rationale. #} + {# Operator-injected scripts (placement=head_start). Mirrors base.html + so login/auth pages surface custom_scripts too. #} + {% for s in custom_scripts | default([]) if s.placement == 'head_start' %} + {{ s.html | safe }} + {% endfor %} {% block title %}Data Analyst Portal{% endblock %} {# Design-system tokens (`--ds-*`) — required so `.btn-primary` @@ -11,6 +19,10 @@ card. Same reason base.html loads it globally. #} {% include '_theme.html' %} + {# Operator-injected scripts (placement=head_end). Mirrors base.html. #} + {% for s in custom_scripts | default([]) if s.placement == 'head_end' %} + {{ s.html | safe }} + {% endfor %} {% with messages = get_flashed_messages(with_categories=true) %} @@ -27,5 +39,9 @@ {% block content %}{% endblock %} {% include "_version_badge.html" %} + {# Operator-injected scripts (placement=body_end). Mirrors base.html. #} + {% for s in custom_scripts | default([]) if s.placement == 'body_end' %} + {{ s.html | safe }} + {% endfor %} diff --git a/config/instance.yaml.example b/config/instance.yaml.example index bf5a35b..25d20de 100644 --- a/config/instance.yaml.example +++ b/config/instance.yaml.example @@ -57,6 +57,27 @@ instance: # # Prompts, Tokens, Projects). Visible only to onboarded # # users regardless of this flag. Default true. Env: # # AGNES_HOME_SHOW_STATUS_FRAME. + # custom_scripts: # Operator-injected HTML/JS blocks rendered into every + # # page that extends base.html. Use for feedback widgets + # # (Marker.io), analytics (GTM, PostHog), error capture + # # (Sentry), etc. Each entry needs name + enabled + + # # placement + html. Admin-only; rendered with `| safe`. + # # Review the widget's privacy posture before enabling — + # # most third-party widgets capture screenshots, console + # # logs, or user actions on submit. Resolved by + # # `app/instance_config.py::get_custom_scripts()`. No + # # env override (structure doesn't fit env vars cleanly). + # - name: "marker-io" # Example: Marker.io feedback widget. + # enabled: true # Kill switch — set false to disable without deleting. + # placement: "head_end" # head_start | head_end | body_end + # html: | + # # --- Server --- server: diff --git a/tests/test_custom_scripts_render.py b/tests/test_custom_scripts_render.py new file mode 100644 index 0000000..80abc2e --- /dev/null +++ b/tests/test_custom_scripts_render.py @@ -0,0 +1,123 @@ +"""``instance.custom_scripts`` template-render coverage. + +Validates that each placement slot in ``base.html`` actually fires: +``head_start`` lands before the first ```` in ````, +``head_end`` lands before ````, and ``body_end`` lands before +````. Together with ``test_instance_config.py::TestCustomScripts`` +(the normalization layer), this covers the yaml-to-rendered-page path +end-to-end. + +Hits ``/login`` since it extends ``base.html`` and needs no auth. +""" + +from __future__ import annotations + +import tempfile + +import pytest + + +@pytest.fixture +def render_client(monkeypatch): + with tempfile.TemporaryDirectory() as tmp: + monkeypatch.setenv("DATA_DIR", tmp) + monkeypatch.setenv("TESTING", "1") + monkeypatch.setenv("JWT_SECRET_KEY", "test-jwt-secret-key-minimum-32-chars!!") + from fastapi.testclient import TestClient + from app.main import app + yield TestClient(app, follow_redirects=False) + + +def _patch_scripts(monkeypatch, scripts): + """Replace ``app.web.router.get_custom_scripts`` with a stub returning + ``scripts``. router.py binds the import at module load, so patching + here is what _render_ctx actually sees at call time.""" + import app.web.router as router_mod + monkeypatch.setattr(router_mod, "get_custom_scripts", lambda: scripts) + + +def test_no_custom_scripts_renders_no_snippets(render_client, monkeypatch): + _patch_scripts(monkeypatch, []) + resp = render_client.get("/login") + assert resp.status_code == 200 + body = resp.text + # Sentinel strings used in the other tests — must be absent here. + assert "AGNES_CUSTOM_SCRIPT_HEAD_START" not in body + assert "AGNES_CUSTOM_SCRIPT_HEAD_END" not in body + assert "AGNES_CUSTOM_SCRIPT_BODY_END" not in body + + +def test_head_end_snippet_lands_before_head_close(render_client, monkeypatch): + _patch_scripts(monkeypatch, [{ + "name": "marker-io", + "enabled": True, + "placement": "head_end", + "html": "", + }]) + body = render_client.get("/login").text + sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_END" + assert sentinel in body + snippet_idx = body.index(sentinel) + head_close_idx = body.index("") + assert snippet_idx < head_close_idx, "head_end must render before " + + +def test_head_start_snippet_lands_after_charset_before_first_link(render_client, monkeypatch): + _patch_scripts(monkeypatch, [{ + "name": "gtm-init", + "enabled": True, + "placement": "head_start", + "html": "", + }]) + body = render_client.get("/login").text + sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_START" + assert sentinel in body + snippet_idx = body.index(sentinel) + charset_idx = body.index('') + viewport_idx = body.index('") + # HTML5 spec: must appear within the first 1024 bytes. + # head_start MUST land after both required tags so a long + # operator snippet can't push the charset declaration past that window + # (which would trigger locale-default encoding fallback + historical + # UTF-7 charset-confusion XSS). + assert charset_idx < snippet_idx, "head_start must render AFTER " + assert viewport_idx < snippet_idx, "head_start must render AFTER " + # Still before CSS/JS so vendor hooks (e.g. GTM dataLayer init) install + # before any other script can read them. + assert snippet_idx < first_link_idx, "head_start must render before first " + assert snippet_idx < head_close_idx + + +def test_body_end_snippet_lands_before_body_close(render_client, monkeypatch): + _patch_scripts(monkeypatch, [{ + "name": "bottom-tag", + "enabled": True, + "placement": "body_end", + "html": "", + }]) + body = render_client.get("/login").text + sentinel = "AGNES_CUSTOM_SCRIPT_BODY_END" + assert sentinel in body + snippet_idx = body.index(sentinel) + body_close_idx = body.index("") + head_close_idx = body.index("") + assert snippet_idx > head_close_idx, "body_end must render after " + assert snippet_idx < body_close_idx + + +def test_all_three_placements_render_in_correct_order(render_client, monkeypatch): + _patch_scripts(monkeypatch, [ + {"name": "a", "enabled": True, "placement": "head_start", + "html": ""}, + {"name": "b", "enabled": True, "placement": "head_end", + "html": ""}, + {"name": "c", "enabled": True, "placement": "body_end", + "html": ""}, + ]) + body = render_client.get("/login").text + head_start_idx = body.index("AGNES_CUSTOM_SCRIPT_HEAD_START") + head_end_idx = body.index("AGNES_CUSTOM_SCRIPT_HEAD_END") + body_end_idx = body.index("AGNES_CUSTOM_SCRIPT_BODY_END") + assert head_start_idx < head_end_idx < body_end_idx diff --git a/tests/test_instance_config.py b/tests/test_instance_config.py index 513a3ce..4855692 100644 --- a/tests/test_instance_config.py +++ b/tests/test_instance_config.py @@ -160,3 +160,183 @@ class TestInstanceBrand: assert "Bootstrap your Agnes workspace" in joined assert "Agnes workspace is ready" in joined mod._instance_config = None + + +class TestCustomScripts: + """instance.custom_scripts — operator-injected HTML/JS blocks rendered + by base.html. Validates the normalization + filtering done by + get_custom_scripts() so the template can iterate over a clean list.""" + + def _reload(self, tmp_path, monkeypatch): + monkeypatch.setenv("DATA_DIR", str(tmp_path)) + monkeypatch.setenv("TESTING", "1") + monkeypatch.setenv("JWT_SECRET_KEY", "test-secret-key-minimum-32-characters!!") + import importlib + import app.instance_config as mod + mod._instance_config = None + importlib.reload(mod) + return mod + + def _write(self, tmp_path, yaml_body: str): + state_dir = tmp_path / "state" + state_dir.mkdir(exist_ok=True) + (state_dir / "instance.yaml").write_text(yaml_body) + + def test_yaml_absent_returns_empty_list(self, tmp_path, monkeypatch): + mod = self._reload(tmp_path, monkeypatch) + assert mod.get_custom_scripts() == [] + mod._instance_config = None + + def test_valid_entry_normalized(self, tmp_path, monkeypatch): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - name: marker-io\n" + " enabled: true\n" + " placement: head_end\n" + " html: |\n" + " \n" + )) + mod = self._reload(tmp_path, monkeypatch) + scripts = mod.get_custom_scripts() + assert len(scripts) == 1 + s = scripts[0] + assert s["name"] == "marker-io" + assert s["enabled"] is True + assert s["placement"] == "head_end" + assert "markerConfig" in s["html"] + mod._instance_config = None + + def test_disabled_entry_dropped(self, tmp_path, monkeypatch): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - name: off\n" + " enabled: false\n" + " placement: head_end\n" + " html: \n" + )) + mod = self._reload(tmp_path, monkeypatch) + assert mod.get_custom_scripts() == [] + mod._instance_config = None + + def test_empty_html_dropped(self, tmp_path, monkeypatch): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - name: noop\n" + " enabled: true\n" + " placement: head_end\n" + " html: ' '\n" + )) + mod = self._reload(tmp_path, monkeypatch) + assert mod.get_custom_scripts() == [] + mod._instance_config = None + + def test_bad_placement_dropped_with_warning(self, tmp_path, monkeypatch, caplog): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - name: typo\n" + " enabled: true\n" + " placement: body_start\n" + " html: \n" + )) + mod = self._reload(tmp_path, monkeypatch) + import logging + with caplog.at_level(logging.WARNING, logger="app.instance_config"): + assert mod.get_custom_scripts() == [] + assert any("unknown placement" in r.message for r in caplog.records) + mod._instance_config = None + + def test_missing_placement_defaults_to_head_end(self, tmp_path, monkeypatch): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - name: defaulting\n" + " enabled: true\n" + " html: \n" + )) + mod = self._reload(tmp_path, monkeypatch) + scripts = mod.get_custom_scripts() + assert len(scripts) == 1 + assert scripts[0]["placement"] == "head_end" + mod._instance_config = None + + def test_three_placements_all_pass_through(self, tmp_path, monkeypatch): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + " - {name: a, enabled: true, placement: head_start, html: ''}\n" + " - {name: b, enabled: true, placement: head_end, html: ''}\n" + " - {name: c, enabled: true, placement: body_end, html: ''}\n" + )) + mod = self._reload(tmp_path, monkeypatch) + scripts = mod.get_custom_scripts() + assert [s["placement"] for s in scripts] == ["head_start", "head_end", "body_end"] + assert [s["name"] for s in scripts] == ["a", "b", "c"] + mod._instance_config = None + + def test_non_list_value_ignored_with_warning(self, tmp_path, monkeypatch, caplog): + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts: not-a-list\n" + )) + mod = self._reload(tmp_path, monkeypatch) + import logging + with caplog.at_level(logging.WARNING, logger="app.instance_config"): + assert mod.get_custom_scripts() == [] + assert any("must be a list" in r.message for r in caplog.records) + mod._instance_config = None + + @pytest.mark.parametrize("enabled_yaml,expect_dropped", [ + # Boolean false in every YAML truthy-shape the operator might use. + # All of these must drop the entry so the kill switch behaves the + # same regardless of whether the operator pasted a quoted block. + ("false", True), + ("False", True), + ('"false"', True), # quoted string — bool("false") == True in Python + ('"no"', True), + ('"NO"', True), + ('"off"', True), + ('"0"', True), + ("0", True), + # Boolean true / typical live values must keep the entry alive. + ("true", False), + ("True", False), + ('"true"', False), + ('"yes"', False), + ("1", False), + ]) + def test_enabled_coercion(self, tmp_path, monkeypatch, enabled_yaml, expect_dropped): + """Quoted-string + numeric `enabled` values must be coerced the same + way the operator expects from a Boolean field — the kill switch is + the whole point of the field, and `bool("false") == True` would + silently leave the snippet live (review PR #372).""" + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + f" - name: probe\n" + f" enabled: {enabled_yaml}\n" + " placement: head_end\n" + " html: \n" + )) + mod = self._reload(tmp_path, monkeypatch) + scripts = mod.get_custom_scripts() + if expect_dropped: + assert scripts == [], ( + f"enabled={enabled_yaml!r} should drop the entry but it survived" + ) + else: + assert len(scripts) == 1, ( + f"enabled={enabled_yaml!r} should keep the entry but it was dropped" + ) + mod._instance_config = None