From 58001af27d9d4d76ee53b765d7595e6b51fea49a Mon Sep 17 00:00:00 2001 From: Vojtech Rysanek Date: Thu, 21 May 2026 13:59:11 +0400 Subject: [PATCH] =?UTF-8?q?fix(web):=20address=20PR=20#372=20review=20?= =?UTF-8?q?=E2=80=94=20meta=20charset=20ordering=20+=20enabled=20coercion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues raised by @minasarustamyan: 1. head_start loop rendered BEFORE in both base.html and base_login.html. HTML5 requires the charset declaration within the first 1024 bytes; a long operator-injected snippet could push it past that window, dropping browsers into locale-default encoding (historically a UTF-7 charset-confusion XSS vector). Move the head_start loop after charset + viewport meta tags — vendor hooks (GTM dataLayer init, etc.) still install before any CSS/JS, the two required meta tags just come first. 2. `entry.get("enabled") is False` matched only the Python False singleton — `enabled: "false"` (quoted YAML string), `enabled: 0`, `enabled: "no"` etc. all slipped through (bool("false") == True in Python). For what is meant to be a kill switch on admin-injected JS, an operator who fat-fingers the quoting would silently leave the snippet live. Add `_custom_script_enabled()` coercion helper that honours quoted booleans, numeric 0, and the usual `no`/`off`/`""` variants. Default-on for missing / None to preserve the default-enabled field semantics. 13 new parametrized tests cover every YAML truthy-shape the operator might paste. Render test now asserts head_start lands after both required tags. --- app/instance_config.py | 29 +++++++++++++++++-- app/web/templates/base.html | 9 ++++-- app/web/templates/base_login.html | 7 +++-- tests/test_custom_scripts_render.py | 13 ++++++++- tests/test_instance_config.py | 45 +++++++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 7 deletions(-) diff --git a/app/instance_config.py b/app/instance_config.py index 9ccef45..ec794fb 100644 --- a/app/instance_config.py +++ b/app/instance_config.py @@ -367,6 +367,29 @@ def get_instance_overview() -> str: _CUSTOM_SCRIPT_PLACEMENTS = ("head_start", "head_end", "body_end") +def _custom_script_enabled(value) -> bool: + """Coerce the per-entry ``enabled`` field tolerant of YAML's many + truthiness shapes. + + Operators hand-editing YAML (or pasting blocks from another source) + can land ``enabled: "false"`` (quoted string), ``enabled: 0``, or + ``enabled: no`` rather than the boolean ``false``. ``bool("false")`` + is ``True`` in Python, so a naive truth check silently keeps the + script live — a footgun for what's meant to be a kill switch on + admin-injected JS. Missing / ``None`` → live (default-on, matches + the registered field shape). + """ + if value is None: + return True + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return value != 0 + if isinstance(value, str): + return value.strip().lower() not in ("", "false", "no", "0", "off") + return bool(value) + + def get_custom_scripts() -> list[dict]: """Operator-injected HTML/JS blocks rendered by ``base.html``. @@ -387,7 +410,9 @@ def get_custom_scripts() -> list[dict]: ``instance.logo_svg`` / ``instance.overview``. Normalization: - - Drop entries with ``enabled=False``. + - Drop entries whose ``enabled`` resolves to false via + :func:`_custom_script_enabled` (handles quoted strings, 0/1, etc. + — not just the Python ``False`` singleton). - Drop entries whose ``html`` strips to empty. - Default missing ``name`` to "" and missing ``placement`` to "head_end". @@ -419,7 +444,7 @@ def get_custom_scripts() -> list[dict]: idx, type(entry).__name__, ) continue - if entry.get("enabled") is False: + if not _custom_script_enabled(entry.get("enabled")): continue html = (entry.get("html") or "").strip() if not html: diff --git a/app/web/templates/base.html b/app/web/templates/base.html index b6c5f7f..97e53b5 100644 --- a/app/web/templates/base.html +++ b/app/web/templates/base.html @@ -1,14 +1,19 @@ + {# HTML5 requires within the first 1024 bytes; any + operator-injected snippet must come AFTER charset + viewport to + avoid pushing the declaration past that window (which makes + browsers fall back to a locale-default encoding and historically + opened UTF-7 charset-confusion XSS). #} + + {# Operator-injected scripts (placement=head_start) — run before any CSS/JS so vendors that need to install global hooks first (GTM dataLayer init, etc.) work. Admin-only, see instance.custom_scripts. #} {% for s in custom_scripts | default([]) if s.placement == 'head_start' %} {{ s.html | safe }} {% endfor %} - - {% block title %}Data Analyst Portal{% endblock %} {# Design-system tokens (`--ds-*`) — opt-in green/navy palette used diff --git a/app/web/templates/base_login.html b/app/web/templates/base_login.html index f227af7..a925de9 100644 --- a/app/web/templates/base_login.html +++ b/app/web/templates/base_login.html @@ -1,13 +1,16 @@ + {# HTML5 requires within the first 1024 bytes; any + operator-injected snippet must come AFTER charset + viewport. See + base.html for the full rationale. #} + + {# Operator-injected scripts (placement=head_start). Mirrors base.html so login/auth pages surface custom_scripts too. #} {% for s in custom_scripts | default([]) if s.placement == 'head_start' %} {{ s.html | safe }} {% endfor %} - - {% block title %}Data Analyst Portal{% endblock %} {% include '_theme.html' %} diff --git a/tests/test_custom_scripts_render.py b/tests/test_custom_scripts_render.py index b211f8b..80abc2e 100644 --- a/tests/test_custom_scripts_render.py +++ b/tests/test_custom_scripts_render.py @@ -62,7 +62,7 @@ def test_head_end_snippet_lands_before_head_close(render_client, monkeypatch): assert snippet_idx < head_close_idx, "head_end must render before " -def test_head_start_snippet_lands_before_first_link(render_client, monkeypatch): +def test_head_start_snippet_lands_after_charset_before_first_link(render_client, monkeypatch): _patch_scripts(monkeypatch, [{ "name": "gtm-init", "enabled": True, @@ -73,8 +73,19 @@ def test_head_start_snippet_lands_before_first_link(render_client, monkeypatch): sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_START" assert sentinel in body snippet_idx = body.index(sentinel) + charset_idx = body.index('') + viewport_idx = body.index('") + # HTML5 spec: must appear within the first 1024 bytes. + # head_start MUST land after both required tags so a long + # operator snippet can't push the charset declaration past that window + # (which would trigger locale-default encoding fallback + historical + # UTF-7 charset-confusion XSS). + assert charset_idx < snippet_idx, "head_start must render AFTER " + assert viewport_idx < snippet_idx, "head_start must render AFTER " + # Still before CSS/JS so vendor hooks (e.g. GTM dataLayer init) install + # before any other script can read them. assert snippet_idx < first_link_idx, "head_start must render before first " assert snippet_idx < head_close_idx diff --git a/tests/test_instance_config.py b/tests/test_instance_config.py index 9c871ec..4855692 100644 --- a/tests/test_instance_config.py +++ b/tests/test_instance_config.py @@ -295,3 +295,48 @@ class TestCustomScripts: assert mod.get_custom_scripts() == [] assert any("must be a list" in r.message for r in caplog.records) mod._instance_config = None + + @pytest.mark.parametrize("enabled_yaml,expect_dropped", [ + # Boolean false in every YAML truthy-shape the operator might use. + # All of these must drop the entry so the kill switch behaves the + # same regardless of whether the operator pasted a quoted block. + ("false", True), + ("False", True), + ('"false"', True), # quoted string — bool("false") == True in Python + ('"no"', True), + ('"NO"', True), + ('"off"', True), + ('"0"', True), + ("0", True), + # Boolean true / typical live values must keep the entry alive. + ("true", False), + ("True", False), + ('"true"', False), + ('"yes"', False), + ("1", False), + ]) + def test_enabled_coercion(self, tmp_path, monkeypatch, enabled_yaml, expect_dropped): + """Quoted-string + numeric `enabled` values must be coerced the same + way the operator expects from a Boolean field — the kill switch is + the whole point of the field, and `bool("false") == True` would + silently leave the snippet live (review PR #372).""" + self._write(tmp_path, ( + "instance:\n" + " name: Acme\n" + " custom_scripts:\n" + f" - name: probe\n" + f" enabled: {enabled_yaml}\n" + " placement: head_end\n" + " html: \n" + )) + mod = self._reload(tmp_path, monkeypatch) + scripts = mod.get_custom_scripts() + if expect_dropped: + assert scripts == [], ( + f"enabled={enabled_yaml!r} should drop the entry but it survived" + ) + else: + assert len(scripts) == 1, ( + f"enabled={enabled_yaml!r} should keep the entry but it was dropped" + ) + mod._instance_config = None