fix(web): address PR #372 review — meta charset ordering + enabled coercion

Two issues raised by @minasarustamyan:

1. head_start loop rendered BEFORE <meta charset="UTF-8"> in both
   base.html and base_login.html. HTML5 requires the charset declaration
   within the first 1024 bytes; a long operator-injected snippet could
   push it past that window, dropping browsers into locale-default
   encoding (historically a UTF-7 charset-confusion XSS vector). Move
   the head_start loop after charset + viewport meta tags — vendor
   hooks (GTM dataLayer init, etc.) still install before any CSS/JS, the
   two required meta tags just come first.

2. `entry.get("enabled") is False` matched only the Python False
   singleton — `enabled: "false"` (quoted YAML string), `enabled: 0`,
   `enabled: "no"` etc. all slipped through (bool("false") == True in
   Python). For what is meant to be a kill switch on admin-injected JS,
   an operator who fat-fingers the quoting would silently leave the
   snippet live. Add `_custom_script_enabled()` coercion helper that
   honours quoted booleans, numeric 0, and the usual `no`/`off`/`""`
   variants. Default-on for missing / None to preserve the
   default-enabled field semantics.

13 new parametrized tests cover every YAML truthy-shape the operator
might paste. Render test now asserts head_start lands after both
required <meta> tags.
This commit is contained in:
Vojtech Rysanek 2026-05-21 13:59:11 +04:00
parent 4b48377d44
commit 58001af27d
5 changed files with 96 additions and 7 deletions

View file

@ -367,6 +367,29 @@ def get_instance_overview() -> str:
_CUSTOM_SCRIPT_PLACEMENTS = ("head_start", "head_end", "body_end")
def _custom_script_enabled(value) -> bool:
"""Coerce the per-entry ``enabled`` field tolerant of YAML's many
truthiness shapes.
Operators hand-editing YAML (or pasting blocks from another source)
can land ``enabled: "false"`` (quoted string), ``enabled: 0``, or
``enabled: no`` rather than the boolean ``false``. ``bool("false")``
is ``True`` in Python, so a naive truth check silently keeps the
script live a footgun for what's meant to be a kill switch on
admin-injected JS. Missing / ``None`` live (default-on, matches
the registered field shape).
"""
if value is None:
return True
if isinstance(value, bool):
return value
if isinstance(value, (int, float)):
return value != 0
if isinstance(value, str):
return value.strip().lower() not in ("", "false", "no", "0", "off")
return bool(value)
def get_custom_scripts() -> list[dict]:
"""Operator-injected HTML/JS blocks rendered by ``base.html``.
@ -387,7 +410,9 @@ def get_custom_scripts() -> list[dict]:
``instance.logo_svg`` / ``instance.overview``.
Normalization:
- Drop entries with ``enabled=False``.
- Drop entries whose ``enabled`` resolves to false via
:func:`_custom_script_enabled` (handles quoted strings, 0/1, etc.
not just the Python ``False`` singleton).
- Drop entries whose ``html`` strips to empty.
- Default missing ``name`` to "" and missing ``placement`` to
"head_end".
@ -419,7 +444,7 @@ def get_custom_scripts() -> list[dict]:
idx, type(entry).__name__,
)
continue
if entry.get("enabled") is False:
if not _custom_script_enabled(entry.get("enabled")):
continue
html = (entry.get("html") or "").strip()
if not html:

View file

@ -1,14 +1,19 @@
<!DOCTYPE html>
<html lang="en" data-theme="{{ instance_theme | default('navy') }}">
<head>
{# HTML5 requires <meta charset> within the first 1024 bytes; any
operator-injected snippet must come AFTER charset + viewport to
avoid pushing the declaration past that window (which makes
browsers fall back to a locale-default encoding and historically
opened UTF-7 charset-confusion XSS). #}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
{# Operator-injected scripts (placement=head_start) — run before any
CSS/JS so vendors that need to install global hooks first (GTM
dataLayer init, etc.) work. Admin-only, see instance.custom_scripts. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_start' %}
{{ s.html | safe }}
{% endfor %}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}Data Analyst Portal{% endblock %}</title>
<link rel="stylesheet" href="{{ static_url('style-custom.css') }}">
{# Design-system tokens (`--ds-*`) — opt-in green/navy palette used

View file

@ -1,13 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
{# HTML5 requires <meta charset> within the first 1024 bytes; any
operator-injected snippet must come AFTER charset + viewport. See
base.html for the full rationale. #}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
{# Operator-injected scripts (placement=head_start). Mirrors base.html
so login/auth pages surface custom_scripts too. #}
{% for s in custom_scripts | default([]) if s.placement == 'head_start' %}
{{ s.html | safe }}
{% endfor %}
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}Data Analyst Portal{% endblock %}</title>
<link rel="stylesheet" href="{{ static_url('style-custom.css') }}">
{% include '_theme.html' %}

View file

@ -62,7 +62,7 @@ def test_head_end_snippet_lands_before_head_close(render_client, monkeypatch):
assert snippet_idx < head_close_idx, "head_end must render before </head>"
def test_head_start_snippet_lands_before_first_link(render_client, monkeypatch):
def test_head_start_snippet_lands_after_charset_before_first_link(render_client, monkeypatch):
_patch_scripts(monkeypatch, [{
"name": "gtm-init",
"enabled": True,
@ -73,8 +73,19 @@ def test_head_start_snippet_lands_before_first_link(render_client, monkeypatch):
sentinel = "AGNES_CUSTOM_SCRIPT_HEAD_START"
assert sentinel in body
snippet_idx = body.index(sentinel)
charset_idx = body.index('<meta charset="UTF-8">')
viewport_idx = body.index('<meta name="viewport"')
first_link_idx = body.index("<link")
head_close_idx = body.index("</head>")
# HTML5 spec: <meta charset> must appear within the first 1024 bytes.
# head_start MUST land after both required <meta> tags so a long
# operator snippet can't push the charset declaration past that window
# (which would trigger locale-default encoding fallback + historical
# UTF-7 charset-confusion XSS).
assert charset_idx < snippet_idx, "head_start must render AFTER <meta charset>"
assert viewport_idx < snippet_idx, "head_start must render AFTER <meta viewport>"
# Still before CSS/JS so vendor hooks (e.g. GTM dataLayer init) install
# before any other script can read them.
assert snippet_idx < first_link_idx, "head_start must render before first <link>"
assert snippet_idx < head_close_idx

View file

@ -295,3 +295,48 @@ class TestCustomScripts:
assert mod.get_custom_scripts() == []
assert any("must be a list" in r.message for r in caplog.records)
mod._instance_config = None
@pytest.mark.parametrize("enabled_yaml,expect_dropped", [
# Boolean false in every YAML truthy-shape the operator might use.
# All of these must drop the entry so the kill switch behaves the
# same regardless of whether the operator pasted a quoted block.
("false", True),
("False", True),
('"false"', True), # quoted string — bool("false") == True in Python
('"no"', True),
('"NO"', True),
('"off"', True),
('"0"', True),
("0", True),
# Boolean true / typical live values must keep the entry alive.
("true", False),
("True", False),
('"true"', False),
('"yes"', False),
("1", False),
])
def test_enabled_coercion(self, tmp_path, monkeypatch, enabled_yaml, expect_dropped):
"""Quoted-string + numeric `enabled` values must be coerced the same
way the operator expects from a Boolean field the kill switch is
the whole point of the field, and `bool("false") == True` would
silently leave the snippet live (review PR #372)."""
self._write(tmp_path, (
"instance:\n"
" name: Acme\n"
" custom_scripts:\n"
f" - name: probe\n"
f" enabled: {enabled_yaml}\n"
" placement: head_end\n"
" html: <script>1</script>\n"
))
mod = self._reload(tmp_path, monkeypatch)
scripts = mod.get_custom_scripts()
if expect_dropped:
assert scripts == [], (
f"enabled={enabled_yaml!r} should drop the entry but it survived"
)
else:
assert len(scripts) == 1, (
f"enabled={enabled_yaml!r} should keep the entry but it was dropped"
)
mod._instance_config = None