import re

# Patterns used by _sanitize_banner_html.
#
# The two block patterns run from the opening tag through the matching
# closing tag, or to end-of-input when the element is never closed.  The
# closing-tag alternative must be non-empty: an empty alternative lets the
# lazy body match zero characters, which strips only the literal "<script"
# and leaves the payload in place.
_RE_SCRIPT = re.compile(
    r"<\s*script\b[\s\S]*?(?:<\s*/\s*script\b[^>]*>|$)", re.IGNORECASE
)
_RE_IFRAME = re.compile(
    r"<\s*iframe\b[\s\S]*?(?:<\s*/\s*iframe\b[^>]*>|$)", re.IGNORECASE
)
_RE_ON_ATTR = re.compile(r'\s+on\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)', re.IGNORECASE)
# Group 1 is the ``href=`` / ``src=`` prefix; group 2 is the opening quote
# (None for an unquoted value).  The quoted branch closes on the SAME quote
# character via the backreference, so a value such as
# ``"javascript:alert('x')"`` is consumed whole instead of being cut at the
# inner apostrophe.
_RE_JS_URI = re.compile(
    r"""((?:href|src)\s*=\s*)"""
    r"""(?:(["'])\s*(?:javascript|data)\s*:.*?\2"""
    r"""|(?:javascript|data):[^\s>]*)""",
    re.IGNORECASE | re.DOTALL,
)


def _neutralize_uri(match: "re.Match[str]") -> str:
    """Return the matched attribute with its value replaced by ``#``."""
    # Unquoted values get double quotes so the result is still one attribute.
    quote = match.group(2) or '"'
    return f"{match.group(1)}{quote}#{quote}"


def _sanitize_banner_html(html: str) -> str:
    """Strip the most dangerous markup patterns from rendered banner HTML.

    Threat model: admins are trusted to author banner content, but mistakes
    happen (copy-paste from untrusted sources, accidental script inclusion).
    This is defense-in-depth, NOT a full XSS defense — for that, render
    markdown only or add a strict Content-Security-Policy. The list of bad
    patterns is intentionally narrow so legitimate admin HTML is not
    mangled.

    What is stripped:
    - ``<script>...</script>`` blocks (case-insensitive, including unclosed).
    - ``<iframe>...</iframe>`` blocks (case-insensitive, including unclosed).
    - ``on*=`` event-handler attributes (e.g. onclick, onload, onerror).
    - ``javascript:`` and ``data:`` URI schemes in href/src attributes
      (double-quoted, single-quoted or unquoted); the value becomes ``#``.

    Args:
        html: Rendered banner markup.

    Returns:
        The markup with the patterns above removed. Input that contains
        none of them is returned unchanged.
    """
    # Repeat until a pass changes nothing: removing one match can splice the
    # surrounding text into a new one ("<scr<script></script>ipt>").  Every
    # substitution strictly shortens the string, so the loop terminates.
    previous = None
    while html != previous:
        previous = html
        html = _RE_SCRIPT.sub("", html)
        html = _RE_IFRAME.sub("", html)
        html = _RE_ON_ATTR.sub("", html)
        html = _RE_JS_URI.sub(_neutralize_uri, html)
    return html
" diff --git a/tests/test_setup_banner_render.py b/tests/test_setup_banner_render.py index fb7c0ed..e1dc571 100644 --- a/tests/test_setup_banner_render.py +++ b/tests/test_setup_banner_render.py @@ -5,7 +5,7 @@ import pytest from src.db import _ensure_schema from src.repositories.setup_banner import SetupBannerRepository -from src.setup_banner import build_setup_banner_context, render_setup_banner +from src.setup_banner import _sanitize_banner_html, build_setup_banner_context, render_setup_banner @pytest.fixture @@ -78,3 +78,44 @@ def test_autoescape_escapes_html_entities(conn): ) # hostname won't contain < > but the render must succeed without injection assert out != "" + + +# ── Sanitizer unit tests ───────────────────────────────────────────────────── + +def test_render_strips_script_tags(conn): + """render_setup_banner must remove ', + updated_by="admin@example.com", + ) + out = render_setup_banner(conn, user=_user(), server_url="https://example.com") + assert "