diff --git a/src/repositories/setup_banner.py b/src/repositories/setup_banner.py
index 5443754..172830c 100644
--- a/src/repositories/setup_banner.py
+++ b/src/repositories/setup_banner.py
@@ -1,7 +1,7 @@
"""Repository for the per-instance setup-page banner override (singleton row)."""
from datetime import datetime, timezone
-from typing import Any, Optional
+from typing import Any
import duckdb
diff --git a/src/repositories/welcome_template.py b/src/repositories/welcome_template.py
index a3f40f0..af3d91d 100644
--- a/src/repositories/welcome_template.py
+++ b/src/repositories/welcome_template.py
@@ -1,7 +1,7 @@
"""Repository for the per-instance welcome-prompt override (singleton row)."""
from datetime import datetime, timezone
-from typing import Any, Optional
+from typing import Any
import duckdb
diff --git a/src/setup_banner.py b/src/setup_banner.py
index e4d11a1..82f0274 100644
--- a/src/setup_banner.py
+++ b/src/setup_banner.py
@@ -8,6 +8,7 @@ data classification), not for analyst-side content.
from __future__ import annotations
import logging
+import re
from datetime import date, datetime, timezone
from typing import Any, Optional
from urllib.parse import urlparse
@@ -20,6 +21,38 @@ from src.repositories.setup_banner import SetupBannerRepository
_logger = logging.getLogger(__name__)
+# Patterns used by _sanitize_banner_html. These are a heuristic denylist applied
+# with plain regexes, not an HTML parser.
+# NOTE(review): the closing alternative in the script/iframe patterns matches any
+# "script>" / "iframe>" text, not only a closing tag — confirm this is intended.
+_RE_SCRIPT = re.compile(r"<\s*script[\s\S]*?(?:\s*script\s*>|$)", re.IGNORECASE)
+_RE_IFRAME = re.compile(r"<\s*iframe[\s\S]*?(?:\s*iframe\s*>|$)", re.IGNORECASE)
+_RE_ON_ATTR = re.compile(r'\s+on\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)', re.IGNORECASE)
+# Leading whitespace inside the quotes is tolerated because browsers ignore it
+# when resolving the URL, so `href=" javascript:..."` would otherwise slip through.
+_RE_JS_URI = re.compile(
+    r'((?:href|src)\s*=\s*["\'])\s*(?:javascript|data):[^"\']*(["\'])',
+    re.IGNORECASE,
+)
+
+
+def _sanitize_banner_html(html: str) -> str:
+    """Strip the most dangerous markup patterns from rendered banner HTML.
+
+    Threat model: admins are trusted to author banner content, but mistakes
+    happen (copy-paste from untrusted sources, accidental script inclusion).
+    This is defense-in-depth, NOT a full XSS defense — for that, render
+    markdown only or add a strict Content-Security-Policy. The denylist of
+    bad patterns is intentionally narrow so legitimate admin HTML is not
+    mangled.
+
+    What is stripped:
+    - ``<script>`` blocks (case-insensitive, including unclosed).
+    - ``<iframe>`` blocks (case-insensitive, including unclosed).
+    - ``on*=`` event-handler attributes (e.g. onclick, onload, onerror).
+    - ``javascript:`` and ``data:`` URI schemes in quoted href/src
+      attributes; the attribute value is replaced with ``#``.
+
+    The passes are repeated until the text stops changing, so that removing
+    one match cannot splice a new dangerous pattern together (for example
+    ``<scr<script></script>ipt>`` collapsing into ``<script>``).
+
+    Args:
+        html: Rendered banner markup.
+
+    Returns:
+        The markup with every match of the patterns above removed or
+        neutralised. Input with no matches is returned unchanged.
+    """
+    previous = None
+    # Every substitution strictly shortens the string, so this loop terminates.
+    while previous != html:
+        previous = html
+        html = _RE_SCRIPT.sub("", html)
+        html = _RE_IFRAME.sub("", html)
+        html = _RE_ON_ATTR.sub("", html)
+        html = _RE_JS_URI.sub(lambda m: m.group(1) + "#" + m.group(2), html)
+    return html
+
def build_setup_banner_context(
*,
@@ -76,7 +109,8 @@ def render_setup_banner(
env = Environment(undefined=StrictUndefined, autoescape=True)
try:
template = env.from_string(source)
- return template.render(**build_setup_banner_context(user=user, server_url=server_url))
+ rendered = template.render(**build_setup_banner_context(user=user, server_url=server_url))
+ return _sanitize_banner_html(rendered)
except TemplateError:
_logger.warning(
"setup_banner render failed; returning empty banner. "
diff --git a/tests/test_setup_banner_render.py b/tests/test_setup_banner_render.py
index fb7c0ed..e1dc571 100644
--- a/tests/test_setup_banner_render.py
+++ b/tests/test_setup_banner_render.py
@@ -5,7 +5,7 @@ import pytest
from src.db import _ensure_schema
from src.repositories.setup_banner import SetupBannerRepository
-from src.setup_banner import build_setup_banner_context, render_setup_banner
+from src.setup_banner import _sanitize_banner_html, build_setup_banner_context, render_setup_banner
@pytest.fixture
@@ -78,3 +78,44 @@ def test_autoescape_escapes_html_entities(conn):
)
# hostname won't contain < > but the render must succeed without injection
assert out != ""
+
+
+# ── Sanitizer unit tests ─────────────────────────────────────────────────────
+
+def test_render_strips_script_tags(conn):
+ """render_setup_banner must remove ',
+ updated_by="admin@example.com",
+ )
+ out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
+ assert "