fix(security): XSS hardening for setup banner + cleanup unused imports

- Add _sanitize_banner_html() to src/setup_banner.py: strips <script>/
  <iframe> blocks, on* event-handler attributes, and javascript:/data:
  URI schemes post-render (I-2). Defense-in-depth — /setup is partly
  anonymous so malformed admin content must not execute in visitors'
  browsers.
- Apply sanitizer in render_setup_banner() before returning rendered HTML.
- Add 3 unit tests: test_render_strips_script_tags,
  test_render_strips_event_handlers, test_render_strips_javascript_uri.
- Drop unused Optional import from src/repositories/welcome_template.py
  and src/repositories/setup_banner.py (M-6).
This commit is contained in:
ZdenekSrotyr 2026-05-02 20:56:21 +02:00
parent b0ec842804
commit b3ffc98e9f
4 changed files with 79 additions and 4 deletions

View file

@ -1,7 +1,7 @@
"""Repository for the per-instance setup-page banner override (singleton row)."""
from datetime import datetime, timezone
from typing import Any, Optional
from typing import Any
import duckdb

View file

@ -1,7 +1,7 @@
"""Repository for the per-instance welcome-prompt override (singleton row)."""
from datetime import datetime, timezone
from typing import Any, Optional
from typing import Any
import duckdb

View file

@ -8,6 +8,7 @@ data classification), not for analyst-side content.
from __future__ import annotations
import logging
import re
from datetime import date, datetime, timezone
from typing import Any, Optional
from urllib.parse import urlparse
@ -20,6 +21,38 @@ from src.repositories.setup_banner import SetupBannerRepository
_logger = logging.getLogger(__name__)
# Patterns used by _sanitize_banner_html.
_RE_SCRIPT = re.compile(r"<\s*script[\s\S]*?(?:</\s*script\s*>|$)", re.IGNORECASE)
_RE_IFRAME = re.compile(r"<\s*iframe[\s\S]*?(?:</\s*iframe\s*>|$)", re.IGNORECASE)
_RE_ON_ATTR = re.compile(r'\s+on\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)', re.IGNORECASE)
_RE_JS_URI = re.compile(
r'((?:href|src)\s*=\s*["\'])(?:javascript|data):[^"\']*(["\'])',
re.IGNORECASE,
)
def _sanitize_banner_html(html: str) -> str:
"""Strip the most dangerous markup patterns from rendered banner HTML.
Threat model: admins are trusted to author banner content, but mistakes
happen (copy-paste from untrusted sources, accidental script inclusion).
This is defense-in-depth, NOT a full XSS defense for that, render
markdown only or add a strict Content-Security-Policy. The whitelist of
bad patterns is intentionally narrow so legitimate admin HTML is not
mangled.
What is stripped:
- ``<script>...</script>`` blocks (case-insensitive, including unclosed).
- ``<iframe>...</iframe>`` blocks.
- ``on*=`` event-handler attributes (e.g. onclick, onload, onerror).
- ``javascript:`` and ``data:`` URI schemes in href/src attributes.
"""
html = _RE_SCRIPT.sub("", html)
html = _RE_IFRAME.sub("", html)
html = _RE_ON_ATTR.sub("", html)
html = _RE_JS_URI.sub(lambda m: m.group(1) + "#" + m.group(2), html)
return html
def build_setup_banner_context(
*,
@ -76,7 +109,8 @@ def render_setup_banner(
env = Environment(undefined=StrictUndefined, autoescape=True)
try:
template = env.from_string(source)
return template.render(**build_setup_banner_context(user=user, server_url=server_url))
rendered = template.render(**build_setup_banner_context(user=user, server_url=server_url))
return _sanitize_banner_html(rendered)
except TemplateError:
_logger.warning(
"setup_banner render failed; returning empty banner. "

View file

@ -5,7 +5,7 @@ import pytest
from src.db import _ensure_schema
from src.repositories.setup_banner import SetupBannerRepository
from src.setup_banner import build_setup_banner_context, render_setup_banner
from src.setup_banner import _sanitize_banner_html, build_setup_banner_context, render_setup_banner
@pytest.fixture
@ -78,3 +78,44 @@ def test_autoescape_escapes_html_entities(conn):
)
# hostname won't contain < > but the render must succeed without injection
assert out != ""
# ── Sanitizer unit tests ─────────────────────────────────────────────────────
def test_render_strips_script_tags(conn):
"""render_setup_banner must remove <script> blocks from the output."""
SetupBannerRepository(conn).set(
'<p>Hello</p><script>alert(1)</script>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "<script>" not in out
assert "alert" not in out
# Safe content preserved
assert "Hello" in out
def test_render_strips_event_handlers(conn):
"""render_setup_banner must strip on* event-handler attributes."""
SetupBannerRepository(conn).set(
'<button onclick="evil()">Click me</button>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "onclick" not in out
assert "evil" not in out
# Button text preserved
assert "Click me" in out
def test_render_strips_javascript_uri(conn):
"""render_setup_banner must strip javascript: URI schemes from href/src."""
SetupBannerRepository(conn).set(
'<a href="javascript:evil()">link</a>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "javascript:" not in out
assert "evil" not in out
# Link text preserved
assert "link" in out