fix(security): XSS hardening for setup banner + cleanup unused imports

- Add _sanitize_banner_html() to src/setup_banner.py: strips <script>/
  <iframe> blocks, on* event-handler attributes, and javascript:/data:
  URI schemes post-render (I-2). Defense-in-depth — /setup is partly
  anonymous so malformed admin content must not execute in visitors'
  browsers.
- Apply sanitizer in render_setup_banner() before returning rendered HTML.
- Add 3 unit tests: test_render_strips_script_tags,
  test_render_strips_event_handlers, test_render_strips_javascript_uri.
- Drop unused Optional import from src/repositories/welcome_template.py
  and src/repositories/setup_banner.py (M-6).
This commit is contained in:
ZdenekSrotyr 2026-05-02 20:56:21 +02:00
parent b0ec842804
commit b3ffc98e9f
4 changed files with 79 additions and 4 deletions

View file

@ -1,7 +1,7 @@
"""Repository for the per-instance setup-page banner override (singleton row).""" """Repository for the per-instance setup-page banner override (singleton row)."""
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Optional from typing import Any
import duckdb import duckdb

View file

@ -1,7 +1,7 @@
"""Repository for the per-instance welcome-prompt override (singleton row).""" """Repository for the per-instance welcome-prompt override (singleton row)."""
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Optional from typing import Any
import duckdb import duckdb

View file

@ -8,6 +8,7 @@ data classification), not for analyst-side content.
from __future__ import annotations from __future__ import annotations
import logging import logging
import re
from datetime import date, datetime, timezone from datetime import date, datetime, timezone
from typing import Any, Optional from typing import Any, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
@ -20,6 +21,38 @@ from src.repositories.setup_banner import SetupBannerRepository
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
# Patterns used by _sanitize_banner_html.
_RE_SCRIPT = re.compile(r"<\s*script[\s\S]*?(?:</\s*script\s*>|$)", re.IGNORECASE)
_RE_IFRAME = re.compile(r"<\s*iframe[\s\S]*?(?:</\s*iframe\s*>|$)", re.IGNORECASE)
_RE_ON_ATTR = re.compile(r'\s+on\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)', re.IGNORECASE)
_RE_JS_URI = re.compile(
r'((?:href|src)\s*=\s*["\'])(?:javascript|data):[^"\']*(["\'])',
re.IGNORECASE,
)
def _sanitize_banner_html(html: str) -> str:
"""Strip the most dangerous markup patterns from rendered banner HTML.
Threat model: admins are trusted to author banner content, but mistakes
happen (copy-paste from untrusted sources, accidental script inclusion).
This is defense-in-depth, NOT a full XSS defense for that, render
markdown only or add a strict Content-Security-Policy. The whitelist of
bad patterns is intentionally narrow so legitimate admin HTML is not
mangled.
What is stripped:
- ``<script>...</script>`` blocks (case-insensitive, including unclosed).
- ``<iframe>...</iframe>`` blocks.
- ``on*=`` event-handler attributes (e.g. onclick, onload, onerror).
- ``javascript:`` and ``data:`` URI schemes in href/src attributes.
"""
html = _RE_SCRIPT.sub("", html)
html = _RE_IFRAME.sub("", html)
html = _RE_ON_ATTR.sub("", html)
html = _RE_JS_URI.sub(lambda m: m.group(1) + "#" + m.group(2), html)
return html
def build_setup_banner_context( def build_setup_banner_context(
*, *,
@ -76,7 +109,8 @@ def render_setup_banner(
env = Environment(undefined=StrictUndefined, autoescape=True) env = Environment(undefined=StrictUndefined, autoescape=True)
try: try:
template = env.from_string(source) template = env.from_string(source)
return template.render(**build_setup_banner_context(user=user, server_url=server_url)) rendered = template.render(**build_setup_banner_context(user=user, server_url=server_url))
return _sanitize_banner_html(rendered)
except TemplateError: except TemplateError:
_logger.warning( _logger.warning(
"setup_banner render failed; returning empty banner. " "setup_banner render failed; returning empty banner. "

View file

@ -5,7 +5,7 @@ import pytest
from src.db import _ensure_schema from src.db import _ensure_schema
from src.repositories.setup_banner import SetupBannerRepository from src.repositories.setup_banner import SetupBannerRepository
from src.setup_banner import build_setup_banner_context, render_setup_banner from src.setup_banner import _sanitize_banner_html, build_setup_banner_context, render_setup_banner
@pytest.fixture @pytest.fixture
@ -78,3 +78,44 @@ def test_autoescape_escapes_html_entities(conn):
) )
# hostname won't contain < > but the render must succeed without injection # hostname won't contain < > but the render must succeed without injection
assert out != "" assert out != ""
# ── Sanitizer unit tests ─────────────────────────────────────────────────────
def test_render_strips_script_tags(conn):
"""render_setup_banner must remove <script> blocks from the output."""
SetupBannerRepository(conn).set(
'<p>Hello</p><script>alert(1)</script>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "<script>" not in out
assert "alert" not in out
# Safe content preserved
assert "Hello" in out
def test_render_strips_event_handlers(conn):
"""render_setup_banner must strip on* event-handler attributes."""
SetupBannerRepository(conn).set(
'<button onclick="evil()">Click me</button>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "onclick" not in out
assert "evil" not in out
# Button text preserved
assert "Click me" in out
def test_render_strips_javascript_uri(conn):
"""render_setup_banner must strip javascript: URI schemes from href/src."""
SetupBannerRepository(conn).set(
'<a href="javascript:evil()">link</a>',
updated_by="admin@example.com",
)
out = render_setup_banner(conn, user=_user(), server_url="https://example.com")
assert "javascript:" not in out
assert "evil" not in out
# Link text preserved
assert "link" in out