diff --git a/tildes/tests/test_markdown.py b/tildes/tests/test_markdown.py
index de807e3..e11d8b3 100644
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@@ -3,7 +3,7 @@
from bs4 import BeautifulSoup
-from tildes.enums import BleachContext
+from tildes.enums import HTMLSanitizationContext
from tildes.lib.markdown import convert_markdown_to_safe_html
@@ -403,6 +403,8 @@ def test_a_rel_removed_default_context():
def test_a_rel_kept_user_bio_context():
"""Ensure a rel= attr is kept on an tag in the user bio context."""
markdown = 'Link'
- processed = convert_markdown_to_safe_html(markdown, BleachContext.USER_BIO)
+ processed = convert_markdown_to_safe_html(
+ markdown, HTMLSanitizationContext.USER_BIO
+ )
assert "rel=" in processed
diff --git a/tildes/tildes/enums.py b/tildes/tildes/enums.py
index 9f27ec8..c1150b1 100644
--- a/tildes/tildes/enums.py
+++ b/tildes/tildes/enums.py
@@ -143,8 +143,7 @@ class TopicType(enum.Enum):
LINK = enum.auto()
-class BleachContext(enum.Enum):
- """Enum for the possible contexts of Bleach HTML sanitization."""
+class HTMLSanitizationContext(enum.Enum):
+ """Enum for the possible contexts for HTML sanitization."""
- DEFAULT = enum.auto()
USER_BIO = enum.auto()
diff --git a/tildes/tildes/lib/markdown.py b/tildes/tildes/lib/markdown.py
index 425fe06..8d1d8ef 100644
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@@ -5,7 +5,7 @@
from functools import partial
import re
-from typing import Any, Callable, Dict, Iterator, List, Match, Optional, Pattern, Tuple
+from typing import Any, Callable, Iterator, List, Match, Optional, Pattern, Tuple
from bs4 import BeautifulSoup
import bleach
@@ -16,7 +16,7 @@ from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, PhpLexer
from pygments.util import ClassNotFound
-from tildes.enums import BleachContext
+from tildes.enums import HTMLSanitizationContext
from tildes.metrics import histogram_timer
from tildes.schemas.group import is_valid_group_path
from tildes.schemas.user import is_valid_username
@@ -88,6 +88,8 @@ HTML_TAG_WHITELIST = (
"tr",
"ul",
)
+PROTOCOL_WHITELIST = ("http", "https", "mailto")
+
HTML_ATTRIBUTE_WHITELIST_DEFAULT = {
"a": ["href", "title"],
"ol": ["start"],
@@ -96,12 +98,10 @@ HTML_ATTRIBUTE_WHITELIST_DEFAULT = {
"code": allow_syntax_highlighting_classes,
"span": allow_syntax_highlighting_classes,
}
-PROTOCOL_WHITELIST = ("http", "https", "mailto")
# per-context overrides for allowed attributes
-HTML_ATTRIBUTE_WHITELIST_OVERRIDES: Dict[BleachContext, Dict[str, List[str]]] = {
- BleachContext.DEFAULT: {},
- BleachContext.USER_BIO: {"a": ["href", "title", "rel"]},
+HTML_ATTRIBUTE_WHITELIST_OVERRIDES = {
+ HTMLSanitizationContext.USER_BIO: {"a": ["href", "title", "rel"]}
}
# Regex that finds ordered list markdown that was probably accidental - ones being
@@ -119,7 +119,7 @@ SUBSEQUENT_BLOCKQUOTES_REGEX = re.compile("^>([^\n]*?)\n\n(?=>)", flags=re.MULTI
@histogram_timer("markdown_processing")
def convert_markdown_to_safe_html(
- markdown: str, context: BleachContext = BleachContext.DEFAULT
+ markdown: str, context: Optional[HTMLSanitizationContext] = None
) -> str:
"""Convert markdown to sanitized HTML."""
# apply custom pre-processing to markdown
@@ -459,18 +459,20 @@ class LinkifyFilter(Filter):
return [{"type": "Characters", "data": match[0]}]
-def linkify_and_sanitize_html(html: str, context: BleachContext) -> str:
+def linkify_and_sanitize_html(
+ html: str, context: Optional[HTMLSanitizationContext] = None
+) -> str:
"""Use bleach and html5lib filters to linkify and sanitize HTML."""
# list of tag names to exclude from linkification
linkify_skipped_tags = ["code", "pre"]
tildes_linkifier = partial(LinkifyFilter, skip_tags=linkify_skipped_tags)
- # include overrides for the current context
- attribute_whitelist = {
- **HTML_ATTRIBUTE_WHITELIST_DEFAULT,
- **HTML_ATTRIBUTE_WHITELIST_OVERRIDES[context],
- }
+ attribute_whitelist = HTML_ATTRIBUTE_WHITELIST_DEFAULT
+ if context:
+ # include overrides for the current context
+ overrides = HTML_ATTRIBUTE_WHITELIST_OVERRIDES.get(context, {})
+ attribute_whitelist = {**attribute_whitelist, **overrides}
cleaner = bleach.Cleaner(
tags=HTML_TAG_WHITELIST,
diff --git a/tildes/tildes/models/user/user.py b/tildes/tildes/models/user/user.py
index d0cd78c..9b77e2b 100644
--- a/tildes/tildes/models/user/user.py
+++ b/tildes/tildes/models/user/user.py
@@ -32,7 +32,7 @@ from sqlalchemy.orm import deferred
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree
-from tildes.enums import BleachContext, CommentLabelOption, TopicSortOption
+from tildes.enums import CommentLabelOption, HTMLSanitizationContext, TopicSortOption
from tildes.lib.database import ArrayOfLtree, CIText
from tildes.lib.datetime import utc_now
from tildes.lib.hash import hash_string, is_match_for_hash
@@ -156,7 +156,7 @@ class User(DatabaseModel):
if self._bio_markdown is not None:
self.bio_rendered_html = convert_markdown_to_safe_html(
- new_markdown, BleachContext.USER_BIO
+ new_markdown, HTMLSanitizationContext.USER_BIO
)
else:
self.bio_rendered_html = None