Browse Source

Rename BleachContext, simplify default case

Renames BleachContext to HTMLSanitizationContext, and simplifies the
default case a little by just allowing None as the context instead of
needing an explicit DEFAULT enum member.
merge-requests/69/head
Deimos 5 years ago
parent
commit
a2ee9ea027
  1. 6
      tildes/tests/test_markdown.py
  2. 5
      tildes/tildes/enums.py
  3. 28
      tildes/tildes/lib/markdown.py
  4. 4
      tildes/tildes/models/user/user.py

6
tildes/tests/test_markdown.py

@ -3,7 +3,7 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from tildes.enums import BleachContext
from tildes.enums import HTMLSanitizationContext
from tildes.lib.markdown import convert_markdown_to_safe_html from tildes.lib.markdown import convert_markdown_to_safe_html
@ -403,6 +403,8 @@ def test_a_rel_removed_default_context():
def test_a_rel_kept_user_bio_context(): def test_a_rel_kept_user_bio_context():
"""Ensure a rel= attr is kept on an <a> tag in the user bio context.""" """Ensure a rel= attr is kept on an <a> tag in the user bio context."""
markdown = '<a href="http://example.com" rel="something">Link</a>' markdown = '<a href="http://example.com" rel="something">Link</a>'
processed = convert_markdown_to_safe_html(markdown, BleachContext.USER_BIO)
processed = convert_markdown_to_safe_html(
markdown, HTMLSanitizationContext.USER_BIO
)
assert "rel=" in processed assert "rel=" in processed

5
tildes/tildes/enums.py

@ -143,8 +143,7 @@ class TopicType(enum.Enum):
LINK = enum.auto() LINK = enum.auto()
class BleachContext(enum.Enum):
"""Enum for the possible contexts of Bleach HTML sanitization."""
class HTMLSanitizationContext(enum.Enum):
"""Enum for the possible contexts for HTML sanitization."""
DEFAULT = enum.auto()
USER_BIO = enum.auto() USER_BIO = enum.auto()

28
tildes/tildes/lib/markdown.py

@ -5,7 +5,7 @@
from functools import partial from functools import partial
import re import re
from typing import Any, Callable, Dict, Iterator, List, Match, Optional, Pattern, Tuple
from typing import Any, Callable, Iterator, List, Match, Optional, Pattern, Tuple
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import bleach import bleach
@ -16,7 +16,7 @@ from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, PhpLexer from pygments.lexers import get_lexer_by_name, PhpLexer
from pygments.util import ClassNotFound from pygments.util import ClassNotFound
from tildes.enums import BleachContext
from tildes.enums import HTMLSanitizationContext
from tildes.metrics import histogram_timer from tildes.metrics import histogram_timer
from tildes.schemas.group import is_valid_group_path from tildes.schemas.group import is_valid_group_path
from tildes.schemas.user import is_valid_username from tildes.schemas.user import is_valid_username
@ -88,6 +88,8 @@ HTML_TAG_WHITELIST = (
"tr", "tr",
"ul", "ul",
) )
PROTOCOL_WHITELIST = ("http", "https", "mailto")
HTML_ATTRIBUTE_WHITELIST_DEFAULT = { HTML_ATTRIBUTE_WHITELIST_DEFAULT = {
"a": ["href", "title"], "a": ["href", "title"],
"ol": ["start"], "ol": ["start"],
@ -96,12 +98,10 @@ HTML_ATTRIBUTE_WHITELIST_DEFAULT = {
"code": allow_syntax_highlighting_classes, "code": allow_syntax_highlighting_classes,
"span": allow_syntax_highlighting_classes, "span": allow_syntax_highlighting_classes,
} }
PROTOCOL_WHITELIST = ("http", "https", "mailto")
# per-context overrides for allowed attributes # per-context overrides for allowed attributes
HTML_ATTRIBUTE_WHITELIST_OVERRIDES: Dict[BleachContext, Dict[str, List[str]]] = {
BleachContext.DEFAULT: {},
BleachContext.USER_BIO: {"a": ["href", "title", "rel"]},
HTML_ATTRIBUTE_WHITELIST_OVERRIDES = {
HTMLSanitizationContext.USER_BIO: {"a": ["href", "title", "rel"]}
} }
# Regex that finds ordered list markdown that was probably accidental - ones being # Regex that finds ordered list markdown that was probably accidental - ones being
@ -119,7 +119,7 @@ SUBSEQUENT_BLOCKQUOTES_REGEX = re.compile("^>([^\n]*?)\n\n(?=>)", flags=re.MULTI
@histogram_timer("markdown_processing") @histogram_timer("markdown_processing")
def convert_markdown_to_safe_html( def convert_markdown_to_safe_html(
markdown: str, context: BleachContext = BleachContext.DEFAULT
markdown: str, context: Optional[HTMLSanitizationContext] = None
) -> str: ) -> str:
"""Convert markdown to sanitized HTML.""" """Convert markdown to sanitized HTML."""
# apply custom pre-processing to markdown # apply custom pre-processing to markdown
@ -459,18 +459,20 @@ class LinkifyFilter(Filter):
return [{"type": "Characters", "data": match[0]}] return [{"type": "Characters", "data": match[0]}]
def linkify_and_sanitize_html(html: str, context: BleachContext) -> str:
def linkify_and_sanitize_html(
html: str, context: Optional[HTMLSanitizationContext] = None
) -> str:
"""Use bleach and html5lib filters to linkify and sanitize HTML.""" """Use bleach and html5lib filters to linkify and sanitize HTML."""
# list of tag names to exclude from linkification # list of tag names to exclude from linkification
linkify_skipped_tags = ["code", "pre"] linkify_skipped_tags = ["code", "pre"]
tildes_linkifier = partial(LinkifyFilter, skip_tags=linkify_skipped_tags) tildes_linkifier = partial(LinkifyFilter, skip_tags=linkify_skipped_tags)
# include overrides for the current context
attribute_whitelist = {
**HTML_ATTRIBUTE_WHITELIST_DEFAULT,
**HTML_ATTRIBUTE_WHITELIST_OVERRIDES[context],
}
attribute_whitelist = HTML_ATTRIBUTE_WHITELIST_DEFAULT
if context:
# include overrides for the current context
overrides = HTML_ATTRIBUTE_WHITELIST_OVERRIDES.get(context, {})
attribute_whitelist = {**attribute_whitelist, **overrides}
cleaner = bleach.Cleaner( cleaner = bleach.Cleaner(
tags=HTML_TAG_WHITELIST, tags=HTML_TAG_WHITELIST,

4
tildes/tildes/models/user/user.py

@ -32,7 +32,7 @@ from sqlalchemy.orm import deferred
from sqlalchemy.sql.expression import text from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree from sqlalchemy_utils import Ltree
from tildes.enums import BleachContext, CommentLabelOption, TopicSortOption
from tildes.enums import CommentLabelOption, HTMLSanitizationContext, TopicSortOption
from tildes.lib.database import ArrayOfLtree, CIText from tildes.lib.database import ArrayOfLtree, CIText
from tildes.lib.datetime import utc_now from tildes.lib.datetime import utc_now
from tildes.lib.hash import hash_string, is_match_for_hash from tildes.lib.hash import hash_string, is_match_for_hash
@ -156,7 +156,7 @@ class User(DatabaseModel):
if self._bio_markdown is not None: if self._bio_markdown is not None:
self.bio_rendered_html = convert_markdown_to_safe_html( self.bio_rendered_html = convert_markdown_to_safe_html(
new_markdown, BleachContext.USER_BIO
new_markdown, HTMLSanitizationContext.USER_BIO
) )
else: else:
self.bio_rendered_html = None self.bio_rendered_html = None

Loading…
Cancel
Save