Browse Source

Skip linkifying escaped username references

Closes tildes-community/tildes-cf#12

See merge request tildes-community/tildes-cf!10
develop
talklittle 4 weeks ago
committed by Andrew Shu
parent
commit
b0c4eab162
  1. 10
      tildes/tests/test_markdown.py
  2. 41
      tildes/tildes/lib/markdown.py

10
tildes/tests/test_markdown.py

@ -411,6 +411,16 @@ def test_username_reference_linkified():
assert soup.find("a", href="/user/SomeUser") assert soup.find("a", href="/user/SomeUser")
def test_escaped_username_reference_not_linkified():
    """Ensure we don't linkify an escaped username reference."""
    # Raw string: `\@` is not a recognized escape sequence, so a plain literal
    # triggers an invalid-escape warning. The runtime value is unchanged.
    markdown = r"Hey \@SomeUser, what do you think of this?"
    processed = convert_markdown_to_safe_html(markdown)
    soup = BeautifulSoup(processed, features="html5lib")

    # no link of any kind should have been generated
    assert not soup.find_all("a")

    # the username itself must still be present in the output
    assert "@SomeUser" in processed
def test_u_style_username_ref_linked(): def test_u_style_username_ref_linked():
"""Ensure a /u/username reference gets linkified.""" """Ensure a /u/username reference gets linkified."""
markdown = "Hey /u/SomeUser, what do you think of this?" markdown = "Hey /u/SomeUser, what do you think of this?"

41
tildes/tildes/lib/markdown.py

@ -6,6 +6,7 @@
import re import re
from collections.abc import Callable, Iterator from collections.abc import Callable, Iterator
from functools import partial from functools import partial
from random import randint
from typing import Any, Optional, Union from typing import Any, Optional, Union
import bleach import bleach
@ -129,7 +130,7 @@ def convert_markdown_to_safe_html(
) -> str: ) -> str:
"""Convert markdown to sanitized HTML.""" """Convert markdown to sanitized HTML."""
# apply custom pre-processing to markdown # apply custom pre-processing to markdown
markdown = preprocess_markdown(markdown)
markdown, replacements = preprocess_markdown(markdown)
markdown_bytes = markdown.encode("utf8") markdown_bytes = markdown.encode("utf8")
@ -152,11 +153,14 @@ def convert_markdown_to_safe_html(
# apply custom post-processing to HTML # apply custom post-processing to HTML
html = postprocess_markdown_html(html) html = postprocess_markdown_html(html)
# add linkification and sanitize the final HTML before returning it
return linkify_and_sanitize_html(html, context)
# add linkification and sanitize the HTML
html = linkify_and_sanitize_html(html, context)
# finally restore any escaped substrings before returning HTML
return restore_replacements(html, replacements)
def preprocess_markdown(markdown: str) -> str:
def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]:
"""Pre-process markdown before passing it to CommonMark.""" """Pre-process markdown before passing it to CommonMark."""
markdown = escape_accidental_ordered_lists(markdown) markdown = escape_accidental_ordered_lists(markdown)
@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str:
# fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯") markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")
return markdown
# temporary replacements to restore after markdown processing
replacements = {}
if r"\@" in markdown:
# cmark rendering removes `\` before any punctuation,
# so `\@` becomes `@` and unexpectedly linkifies during the
# later linkify step.
# prevent that by using a replacement during cmark rendering.
replacements[r"\@"] = random_replacement_string(markdown)
markdown = markdown.replace(r"\@", replacements[r"\@"])
return (markdown, replacements)
def random_replacement_string(markdown: str) -> str:
    """Return a random string of digits that does not occur in the markdown.

    The candidate is the decimal form of a random integer between 1000000 and
    99999999999 (inclusive), so it is 7 to 11 digits long. Candidates that
    already appear somewhere in the input are discarded and redrawn.
    """
    while True:
        candidate = str(randint(1000000, 99999999999))

        # only accept a value that can't be confused with existing text
        if candidate not in markdown:
            return candidate
def merge_subsequent_blockquotes(markdown: str) -> str: def merge_subsequent_blockquotes(markdown: str) -> str:
@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
return html return html
def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
    """Swap temporary placeholder strings in the HTML back to their real text.

    Only the placeholder stored under the `\\@` key is handled: every occurrence
    of it in the HTML is replaced with a plain `@`. If that key is absent, the
    HTML is returned unchanged.
    """
    if r"\@" not in preprocessed_replacements:
        return html

    placeholder = preprocessed_replacements[r"\@"]
    return html.replace(placeholder, "@")
class CodeHtmlFormatter(HtmlFormatter): class CodeHtmlFormatter(HtmlFormatter):
"""Custom Pygments HtmlFormatter to use a <code> tag. """Custom Pygments HtmlFormatter to use a <code> tag.

Loading…
Cancel
Save