Browse Source

Skip linkifying escaped username references

Closes tildes-community/tildes-cf#12

See merge request tildes-community/tildes-cf!10
develop
talklittle 4 weeks ago
committed by Andrew Shu
parent
commit
b0c4eab162
  1. 10
      tildes/tests/test_markdown.py
  2. 41
      tildes/tildes/lib/markdown.py

10
tildes/tests/test_markdown.py

@ -411,6 +411,16 @@ def test_username_reference_linkified():
assert soup.find("a", href="/user/SomeUser")
def test_escaped_username_reference_not_linkified():
    """Ensure we don't linkify an escaped username reference."""
    # Raw string: `\@` is not a recognized Python escape sequence, so a plain
    # literal only keeps the backslash by accident and triggers an
    # invalid-escape warning on newer interpreters.
    markdown = r"Hey \@SomeUser, what do you think of this?"
    processed = convert_markdown_to_safe_html(markdown)
    soup = BeautifulSoup(processed, features="html5lib")

    # no link of any kind should have been generated
    assert len(soup.find_all("a")) == 0

    # the username itself must still be present, without the backslash-escaped
    # form having been turned into a link
    assert "@SomeUser" in processed
def test_u_style_username_ref_linked():
"""Ensure a /u/username reference gets linkified."""
markdown = "Hey /u/SomeUser, what do you think of this?"

41
tildes/tildes/lib/markdown.py

@ -6,6 +6,7 @@
import re
from collections.abc import Callable, Iterator
from functools import partial
from random import randint
from typing import Any, Optional, Union
import bleach
@ -129,7 +130,7 @@ def convert_markdown_to_safe_html(
) -> str:
"""Convert markdown to sanitized HTML."""
# apply custom pre-processing to markdown
markdown = preprocess_markdown(markdown)
markdown, replacements = preprocess_markdown(markdown)
markdown_bytes = markdown.encode("utf8")
@ -152,11 +153,14 @@ def convert_markdown_to_safe_html(
# apply custom post-processing to HTML
html = postprocess_markdown_html(html)
# add linkification and sanitize the final HTML before returning it
return linkify_and_sanitize_html(html, context)
# add linkification and sanitize the HTML
html = linkify_and_sanitize_html(html, context)
# finally restore any escaped substrings before returning HTML
return restore_replacements(html, replacements)
def preprocess_markdown(markdown: str) -> str:
def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]:
"""Pre-process markdown before passing it to CommonMark."""
markdown = escape_accidental_ordered_lists(markdown)
@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str:
# fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")
return markdown
# temporary replacements to restore after markdown processing
replacements = {}
if r"\@" in markdown:
# cmark rendering removes `\` before any punctuation,
# so `\@` becomes `@` and unexpectedly linkifies during the
# later linkify step.
# prevent that by using a replacement during cmark rendering.
replacements[r"\@"] = random_replacement_string(markdown)
markdown = markdown.replace(r"\@", replacements[r"\@"])
return (markdown, replacements)
def random_replacement_string(markdown: str) -> str:
    """Return a random string of digits that does not occur in the markdown.

    Candidates are drawn one at a time, and the first one that is not a
    substring of the input is returned.
    """
    while True:
        candidate = str(randint(1000000, 99999999999))
        if candidate not in markdown:
            return candidate
def merge_subsequent_blockquotes(markdown: str) -> str:
@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
return html
def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
    """Put a literal `@` back wherever its placeholder appears in the HTML.

    If the replacements mapping has no entry for the escaped `\\@` sequence,
    the HTML is returned unchanged.
    """
    escaped_at = r"\@"
    if escaped_at not in preprocessed_replacements:
        return html

    placeholder = preprocessed_replacements[escaped_at]
    return html.replace(placeholder, "@")
class CodeHtmlFormatter(HtmlFormatter):
"""Custom Pygments HtmlFormatter to use a <code> tag.

Loading…
Cancel
Save