Skip linkifying escaped username references

Closes tildes-community/tildes-cf#12. See merge request tildes-community/tildes-cf!10.
11 months ago · b0c4eab162
2 changed files with 46 additions and 5 deletions
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@ -411,6 +411,16 @@ def test_username_reference_linkified():
    assert soup.find("a", href="/user/SomeUser")


+def test_escaped_username_reference_not_linkified():
+    """Ensure an escaped username reference stays plain text, not a link."""
+    markdown = r"Hey \@SomeUser, what do you think of this?"  # raw: "\@" stays literal
+    processed = convert_markdown_to_safe_html(markdown)
+
+    soup = BeautifulSoup(processed, features="html5lib")
+    assert len(soup.find_all("a")) == 0
+    assert "@SomeUser" in processed
+
+
 def test_u_style_username_ref_linked():
    """Ensure a /u/username reference gets linkified."""
    markdown = "Hey /u/SomeUser, what do you think of this?"
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@ -6,6 +6,7 @@
 import re
 from collections.abc import Callable, Iterator
 from functools import partial
+from random import randint
 from typing import Any, Optional, Union

 import bleach
@ -129,7 +130,7 @@ def convert_markdown_to_safe_html(
 ) -> str:
    """Convert markdown to sanitized HTML."""
    # apply custom pre-processing to markdown
-    markdown = preprocess_markdown(markdown)
+    markdown, replacements = preprocess_markdown(markdown)

    markdown_bytes = markdown.encode("utf8")

@ -152,11 +153,14 @@ def convert_markdown_to_safe_html(
    # apply custom post-processing to HTML
    html = postprocess_markdown_html(html)

-    # add linkification and sanitize the final HTML before returning it
-    return linkify_and_sanitize_html(html, context)
+    # add linkification and sanitize the HTML
+    html = linkify_and_sanitize_html(html, context)

+    # finally restore any escaped substrings before returning HTML
+    return restore_replacements(html, replacements)

-def preprocess_markdown(markdown: str) -> str:
+
+def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]:
    """Pre-process markdown before passing it to CommonMark."""
    markdown = escape_accidental_ordered_lists(markdown)

@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str:
    # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
    markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")

-    return markdown
+    # temporary replacements to restore after markdown processing
+    replacements = {}
+    if r"\@" in markdown:
+        # cmark rendering removes `\` before any punctuation,
+        # so `\@` becomes `@` and unexpectedly linkifies during the
+        # later linkify step.
+        # prevent that by using a replacement during cmark rendering.
+        replacements[r"\@"] = random_replacement_string(markdown)
+        markdown = markdown.replace(r"\@", replacements[r"\@"])
+
+    return (markdown, replacements)
+
+
+def random_replacement_string(markdown: str) -> str:
+    """Return a random digit string that does not occur in the given markdown."""
+    # keep drawing candidates until one cannot be confused with existing text
+    while True:
+        candidate = str(randint(1000000, 99999999999))
+        if candidate in markdown:
+            continue
+        return candidate


 def merge_subsequent_blockquotes(markdown: str) -> str:
@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
    return html


+def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
+    """Swap the escaped-@ placeholder in the final HTML back to a literal "@"."""
+    if r"\@" not in preprocessed_replacements:
+        return html
+    return html.replace(preprocessed_replacements[r"\@"], "@")
+
+
 class CodeHtmlFormatter(HtmlFormatter):
    """Custom Pygments HtmlFormatter to use a <code> tag.