From b0c4eab162e13f302b6b756bcc5c94c7801fdec0 Mon Sep 17 00:00:00 2001
From: talklittle <talklittle+gitlab@gmail.com>
Date: Tue, 28 Jan 2025 19:33:14 +0000
Subject: [PATCH] Skip linkifying escaped username references

Closes tildes-community/tildes-cf#12

See merge request tildes-community/tildes-cf!10
---
 tildes/tests/test_markdown.py | 10 +++++++++
 tildes/tildes/lib/markdown.py | 41 ++++++++++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/tildes/tests/test_markdown.py b/tildes/tests/test_markdown.py
index 02e6734..74466a3 100644
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@@ -411,6 +411,16 @@ def test_username_reference_linkified():
     assert soup.find("a", href="/user/SomeUser")
 
 
+def test_escaped_username_reference_not_linkified():
+    """Ensure a backslash-escaped username reference is left as plain text."""
+    markdown = r"Hey \@SomeUser, what do you think of this?"
+    processed = convert_markdown_to_safe_html(markdown)
+
+    soup = BeautifulSoup(processed, features="html5lib")
+    assert len(soup.find_all("a")) == 0
+    assert "@SomeUser" in processed
+
+
 def test_u_style_username_ref_linked():
     """Ensure a /u/username reference gets linkified."""
     markdown = "Hey /u/SomeUser, what do you think of this?"
diff --git a/tildes/tildes/lib/markdown.py b/tildes/tildes/lib/markdown.py
index f2dd4c8..506c3f6 100644
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@@ -6,6 +6,7 @@
 import re
 from collections.abc import Callable, Iterator
 from functools import partial
+from random import randint
 from typing import Any, Optional, Union
 
 import bleach
@@ -129,7 +130,7 @@ def convert_markdown_to_safe_html(
 ) -> str:
     """Convert markdown to sanitized HTML."""
     # apply custom pre-processing to markdown
-    markdown = preprocess_markdown(markdown)
+    markdown, replacements = preprocess_markdown(markdown)
 
     markdown_bytes = markdown.encode("utf8")
 
@@ -152,11 +153,14 @@ def convert_markdown_to_safe_html(
     # apply custom post-processing to HTML
     html = postprocess_markdown_html(html)
 
-    # add linkification and sanitize the final HTML before returning it
-    return linkify_and_sanitize_html(html, context)
+    # add linkification and sanitize the HTML
+    html = linkify_and_sanitize_html(html, context)
 
+    # finally restore any escaped substrings before returning HTML
+    return restore_replacements(html, replacements)
 
-def preprocess_markdown(markdown: str) -> str:
+
+def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]:
     """Pre-process markdown before passing it to CommonMark."""
     markdown = escape_accidental_ordered_lists(markdown)
 
@@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str:
     # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
     markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")
 
-    return markdown
+    # temporary replacements to restore after markdown processing
+    replacements = {}
+    if r"\@" in markdown:
+        # cmark rendering removes `\` before any punctuation,
+        # so `\@` becomes `@` and unexpectedly linkifies during the
+        # later linkify step.
+        # prevent that by using a replacement during cmark rendering.
+        replacements[r"\@"] = random_replacement_string(markdown)
+        markdown = markdown.replace(r"\@", replacements[r"\@"])
+
+    return (markdown, replacements)
+
+
+def random_replacement_string(markdown: str, escaped: str = r"\@") -> str:
+    """Pick a random digit string that can stand in for `escaped` unambiguously."""
+    while True:
+        token = str(randint(1000000, 99999999999))
+        # round-trip check: rejects tokens already present in the text and
+        # ones whose digits overlap adjacent digits (restoring would misplace "@")
+        if markdown.replace(escaped, token).replace(token, escaped) == markdown:
+            return token
 
 
 def merge_subsequent_blockquotes(markdown: str) -> str:
@@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
     return html
 
 
+def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
+    """Swap each temporary placeholder in the final HTML back to a literal "@"."""
+    placeholder = preprocessed_replacements.get(r"\@")
+    # placeholder is None when the markdown had no escaped "@" to protect
+    return html if placeholder is None else html.replace(placeholder, "@")
+
+
 class CodeHtmlFormatter(HtmlFormatter):
     """Custom Pygments HtmlFormatter to use a <code> tag.