From b0c4eab162e13f302b6b756bcc5c94c7801fdec0 Mon Sep 17 00:00:00 2001
From: talklittle
Date: Tue, 28 Jan 2025 19:33:14 +0000
Subject: [PATCH] Skip linkify escaped username reference

Closes tildes-community/tildes-cf#12

See merge request tildes-community/tildes-cf!10
---
 tildes/tests/test_markdown.py | 10 +++++++++
 tildes/tildes/lib/markdown.py | 41 ++++++++++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/tildes/tests/test_markdown.py b/tildes/tests/test_markdown.py
index 02e6734..74466a3 100644
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@@ -411,6 +411,16 @@ def test_username_reference_linkified():
     assert soup.find("a", href="/user/SomeUser")
 
 
+def test_escaped_username_reference_not_linkified():
+    """Ensure we don't linkify an escaped username reference."""
+    markdown = r"Hey \@SomeUser, what do you think of this?"
+    processed = convert_markdown_to_safe_html(markdown)
+
+    soup = BeautifulSoup(processed, features="html5lib")
+    assert len(soup.find_all("a")) == 0
+    assert "@SomeUser" in processed
+
+
 def test_u_style_username_ref_linked():
     """Ensure a /u/username reference gets linkified."""
     markdown = "Hey /u/SomeUser, what do you think of this?"
diff --git a/tildes/tildes/lib/markdown.py b/tildes/tildes/lib/markdown.py index f2dd4c8..506c3f6 100644 --- a/tildes/tildes/lib/markdown.py +++ b/tildes/tildes/lib/markdown.py @@ -6,6 +6,7 @@ import re from collections.abc import Callable, Iterator from functools import partial +from random import randint from typing import Any, Optional, Union import bleach @@ -129,7 +130,7 @@ def convert_markdown_to_safe_html( ) -> str: """Convert markdown to sanitized HTML.""" # apply custom pre-processing to markdown - markdown = preprocess_markdown(markdown) + markdown, replacements = preprocess_markdown(markdown) markdown_bytes = markdown.encode("utf8") @@ -152,11 +153,14 @@ def convert_markdown_to_safe_html( # apply custom post-processing to HTML html = postprocess_markdown_html(html) - # add linkification and sanitize the final HTML before returning it - return linkify_and_sanitize_html(html, context) + # add linkification and sanitize the HTML + html = linkify_and_sanitize_html(html, context) + # finally restore any escaped substrings before returning HTML + return restore_replacements(html, replacements) -def preprocess_markdown(markdown: str) -> str: + +def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]: """Pre-process markdown before passing it to CommonMark.""" markdown = escape_accidental_ordered_lists(markdown) @@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str: # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯") - return markdown + # temporary replacements to restore after markdown processing + replacements = {} + if r"\@" in markdown: + # cmark rendering removes `\` before any punctuation, + # so `\@` becomes `@` and unexpectedly linkifies during the + # later linkify step. + # prevent that by using a replacement during cmark rendering. 
+        replacements[r"\@"] = random_replacement_string(markdown)
+        markdown = markdown.replace(r"\@", replacements[r"\@"])
+
+    return (markdown, replacements)
+
+
+def random_replacement_string(markdown: str) -> str:
+    """Return a random string of digits that does not occur in the markdown."""
+    while True:
+        # retry with a loop rather than recursion so that repeated collisions
+        # with digits already in the text cannot exhaust the call stack
+        candidate = str(randint(1000000, 99999999999))
+        if candidate not in markdown:
+            return candidate
 
 
 def merge_subsequent_blockquotes(markdown: str) -> str:
@@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
     return html
 
 
+def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
+    """Swap preprocessing placeholders in the final HTML back to literal text."""
+    if r"\@" in preprocessed_replacements:
+        html = html.replace(preprocessed_replacements[r"\@"], "@")
+    return html
+
+
 class CodeHtmlFormatter(HtmlFormatter):
     """Custom Pygments HtmlFormatter to use a tag.