Skip linkifying escaped username references

Closes tildes-community/tildes-cf#12. See merge request tildes-community/tildes-cf!10.
11 months ago · b0c4eab162
2 changed files with 46 additions and 5 deletions
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@ -411,6 +411,16 @@ def test_username_reference_linkified():
    assert soup.find("a", href="/user/SomeUser")
 def test_escaped_username_reference_not_linkified():
    """Ensure we don't linkify an escaped username reference."""
    # Raw string: `\@` is not a valid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions. The runtime
    # value (backslash followed by `@`) is unchanged.
    markdown = r"Hey \@SomeUser, what do you think of this?"
    processed = convert_markdown_to_safe_html(markdown)

    soup = BeautifulSoup(processed, features="html5lib")

    # No link of any kind may have been generated for the escaped mention...
    assert len(soup.find_all("a")) == 0
    # ...but the mention text itself must still be present in the output.
    assert "@SomeUser" in processed
 def test_u_style_username_ref_linked():
    """Ensure a /u/username reference gets linkified."""
    markdown = "Hey /u/SomeUser, what do you think of this?"
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@ -6,6 +6,7 @@
 import re
 from collections.abc import Callable, Iterator
 from functools import partial
 from random import randint
 from typing import Any, Optional, Union
 import bleach
@ -129,7 +130,7 @@ def convert_markdown_to_safe_html(
 ) -> str:
    """Convert markdown to sanitized HTML."""
    # apply custom pre-processing to markdown
    markdown = preprocess_markdown(markdown)
    markdown, replacements = preprocess_markdown(markdown)
    markdown_bytes = markdown.encode("utf8")
@ -152,11 +153,14 @@ def convert_markdown_to_safe_html(
    # apply custom post-processing to HTML
    html = postprocess_markdown_html(html)
    # add linkification and sanitize the final HTML before returning it
    return linkify_and_sanitize_html(html, context)
    # add linkification and sanitize the HTML
    html = linkify_and_sanitize_html(html, context)
    # finally restore any escaped substrings before returning HTML
    return restore_replacements(html, replacements)
 def preprocess_markdown(markdown: str) -> str:
 def preprocess_markdown(markdown: str) -> tuple[str, dict[str, str]]:
    """Pre-process markdown before passing it to CommonMark."""
    markdown = escape_accidental_ordered_lists(markdown)
@ -165,7 +169,27 @@ def preprocess_markdown(markdown: str) -> str:
    # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
    markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")
    return markdown
    # temporary replacements to restore after markdown processing
    replacements = {}
    if r"\@" in markdown:
        # cmark rendering removes `\` before any punctuation,
        # so `\@` becomes `@` and unexpectedly linkifies during the
        # later linkify step.
        # prevent that by using a replacement during cmark rendering.
        replacements[r"\@"] = random_replacement_string(markdown)
        markdown = markdown.replace(r"\@", replacements[r"\@"])
    return (markdown, replacements)
 def random_replacement_string(markdown: str) -> str:
    """Return a random string of digits that does not occur in the markdown.

    A random integer is drawn and converted to a string; if that string
    already appears anywhere in the input, another one is drawn, until a
    value that is absent from the input is found.
    """
    while True:
        candidate = str(randint(1000000, 99999999999))
        # only accept a candidate that cannot be confused with existing text
        if candidate not in markdown:
            return candidate
 def merge_subsequent_blockquotes(markdown: str) -> str:
@ -216,6 +240,13 @@ def postprocess_markdown_html(html: str) -> str:
    return html
 def restore_replacements(html: str, preprocessed_replacements: dict[str, str]) -> str:
    """Swap temporary placeholder strings in the HTML back to real text.

    If the replacements mapping has an entry for an escaped at-sign (key
    ``\\@``), every occurrence of its placeholder value in the HTML is
    replaced with a plain ``@``. Otherwise the HTML is returned unchanged.
    """
    try:
        placeholder = preprocessed_replacements[r"\@"]
    except KeyError:
        # nothing was substituted during pre-processing
        return html

    return html.replace(placeholder, "@")
 class CodeHtmlFormatter(HtmlFormatter):
    """Custom Pygments HtmlFormatter to use a <code> tag.