Browse Source

Markdown: Automatically link subreddit references

merge-requests/110/head
yabai 5 years ago
committed by Deimos
parent
commit
eb7f2a75fd
  1. 71
      tildes/tests/test_markdown.py
  2. 26
      tildes/tildes/lib/markdown.py

71
tildes/tests/test_markdown.py

@ -331,6 +331,77 @@ def test_group_ref_inside_other_tags_linkified():
assert soup.find("a", href="/~group.reference")
def test_subreddit_without_leading_forward_slash_linkified():
    """A bare "r/name" reference (no leading slash) is turned into a reddit link."""
    html = convert_markdown_to_safe_html("Check out: r/antarctica")
    tree = BeautifulSoup(html, features="html5lib")

    # the generated anchor must point at the subreddit's canonical URL
    link = tree.find("a", href="https://www.reddit.com/r/antarctica/")
    assert link
def test_subreddit_with_leading_forward_slash_linkified():
    """A "/r/name" reference (with leading slash) is turned into a reddit link."""
    html = convert_markdown_to_safe_html("Check out: /r/antarctica")
    tree = BeautifulSoup(html, features="html5lib")

    # the leading slash must not end up duplicated or dropped in the href
    link = tree.find("a", href="https://www.reddit.com/r/antarctica/")
    assert link
def test_subreddit_linkified_without_punctuation():
    """Trailing punctuation after a subreddit reference stays out of the link target."""
    html = convert_markdown_to_safe_html("Check out: /r/antarctica!")
    tree = BeautifulSoup(html, features="html5lib")

    # the "!" must not be part of the subreddit name used in the href
    link = tree.find("a", href="https://www.reddit.com/r/antarctica/")
    assert link
def test_multiple_subreddits_linkify():
    """Every subreddit reference in a document gets its own link."""
    # a short list mixing both accepted spellings (with and without leading slash)
    source_text = (
        "Here are a couple of my favorite subreddits:\n\n"
        "* r/antarctica\n"
        "* /r/emacs\n"
    )
    html = convert_markdown_to_safe_html(source_text)
    tree = BeautifulSoup(html, features="html5lib")

    anchors = tree.find_all("a")
    assert len(anchors) == 2
def test_subreddit_inside_pre_ignored():
    """A subreddit reference inside a fenced code block (<pre>) is left unlinked."""
    source_text = (
        "```\n"
        "# This is a code block\n"
        "# I found this code on r/python, hopefully it works\n"
        "```\n"
    )
    html = convert_markdown_to_safe_html(source_text)

    # checked on the raw output: no anchor tag may appear anywhere in it
    assert "<a" not in html
def test_subreddit_lookalike_conjunction_not_linkified():
    """Text using a slash as a conjunction ("x/y") is not mistaken for a subreddit."""
    # "water/ocean" contains the substring "r/ocean", which must not be linkified
    html = convert_markdown_to_safe_html("water/ocean")
    tree = BeautifulSoup(html, features="html5lib")

    anchors = tree.find_all("a")
    assert len(anchors) == 0
def test_subreddit_followed_by_apostrophe_not_linkified():
    """A possessive "'s" after a subreddit reference is not included in the link target."""
    html = convert_markdown_to_safe_html("/r/funny's moderators")
    tree = BeautifulSoup(html, features="html5lib")

    # the href must stop at "funny" rather than swallowing the apostrophe
    link = tree.find("a", href="https://www.reddit.com/r/funny/")
    assert link
def test_username_reference_linkified():
"""Ensure a basic username reference gets linkified."""
markdown = "Hey @SomeUser, what do you think of this?"

26
tildes/tildes/lib/markdown.py

@ -295,6 +295,10 @@ class LinkifyFilter(Filter):
# carefully later.
USERNAME_REFERENCE_REGEX = re.compile(r"(?<![\w\\])(?:/?u/|@)([\w-]+)\b")
# Regex that finds probable references to subreddits. Matches with or without the
# preceding slash (e.g. either of "r/emacs" or "/r/emacs").
SUBREDDIT_REFERENCE_REGEX = re.compile(r"(?<!\w)/?r/(\w+)\b")
def __init__(
self, source: NonRecursiveTreeWalker, skip_tags: Optional[List[str]] = None
):
@ -334,7 +338,7 @@ class LinkifyFilter(Filter):
# text token not inside a skipped tag - do the actual linkification
# replacements
# Note: doing the two replacements "iteratively" like this only works
# Note: doing the replacements "iteratively" like this only works
# because they are "disjoint" and we know they're not competing to
# replace the same text. If more replacements are added in the future
# that might conflict with each other, this will need to be reworked
@ -344,12 +348,19 @@ class LinkifyFilter(Filter):
filter_regex=self.GROUP_REFERENCE_REGEX,
linkify_function=self._tokenize_group_match,
)
replaced_tokens = self._linkify_tokens(
replaced_tokens,
filter_regex=self.USERNAME_REFERENCE_REGEX,
linkify_function=self._tokenize_username_match,
)
replaced_tokens = self._linkify_tokens(
replaced_tokens,
filter_regex=self.SUBREDDIT_REFERENCE_REGEX,
linkify_function=self._tokenize_subreddit_match,
)
# yield all the tokens returned from the replacement process (will be
# just the original token if nothing was replaced)
for new_token in replaced_tokens:
@ -463,6 +474,19 @@ class LinkifyFilter(Filter):
# the username wasn't valid, so just keep it as the original text
return [{"type": "Characters", "data": match[0]}]
@staticmethod
def _tokenize_subreddit_match(match: Match) -> List[dict]:
    """Build the HTML tokens that wrap a subreddit reference in a link.

    The whole matched text (group 0) is kept as the visible link text, while the
    captured subreddit name (group 1) is used to build the reddit URL.
    """
    subreddit_url = f"https://www.reddit.com/r/{match[1]}/"

    opening_tag = {
        "type": "StartTag",
        "name": "a",
        "data": {(None, "href"): subreddit_url},
    }
    link_text = {"type": "Characters", "data": match[0]}
    closing_tag = {"type": "EndTag", "name": "a"}

    return [opening_tag, link_text, closing_tag]
def linkify_and_sanitize_html(
html: str, context: Optional[HTMLSanitizationContext] = None

Loading…
Cancel
Save