Browse Source

Merge subsequent blockquotes

Markdown won't merge subsequent quoted paragraphs into a single
blockquote unless the blank line between them also has a ">" on it. Most
people don't expect this behavior when quoting a multi-paragraph
section, and end up with a bunch of separated blockquotes.

This fixes that issue by default, while still allowing people to keep
their blockquotes separated by adding at least one more newline between
the two quoted paragraphs (so there are at least two blank lines between
them), among various other methods.
merge-requests/64/head
Deimos 6 years ago
parent
commit
52a95b118c
  1. 16
      tildes/tests/test_markdown.py
  2. 16
      tildes/tildes/lib/markdown.py

16
tildes/tests/test_markdown.py

@ -81,6 +81,22 @@ def test_accidental_ordered_list():
assert "<ol" not in html assert "<ol" not in html
def test_merged_blockquotes():
    """Quoted paragraphs split by a single blank line render as one blockquote."""
    source = "> Paragraph 1\n\n> Paragraph 2"

    # only one opening tag means both paragraphs ended up in the same quote
    assert convert_markdown_to_safe_html(source).count("<blockquote") == 1
def test_separated_blockquotes():
    """Two blank lines between quoted paragraphs keep the blockquotes apart."""
    source = "> Paragraph 1\n\n\n> Paragraph 2"

    # two opening tags means the paragraphs were not merged into one quote
    assert convert_markdown_to_safe_html(source).count("<blockquote") == 2
def test_existing_newline_not_doubled(): def test_existing_newline_not_doubled():
"""Ensure that the standard markdown line break doesn't result in two.""" """Ensure that the standard markdown line break doesn't result in two."""
markdown = "A deliberate line \nbreak" markdown = "A deliberate line \nbreak"

16
tildes/tildes/lib/markdown.py

@ -107,6 +107,8 @@ BAD_ORDERED_LIST_REGEX = re.compile(
STRIP_IMAGE_ELEMENTS_REGEX = re.compile(r'<img src="([^"<>]*?)" alt="([^"<>]*?)" />') STRIP_IMAGE_ELEMENTS_REGEX = re.compile(r'<img src="([^"<>]*?)" alt="([^"<>]*?)" />')
# Matches a line starting with ">" that is followed by exactly one blank line and
# then another line starting with ">". Group 1 captures the rest of the first quoted
# line. The following ">" is only checked with a lookahead (not consumed), so that
# line can itself be the start of the next match when several quotes are chained.
SUBSEQUENT_BLOCKQUOTES_REGEX = re.compile("^>([^\n]*?)\n\n(?=>)", flags=re.MULTILINE)
@histogram_timer("markdown_processing") @histogram_timer("markdown_processing")
def convert_markdown_to_safe_html(markdown: str) -> str: def convert_markdown_to_safe_html(markdown: str) -> str:
@ -143,12 +145,26 @@ def preprocess_markdown(markdown: str) -> str:
"""Pre-process markdown before passing it to CommonMark.""" """Pre-process markdown before passing it to CommonMark."""
markdown = escape_accidental_ordered_lists(markdown) markdown = escape_accidental_ordered_lists(markdown)
markdown = merge_subsequent_blockquotes(markdown)
# fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯") markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")
return markdown return markdown
def merge_subsequent_blockquotes(markdown: str) -> str:
    """Join quoted paragraphs split by one blank line into a single blockquote.

    People quoting several paragraphs usually leave the blank lines between them
    without a > symbol, which makes each paragraph its own blockquote. That is
    almost never what was intended, so whenever a quoted line is followed by
    exactly one blank line and then another quoted line, the blank line is replaced
    with a bare ">" line so the quote continues across it.

    Quoted paragraphs separated by two or more blank lines are left untouched, so
    separate blockquotes can still be written that way (or by various other
    methods).
    """
    return SUBSEQUENT_BLOCKQUOTES_REGEX.sub(
        # keep the quoted line as it was and put a ">" on the blank line after it
        lambda match: f">{match.group(1)}\n>\n",
        markdown,
    )
def escape_accidental_ordered_lists(markdown: str) -> str: def escape_accidental_ordered_lists(markdown: str) -> str:
"""Escape markdown that's probably an accidental ordered list. """Escape markdown that's probably an accidental ordered list.

Loading…
Cancel
Save