Merge subsequent blockquotes

Markdown won't merge subsequent quoted paragraphs into a single blockquote unless the blank line between them also has a ">" on it. Most people don't expect this behavior when quoting a multi-paragraph section, and end up with a bunch of separate blockquotes. This should fix that issue by default, while still allowing people to keep their blockquotes separated by adding at least one more newline between the two quoted paragraphs (so that there are at least two blank lines between them), among various other methods.
7 years ago · 52a95b118c
2 changed files with 32 additions and 0 deletions
--- a/tildes/tests/test_markdown.py
+++ b/tildes/tests/test_markdown.py
@@ -81,6 +81,22 @@ def test_accidental_ordered_list():
    assert "<ol" not in html


+def test_merged_blockquotes():
+    """Ensure subsequent blockquotes are merged."""
+    markdown = "> Paragraph 1\n\n> Paragraph 2"
+    html = convert_markdown_to_safe_html(markdown)
+
+    assert html.count("<blockquote") == 1
+
+
+def test_separated_blockquotes():
+    """Ensure blockquotes can be separated with an additional newline."""
+    markdown = "> Paragraph 1\n\n\n> Paragraph 2"
+    html = convert_markdown_to_safe_html(markdown)
+
+    assert html.count("<blockquote") == 2
+
+
 def test_existing_newline_not_doubled():
    """Ensure that the standard markdown line break doesn't result in two."""
    markdown = "A deliberate line  \nbreak"
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@@ -107,6 +107,8 @@ BAD_ORDERED_LIST_REGEX = re.compile(

 STRIP_IMAGE_ELEMENTS_REGEX = re.compile(r'<img src="([^"<>]*?)" alt="([^"<>]*?)" />')

+SUBSEQUENT_BLOCKQUOTES_REGEX = re.compile("^>([^\n]*?)\n\n(?=>)", flags=re.MULTILINE)
+

@histogram_timer("markdown_processing")
 def convert_markdown_to_safe_html(markdown: str) -> str:
@@ -143,12 +145,26 @@ def preprocess_markdown(markdown: str) -> str:
    """Pre-process markdown before passing it to CommonMark."""
    markdown = escape_accidental_ordered_lists(markdown)

+    markdown = merge_subsequent_blockquotes(markdown)
+
    # fix the "shrug" emoji ¯\_(ツ)_/¯ to prevent markdown mangling it
    markdown = markdown.replace(r"¯\_(ツ)_/¯", r"¯\\\_(ツ)\_/¯")

    return markdown


+def merge_subsequent_blockquotes(markdown: str) -> str:
+    """Merge subsequent (separated) blockquotes into a single one.
+
+    By default, if someone quotes more than one paragraph without also adding the >
+    symbol on the blank lines between them, they will be interpreted as separate
+    blockquotes. This almost never seems to be intended, so this merges them. If
+    separate quotes are wanted, they can still achieve it by using at least two
+    newlines between quoted paragraphs (or various other methods).
+    """
+    return SUBSEQUENT_BLOCKQUOTES_REGEX.sub(">\\1\n>\n", markdown)
+
+
 def escape_accidental_ordered_lists(markdown: str) -> str:
    """Escape markdown that's probably an accidental ordered list.