From 328142aedf41a75263079851bdbc43792d0f8104 Mon Sep 17 00:00:00 2001 From: Deimos Date: Tue, 6 Nov 2018 16:34:21 -0700 Subject: [PATCH] Markdown processing: replace &quot; entities cmark-gfm's behavior seems to have changed when I upgraded it, and it's now producing &quot; entities instead of normal double-quote characters. This was breaking syntax-highlighting, so this simply replaces all the entities back to normal double-quotes. This is maybe a little risky to do without doing it as part of proper HTML parsing, but I think it should overall be pretty safe. --- tildes/tildes/lib/markdown.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tildes/tildes/lib/markdown.py b/tildes/tildes/lib/markdown.py index 2c97d00..8c5a48c 100644 --- a/tildes/tildes/lib/markdown.py +++ b/tildes/tildes/lib/markdown.py @@ -198,6 +198,17 @@ def escape_accidental_ordered_lists(markdown: str) -> str: def postprocess_markdown_html(html: str) -> str: """Apply post-processing to HTML generated by markdown parser.""" + # cmark (and cmark-gfm) replaces double-quote characters with the &quot; entity. + # This is almost always unnecessary, and is causing issues with some of the HTML + # processing, since (for example) BeautifulSoup will convert them back, which causes + # the string-replacement in apply_syntax_highlighting() to fail if a code block + # contains any double-quote characters. + # + # We'll just do a full replacement here - this has a possibility of being dangerous, + # but it should be extremely unlikely and the sanitization function should make sure + # that nothing malicious can happen regardless. + html = html.replace("&quot;", '"') + # apply syntax highlighting to code blocks html = apply_syntax_highlighting(html)