From 328142aedf41a75263079851bdbc43792d0f8104 Mon Sep 17 00:00:00 2001
From: Deimos <deimos@tildes.net>
Date: Tue, 6 Nov 2018 16:34:21 -0700
Subject: [PATCH] Markdown processing: replace &quot; entities

cmark-gfm's behavior seems to have changed when I upgraded it, and it's
now producing &quot; entities instead of normal double-quote characters.
This was breaking syntax highlighting, so this commit simply converts
all of those entities back to normal double-quote characters. Doing it
as a plain string replacement instead of as part of proper HTML parsing
is a little risky, but I think it should be pretty safe overall.
---
 tildes/tildes/lib/markdown.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tildes/tildes/lib/markdown.py b/tildes/tildes/lib/markdown.py
index 2c97d00..8c5a48c 100644
--- a/tildes/tildes/lib/markdown.py
+++ b/tildes/tildes/lib/markdown.py
@@ -198,6 +198,17 @@ def escape_accidental_ordered_lists(markdown: str) -> str:
 
 def postprocess_markdown_html(html: str) -> str:
     """Apply post-processing to HTML generated by markdown parser."""
+    # cmark (and cmark-gfm) replaces double-quote characters with the &quot; entity.
+    # This is almost always unnecessary, and is causing issues with some of the HTML
+    # processing, since (for example) BeautifulSoup will convert them back, which causes
+    # the string-replacement in apply_syntax_highlighting() to fail if a code block
+    # contains any double-quote characters.
+    #
+    # We'll just do a full replacement here - this has a possibility of being dangerous,
+    # but it should be extremely unlikely and the sanitization function should make sure
+    # that nothing malicious can happen regardless.
+    html = html.replace("&quot;", '"')
+
     # apply syntax highlighting to code blocks
     html = apply_syntax_highlighting(html)