From e3842e88c4393a2cbd3fa2524181a1b8da50ca6f Mon Sep 17 00:00:00 2001 From: Deimos Date: Fri, 10 Aug 2018 01:01:58 -0600 Subject: [PATCH] Enable cmark-gfm extensions (table, strikethrough) An example was recently added to the github cmark repo to show how to set up the extensions from Python, so this is heavily based on that code: https://github.com/github/cmark/blob/master/wrappers/wrapper_ext.py This should also fix a memory leak, since I wasn't manually freeing the returned buffer (as the library recommends that you do). --- tildes/tests/test_markdown.py | 34 ++++++++++++++++----- tildes/tildes/lib/cmark.py | 57 +++++++++++++++++++++++++++++++++++ tildes/tildes/lib/markdown.py | 40 ++++++++++++++++-------- 3 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 tildes/tildes/lib/cmark.py diff --git a/tildes/tests/test_markdown.py b/tildes/tests/test_markdown.py index 1c5dbd9..f96c9ff 100644 --- a/tildes/tests/test_markdown.py +++ b/tildes/tests/test_markdown.py @@ -28,6 +28,32 @@ def test_basic_markdown_unescaped(): assert '<' not in sanitized +def test_strikethrough(): + """Ensure strikethrough works and doesn't turn into a group link.""" + markdown = "This ~should not~ should work" + processed = convert_markdown_to_safe_html(markdown) + + assert '' in processed + assert '' in processed + assert processed.count(' str: markdown_bytes = markdown.encode('utf8') - # enables the --hardbreaks option - # (can I import this? it's defined in cmark.h as CMARK_OPT_HARDBREAKS) - cmark_options = 4 + parser = cmark_parser_new(CMARK_OPTS) + for name in CMARK_EXTENSIONS: + ext = cmark_find_syntax_extension(name) + cmark_parser_attach_syntax_extension(parser, ext) + exts = cmark_parser_get_syntax_extensions(parser) + + cmark_parser_feed(parser, markdown_bytes, len(markdown_bytes)) + doc = cmark_parser_finish(parser) + + html_bytes = cmark_render_html(doc, CMARK_OPTS, exts) - html_bytes = commonmark(markdown_bytes, len(markdown_bytes), cmark_options) + cmark_parser_free(parser) + cmark_node_free(doc) html = html_bytes.decode('utf8')