Browse Source

Enable cmark-gfm extensions (table, strikethrough)

An example was recently added to GitHub's cmark repository showing how to
set up the extensions from Python, so this change is heavily based on that
code:
https://github.com/github/cmark/blob/master/wrappers/wrapper_ext.py

This should also fix a memory leak, since I wasn't manually freeing the
returned buffer (as the library recommends that you do).
merge-requests/25/head
Deimos 6 years ago
parent
commit
e3842e88c4
  1. 34
      tildes/tests/test_markdown.py
  2. 57
      tildes/tildes/lib/cmark.py
  3. 40
      tildes/tildes/lib/markdown.py

34
tildes/tests/test_markdown.py

@ -28,6 +28,32 @@ def test_basic_markdown_unescaped():
assert '<' not in sanitized
def test_strikethrough():
    """Strikethrough markup must render as <del>, not as a group link."""
    html = convert_markdown_to_safe_html("This ~should not~ should work")

    assert '<del>' in html
    assert '<a' not in html
def test_table():
    """A pipe-delimited markdown table must render as an HTML table."""
    table_lines = (
        '|Header 1|Header 2|Header 3|\n',
        '|--------|-------:|:------:|\n',
        '|1 - 1 |1 - 2 |1 - 3 |\n',
        '|2 - 1|2 - 2|2 - 3|\n',
    )
    html = convert_markdown_to_safe_html(''.join(table_lines))

    assert '<table>' in html
    # one header row plus the two body rows
    assert html.count('<tr') == 3
    # two body rows of three cells each
    assert html.count('<td') == 6
    # the delimiter row requests right- and centre-aligned columns
    assert 'align="right"' in html
    assert 'align="center"' in html
def test_deliberate_ordered_list():
"""Ensure a "deliberate" ordered list works."""
markdown = (
@ -215,14 +241,6 @@ def test_approximately_tilde_not_linkified():
assert '<a' not in processed
def test_strikethrough_attempt_not_linkified():
    """A strikethrough attempt must never be turned into a link."""
    text = "This ~should~ shouldn't work"
    assert '<a' not in convert_markdown_to_safe_html(text)
def test_uppercase_group_ref_links_correctly():
"""Ensure using uppercase in a group ref works but links correctly."""
markdown = 'That was in ~Music.Metal.Progressive'

57
tildes/tildes/lib/cmark.py

@ -0,0 +1,57 @@
"""Set up the shared libcmark-gfm library and extensions."""
# pylint: disable=invalid-name
#
# Every binding below declares ``restype``/``argtypes`` explicitly. Without
# them ctypes assumes C ``int`` for return values, which would truncate the
# 64-bit pointers that libcmark hands back.
from ctypes import CDLL, c_char_p, c_int, c_size_t, c_void_p, string_at

CMARK_DLL = CDLL('/usr/local/lib/libcmark-gfm.so')
CMARK_EXT_DLL = CDLL('/usr/local/lib/libcmark-gfmextensions.so')

# Enables the --hardbreaks option for cmark. This is CMARK_OPT_HARDBREAKS from
# cmark.h; it is a preprocessor macro, so it is not exported by the shared
# library and has to be duplicated here.
CMARK_OPTS = 4

# Names of the syntax extensions to attach to each parser.
CMARK_EXTENSIONS = (b'strikethrough', b'table')

# cmark_parser *cmark_parser_new(int options)
cmark_parser_new = CMARK_DLL.cmark_parser_new
cmark_parser_new.restype = c_void_p
cmark_parser_new.argtypes = (c_int,)

# void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len)
cmark_parser_feed = CMARK_DLL.cmark_parser_feed
cmark_parser_feed.restype = None
cmark_parser_feed.argtypes = (c_void_p, c_char_p, c_size_t)

# cmark_node *cmark_parser_finish(cmark_parser *parser)
cmark_parser_finish = CMARK_DLL.cmark_parser_finish
cmark_parser_finish.restype = c_void_p
cmark_parser_finish.argtypes = (c_void_p,)

# int cmark_parser_attach_syntax_extension(cmark_parser *, cmark_syntax_extension *)
cmark_parser_attach_syntax_extension = (
    CMARK_DLL.cmark_parser_attach_syntax_extension)
cmark_parser_attach_syntax_extension.restype = c_int
cmark_parser_attach_syntax_extension.argtypes = (c_void_p, c_void_p)

# cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser)
cmark_parser_get_syntax_extensions = (
    CMARK_DLL.cmark_parser_get_syntax_extensions)
cmark_parser_get_syntax_extensions.restype = c_void_p
cmark_parser_get_syntax_extensions.argtypes = (c_void_p,)

# void cmark_parser_free(cmark_parser *parser)
cmark_parser_free = CMARK_DLL.cmark_parser_free
cmark_parser_free.restype = None
cmark_parser_free.argtypes = (c_void_p,)

# void cmark_node_free(cmark_node *node)
cmark_node_free = CMARK_DLL.cmark_node_free
cmark_node_free.restype = None
cmark_node_free.argtypes = (c_void_p,)

# cmark_syntax_extension *cmark_find_syntax_extension(const char *name)
cmark_find_syntax_extension = CMARK_DLL.cmark_find_syntax_extension
cmark_find_syntax_extension.restype = c_void_p
cmark_find_syntax_extension.argtypes = (c_char_p,)

# char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions)
#
# The return type is deliberately c_void_p rather than c_char_p: with c_char_p
# ctypes copies the C string into a bytes object and throws the pointer away,
# which makes it impossible to free the buffer that libcmark allocated.
_cmark_render_html_raw = CMARK_DLL.cmark_render_html
_cmark_render_html_raw.restype = c_void_p
_cmark_render_html_raw.argtypes = (c_void_p, c_int, c_void_p)

# free() from the C runtime already loaded into this process.
# NOTE(review): this assumes the document was created with cmark's default
# allocator (as cmark_parser_new does), whose buffers are released with the
# C library's free() -- confirm if a custom cmark_mem is ever introduced.
_libc_free = CDLL(None).free
_libc_free.restype = None
_libc_free.argtypes = (c_void_p,)


def cmark_render_html(root, options, extensions):
    """Render a parsed document to HTML and release the C-side buffer.

    Args:
        root: pointer to the document node, as returned by
            ``cmark_parser_finish``.
        options: cmark option bit flags (e.g. ``CMARK_OPTS``).
        extensions: pointer to the extension list, as returned by
            ``cmark_parser_get_syntax_extensions`` (may be ``None``).

    Returns:
        The rendered HTML as ``bytes``, or ``None`` if the library returned a
        NULL pointer (the same values the plain ``c_char_p`` binding produced).
    """
    buffer_ptr = _cmark_render_html_raw(root, options, extensions)
    if not buffer_ptr:
        return None

    try:
        # copies the NUL-terminated string into a Python bytes object
        return string_at(buffer_ptr)
    finally:
        # the caller owns the rendered buffer, so it must be freed here
        _libc_free(buffer_ptr)


# void core_extensions_ensure_registered(void)
register = CMARK_EXT_DLL.core_extensions_ensure_registered
register.restype = None
register.argtypes = ()

# Called once at import time, before any cmark_find_syntax_extension lookup.
register()

40
tildes/tildes/lib/markdown.py

@ -1,6 +1,5 @@
"""Functions/constants related to markdown handling."""
from ctypes import CDLL, c_char_p, c_long
import re
from typing import (
Callable,
@ -25,13 +24,19 @@ from html5lib.treewalkers.base import NonRecursiveTreeWalker
from tildes.metrics import histogram_timer
from tildes.schemas.group import is_valid_group_path
from tildes.schemas.user import is_valid_username
# set up the commonmark function to call into libcmark-gfm
CMARK_DLL = CDLL('/usr/local/lib/libcmark-gfm.so')
commonmark = CMARK_DLL.cmark_markdown_to_html # pylint: disable=invalid-name
commonmark.restype = c_char_p
commonmark.argtypes = [c_char_p, c_long, c_long]
from .cmark import (
CMARK_EXTENSIONS,
CMARK_OPTS,
cmark_find_syntax_extension,
cmark_node_free,
cmark_parser_attach_syntax_extension,
cmark_parser_feed,
cmark_parser_finish,
cmark_parser_free,
cmark_parser_get_syntax_extensions,
cmark_parser_new,
cmark_render_html,
)
HTML_TAG_WHITELIST = (
@ -62,12 +67,15 @@ HTML_TAG_WHITELIST = (
'tbody',
'td',
'th',
'thead',
'tr',
'ul',
)
# Maps a tag name to the list of attribute names listed for that tag.
# NOTE(review): presumably the sanitizer drops any attribute not listed here;
# the code that consumes this mapping is not visible in this chunk -- confirm.
HTML_ATTRIBUTE_WHITELIST = {
'a': ['href', 'title'],
'ol': ['start'],
# table cells: "align" is the attribute the table tests look for in the output
'td': ['align'],
'th': ['align'],
}
PROTOCOL_WHITELIST = ('http', 'https')
@ -112,11 +120,19 @@ def convert_markdown_to_safe_html(markdown: str) -> str:
markdown_bytes = markdown.encode('utf8')
# enables the --hardbreaks option
# (can I import this? it's defined in cmark.h as CMARK_OPT_HARDBREAKS)
cmark_options = 4
parser = cmark_parser_new(CMARK_OPTS)
for name in CMARK_EXTENSIONS:
ext = cmark_find_syntax_extension(name)
cmark_parser_attach_syntax_extension(parser, ext)
exts = cmark_parser_get_syntax_extensions(parser)
cmark_parser_feed(parser, markdown_bytes, len(markdown_bytes))
doc = cmark_parser_finish(parser)
html_bytes = cmark_render_html(doc, CMARK_OPTS, exts)
html_bytes = commonmark(markdown_bytes, len(markdown_bytes), cmark_options)
cmark_parser_free(parser)
cmark_node_free(doc)
html = html_bytes.decode('utf8')

Loading…
Cancel
Save