mirror of https://gitlab.com/tildes/tildes.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
383 lines
12 KiB
383 lines
12 KiB
# Copyright (c) 2018 Tildes contributors <code@tildes.net>
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from tildes.lib.markdown import convert_markdown_to_safe_html
|
|
|
|
|
|
def test_script_tag_escaped():
    """Check that a raw <script> tag does not survive conversion."""
    rendered = convert_markdown_to_safe_html("<script>alert()</script>")

    assert "<script>" not in rendered
|
|
|
|
|
|
def test_basic_markdown_unescaped():
    """Test that some common markdown comes through without escaping.

    The sample covers a header, bold/italic text, a separator, an unordered
    list, a quote, an indented code block, inline code, a link and a manual
    line break. None of it should end up as escaped markup in the output.
    """
    markdown = (
        "# Here's a header.\n\n"
        "This chunk of text has **some bold** and *some italics* in it.\n\n"
        "A separator will be below this paragraph.\n\n"
        "---\n\n"
        "* An unordered list item\n"
        "* Another list item\n\n"
        "> This should be a quote.\n\n"
        # An indented code block requires four leading spaces.
        "    And a code block\n\n"
        "Also some `inline code` and [a link](http://example.com).\n\n"
        # Two trailing spaces before the newline make the manual break.
        "And a manual break  \nbetween lines.\n\n"
    )
    sanitized = convert_markdown_to_safe_html(markdown)

    # Look for the *escaped* bracket: the rendered HTML is expected to contain
    # real tags, so checking for a literal "<" would reject valid output.
    assert "&lt;" not in sanitized
|
|
|
|
|
|
def test_strikethrough():
    """Check that ~text~ renders as strikethrough rather than as a link."""
    rendered = convert_markdown_to_safe_html("This ~should not~ should work")

    assert "<del>" in rendered
    assert "<a" not in rendered
|
|
|
|
|
|
def test_table():
    """Check that pipe-table markdown renders as an HTML table."""
    rows = [
        "|Header 1|Header 2|Header 3|\n",
        "|--------|-------:|:------:|\n",
        "|1 - 1 |1 - 2 |1 - 3 |\n",
        "|2 - 1|2 - 2|2 - 3|\n",
    ]
    rendered = convert_markdown_to_safe_html("".join(rows))

    assert "<table>" in rendered

    # One header row plus two body rows, with three cells per body row.
    assert rendered.count("<tr") == 3
    assert rendered.count("<td") == 6

    # Alignment markers from the delimiter row should be carried over.
    assert 'align="right"' in rendered
    assert 'align="center"' in rendered
|
|
|
|
|
|
def test_deliberate_ordered_list():
    """Check that an intentionally written numbered list becomes an <ol>."""
    parts = [
        "My first line of text.\n\n",
        "1. I want\n",
        "2. An ordered\n",
        "3. List here\n\n",
        "A final line.",
    ]
    rendered = convert_markdown_to_safe_html("".join(parts))

    assert "<ol>" in rendered
|
|
|
|
|
|
def test_accidental_ordered_list():
    """Check that a sentence starting with "1975." does not become a list."""
    rendered = convert_markdown_to_safe_html("1975. It was a long time ago.")

    assert "<ol" not in rendered
|
|
|
|
|
|
def test_merged_blockquotes():
    """Check that two quotes split by one blank line share a blockquote."""
    rendered = convert_markdown_to_safe_html("> Paragraph 1\n\n> Paragraph 2")

    blockquote_count = rendered.count("<blockquote")
    assert blockquote_count == 1
|
|
|
|
|
|
def test_separated_blockquotes():
    """Check that an extra newline keeps two quotes as separate blockquotes."""
    rendered = convert_markdown_to_safe_html("> Paragraph 1\n\n\n> Paragraph 2")

    blockquote_count = rendered.count("<blockquote")
    assert blockquote_count == 2
|
|
|
|
|
|
def test_existing_newline_not_doubled():
    """Ensure that the standard markdown line break doesn't result in two.

    The standard markdown hard break is written as two (or more) trailing
    spaces followed by a newline. It must yield exactly one <br>.
    """
    # The two spaces before "\n" are significant: with only one, the input is
    # an ordinary newline and not the standard hard-break syntax.
    markdown = "A deliberate line  \nbreak"
    html = convert_markdown_to_safe_html(markdown)

    assert html.count("<br") == 1
|
|
|
|
|
|
def test_newline_creates_br():
    """Check that a plain newline within a paragraph produces a <br> tag."""
    rendered = convert_markdown_to_safe_html("This wouldn't\nnormally work")

    assert "<br>" in rendered
|
|
|
|
|
|
def test_multiple_newlines():
    """Check newline-separated sentences get one break between each pair."""
    sentences = ["One.", "Two.", "Three.", "Four.", "Five."]
    rendered = convert_markdown_to_safe_html("\n".join(sentences))

    # N lines joined by newlines contain N - 1 line breaks.
    expected_breaks = len(sentences) - 1
    assert rendered.count("<br") == expected_breaks

    # Every sentence must still be present in the output.
    for sentence in sentences:
        assert sentence in rendered
|
|
|
|
|
|
def test_newline_in_code_block():
    """Check that newlines inside a fenced code block add no <br> tags."""
    fenced_code = "```\ndef testing_for_newlines():\n pass\n```\n"
    rendered = convert_markdown_to_safe_html(fenced_code)

    assert "<br" not in rendered
|
|
|
|
|
|
def test_http_link_linkified():
    """Check that a bare http URL in text is turned into a link."""
    rendered = convert_markdown_to_safe_html(
        "I like http://example.com as an example."
    )

    expected_anchor = '<a href="http://example.com">'
    assert expected_anchor in rendered
|
|
|
|
|
|
def test_https_link_linkified():
    """Check that a bare https URL in text is turned into a link."""
    rendered = convert_markdown_to_safe_html(
        "Also, https://example.com should work."
    )

    expected_anchor = '<a href="https://example.com">'
    assert expected_anchor in rendered
|
|
|
|
|
|
def test_link_with_path_linkified():
    """Check that a bare URL including a path is linked with its full path."""
    rendered = convert_markdown_to_safe_html(
        "So http://example.com/a/b_c_d/e too?"
    )

    expected_anchor = '<a href="http://example.com/a/b_c_d/e">'
    assert expected_anchor in rendered
|
|
|
|
|
|
def test_link_with_query_string_linkified():
    """Ensure a link with a query string results in a link.

    The whole URL, including both query parameters, must end up in the href.
    """
    markdown = "Also http://example.com?something=true&test=yes works?"
    processed = convert_markdown_to_safe_html(markdown)

    # In serialized HTML the ampersand inside an attribute value is written as
    # "&amp;", so that is the form to look for in the output string.
    assert (
        '<a href="http://example.com?something=true&amp;test=yes">' in processed
    )
|
|
|
|
|
|
def test_other_protocol_urls_not_linkified():
    """Check that bare URLs with several non-http schemes are not linked.

    The list of schemes is a sample, not an exhaustive set.
    """
    for scheme in ("data", "ftp", "irc", "mailto", "news", "ssh", "xmpp"):
        rendered = convert_markdown_to_safe_html(
            f"Testing {scheme}://example.com for linking"
        )

        # Checked per scheme so that a single offender fails the test.
        assert "href" not in rendered
|
|
|
|
|
|
def test_html_attr_whitelist_violation():
    """Check that attributes outside the whitelist are dropped from a link.

    The input anchor carries href, title, target and referrerpolicy; only
    href and title are expected to remain.
    """
    raw_anchor = (
        '<a href="example.com" title="example" target="_blank" '
        'referrerpolicy="unsafe-url">test link</a>'
    )
    expected = '<p><a href="example.com" title="example">test link</a></p>\n'

    rendered = convert_markdown_to_safe_html(raw_anchor)

    assert rendered == expected
|
|
|
|
|
|
def test_html_lookalike_not_closed():
    """Ensure text that looks like an HTML tag isn't "fixed" by adding a closing tag.

    The tag lookalike should be kept as visible (escaped) text, and no
    matching closing tag may be invented for it in either form.
    """
    markdown = "I can't believe it's not <blank>!"
    processed = convert_markdown_to_safe_html(markdown)

    # NOTE(review): assumes the sanitizer escapes non-whitelisted tags, so the
    # lookalike is expected in its entity-escaped form -- confirm against
    # convert_markdown_to_safe_html.
    assert "&lt;blank&gt;" in processed

    # No closing tag should have been added, whether escaped or literal.
    assert "&lt;/blank&gt;" not in processed
    assert "</blank>" not in processed
|
|
|
|
|
|
def test_html_lookalike_closing_not_removed():
    """Ensure text that looks like an HTML close tag isn't removed without an opener.

    The stray closing tag should remain in the output as visible (escaped)
    text instead of being dropped.
    """
    markdown = "Well, that's just great.</sarcasm>"
    processed = convert_markdown_to_safe_html(markdown)

    # NOTE(review): assumes the sanitizer escapes non-whitelisted tags, so the
    # lookalike is expected in its entity-escaped form -- confirm against
    # convert_markdown_to_safe_html.
    assert "&lt;/sarcasm&gt;" in processed
|
|
|
|
|
|
def test_a_href_protocol_violation():
    """Check that explicit links to several non-http schemes lose their href.

    The list of schemes is a sample, not an exhaustive set.
    """
    for scheme in ("data", "ftp", "irc", "mailto", "news", "ssh", "xmpp"):
        rendered = convert_markdown_to_safe_html(
            f"Testing [a link]({scheme}://example.com) for linking"
        )

        # Checked per scheme so that a single offender fails the test.
        assert "href" not in rendered
|
|
|
|
|
|
def test_group_reference_linkified():
    """Check that a ~group mention in a sentence links to that group."""
    rendered = convert_markdown_to_safe_html(
        "Yeah, I saw that in ~books.fantasy yesterday."
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/~books.fantasy")
|
|
|
|
|
|
def test_multiple_group_references_linkified():
    """Check that each of several group mentions in a list gets its own link."""
    parts = [
        "I like to keep an eye on:\n\n",
        "* ~music.metal\n",
        "* ~music.metal.progressive\n",
        "* ~music.post_rock\n",
    ]
    rendered = convert_markdown_to_safe_html("".join(parts))

    tree = BeautifulSoup(rendered, features="html5lib")
    anchors = tree.find_all("a")
    assert len(anchors) == 3
|
|
|
|
|
|
def test_invalid_group_reference_not_linkified():
    """Check that malformed group names are left as plain text."""
    source = "".join(
        [
            "You can't name a group ~games.pokémon.\n",
            "You also can't have a name like ~_underscores.",
        ]
    )
    rendered = convert_markdown_to_safe_html(source)

    assert "<a" not in rendered
|
|
|
|
|
|
def test_approximately_tilde_not_linkified():
    """Check that "~" used as "approximately" before a number is not linked."""
    rendered = convert_markdown_to_safe_html(
        "Mix in ~2 cups of flour and ~1.5 tbsp of sugar with ~3kg of meat."
    )

    assert "<a" not in rendered
|
|
|
|
|
|
def test_uppercase_group_ref_links_correctly():
    """Check that a mixed-case group mention links to the lowercase path."""
    rendered = convert_markdown_to_safe_html("That was in ~Music.Metal.Progressive")

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/~music.metal.progressive")
|
|
|
|
|
|
def test_existing_link_group_ref_not_replaced():
    """Check that a group mention used as link text keeps its own target."""
    rendered = convert_markdown_to_safe_html(
        "Doesn't go [~where.you.expect](http://example.com)"
    )

    # The explicit target stays; no group link is generated in its place.
    assert '<a href="http://example.com"' in rendered
    assert 'href="/~where.you.expect"' not in rendered
|
|
|
|
|
|
def test_group_ref_inside_link_not_replaced():
    """Check that a group mention within longer link text is not re-linked."""
    rendered = convert_markdown_to_safe_html(
        "Found [this band from a ~music.punk post](http://whitelung.ca)"
    )

    # Only the explicit link may exist; no nested group link.
    anchor_count = rendered.count("<a")
    assert anchor_count == 1
    assert 'href="/~music.punk"' not in rendered
|
|
|
|
|
|
def test_group_ref_inside_pre_ignored():
    """Check that a group mention inside a fenced code block is not linked."""
    code_lines = [
        "```\n",
        "# This is a code block\n",
        "# I found this code on ~comp.lang.python\n",
        "```\n",
    ]
    rendered = convert_markdown_to_safe_html("".join(code_lines))

    assert "<a" not in rendered
|
|
|
|
|
|
def test_group_ref_inside_other_tags_linkified():
    """Check that a group mention nested in a quote and bold text is linked."""
    rendered = convert_markdown_to_safe_html(
        "> Here is **a ~group.reference inside** other stuff"
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/~group.reference")
|
|
|
|
|
|
def test_username_reference_linkified():
    """Check that an @username mention links to that user's page."""
    rendered = convert_markdown_to_safe_html(
        "Hey @SomeUser, what do you think of this?"
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/user/SomeUser")
|
|
|
|
|
|
def test_u_style_username_ref_linked():
    """Check that a /u/username mention links to that user's page."""
    rendered = convert_markdown_to_safe_html(
        "Hey /u/SomeUser, what do you think of this?"
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/user/SomeUser")
|
|
|
|
|
|
def test_u_alt_style_username_ref_linked():
    """Check that a u/username mention links to that user's page."""
    rendered = convert_markdown_to_safe_html(
        "Hey u/SomeUser, what do you think of this?"
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/user/SomeUser")
|
|
|
|
|
|
def test_accidental_u_alt_style_not_linked():
    """Check that "u/" occurring inside a word is not treated as a mention."""
    rendered = convert_markdown_to_safe_html("I think those are caribou/reindeer.")

    assert "<a" not in rendered
|
|
|
|
|
|
def test_username_and_group_refs_linked():
    """Check that a user mention and a group mention in one text both link."""
    rendered = convert_markdown_to_safe_html(
        "@SomeUser makes the best posts in ~some.group for sure"
    )

    tree = BeautifulSoup(rendered, features="html5lib")
    assert tree.find("a", href="/user/SomeUser")
    assert tree.find("a", href="/~some.group")
|
|
|
|
|
|
def test_invalid_username_not_linkified():
    """Check that a mention of a malformed username is left as plain text."""
    rendered = convert_markdown_to_safe_html(
        "You can't register a username like @_underscores_"
    )

    assert "<a" not in rendered
|
|
|
|
|
|
def test_username_ref_inside_pre_ignored():
    """Check that a user mention inside a fenced code block is not linked."""
    fenced_code = (
        "```\n# Code blatantly stolen from @HelpfulGuy on StackOverflow\n```\n"
    )
    rendered = convert_markdown_to_safe_html(fenced_code)

    assert "<a" not in rendered
|
|
|
|
|
|
def test_group_ref_inside_code_ignored():
    """Check that tilde text inside inline code is not turned into a link."""
    rendered = convert_markdown_to_safe_html(
        "Strikethrough works like: `this ~should not~ work`."
    )

    assert "<a" not in rendered
|
|
|
|
|
|
def test_image_syntax_ignored():
    """Check that ![text](url) yields a "!" followed by a link, not an image."""
    rendered = convert_markdown_to_safe_html(
        "An exclamation mark preceding a ![link](url)."
    )

    assert "!<a" in rendered
    assert "img" not in rendered
|