diff --git a/tildes/tests/test_html.py b/tildes/tests/test_html.py
new file mode 100644
index 0000000..f1a5409
--- /dev/null
+++ b/tildes/tests/test_html.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2019 Tildes contributors <code@tildes.net>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from bs4 import BeautifulSoup
+
+from tildes.lib.html import add_anchors_to_headings
+from tildes.lib.markdown import convert_markdown_to_safe_html
+
+
+def test_add_anchor_to_headings():
+    """Ensure that a basic heading ends up with the expected id."""
+    markdown = "# Some heading"
+    html = convert_markdown_to_safe_html(markdown)
+    html = add_anchors_to_headings(html)
+
+    assert 'id="some_heading"' in html
+
+
+def test_anchor_on_complex_heading():
+    """Ensure that a more complex heading still gets the expected id."""
+    markdown = "# This *heading* has **more formatting**"
+    html = convert_markdown_to_safe_html(markdown)
+    html = add_anchors_to_headings(html)
+
+    assert 'id="this_heading_has_more_formatting"' in html
+
+
+def test_heading_links_to_itself():
+    """Ensure that a heading ends up containing a link to itself."""
+    markdown = "## Important information"
+    html = convert_markdown_to_safe_html(markdown)
+    html = add_anchors_to_headings(html)
+
+    soup = BeautifulSoup(html, features="html5lib")
+    assert soup.h2.a["href"] == "#" + soup.h2["id"]
+
+
+def test_heading_keeps_existing_attributes():
+    """Ensure that attributes already on a heading survive adding the anchor."""
+    html = add_anchors_to_headings('<h2 class="notice">Important information</h2>')
+
+    soup = BeautifulSoup(html, features="html5lib")
+    assert "notice" in soup.h2["class"]
+    assert soup.h2.a["href"] == "#" + soup.h2["id"]
diff --git a/tildes/tildes/lib/html.py b/tildes/tildes/lib/html.py
new file mode 100644
index 0000000..49920c4
--- /dev/null
+++ b/tildes/tildes/lib/html.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2019 Tildes contributors <code@tildes.net>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Functions related to HTML parsing/modification."""
+
+from bs4 import BeautifulSoup
+
+from tildes.lib.string import convert_to_url_slug
+
+
+def add_anchors_to_headings(html: str) -> str:
+    """Give every heading in an HTML fragment an id and a link to itself.
+
+    Each h1-h6 element gets an id generated from its text content, and its contents
+    are wrapped in a link pointing at that id. Headings are modified in place, so
+    any attributes they already have are kept. A heading whose text produces an
+    empty slug is left untouched, since there would be nothing to link to.
+    """
+    soup = BeautifulSoup(html, features="html5lib")
+
+    for heading in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
+        # generate an anchor from the string contents of the heading
+        anchor = convert_to_url_slug("".join(heading.strings))
+        if not anchor:
+            # an empty id can't be linked to, leave the heading as it is
+            continue
+
+        # create a link to that anchor, and move the heading's contents inside it;
+        # append() detaches each child from its old parent, so iterate over a copy
+        link = soup.new_tag("a", href=f"#{anchor}")
+        for child in list(heading.contents):
+            link.append(child)
+
+        # the link is now the heading's only child, and the anchor becomes its id
+        heading.append(link)
+        heading["id"] = anchor
+
+    # html5lib adds <html> and <body> tags around the fragment, strip them back out
+    return "".join(str(tag) for tag in soup.body.children)
diff --git a/tildes/tildes/models/group/group_wiki_page.py b/tildes/tildes/models/group/group_wiki_page.py
index d369083..bcf6f5a 100644
--- a/tildes/tildes/models/group/group_wiki_page.py
+++ b/tildes/tildes/models/group/group_wiki_page.py
@@ -15,6 +15,7 @@ from sqlalchemy.sql.expression import text
 
 from tildes.lib.database import CIText
 from tildes.lib.datetime import utc_now
+from tildes.lib.html import add_anchors_to_headings
 from tildes.lib.markdown import convert_markdown_to_safe_html
 from tildes.lib.string import convert_to_url_slug
 from tildes.models import DatabaseModel
@@ -131,6 +132,7 @@ class GroupWikiPage(DatabaseModel):
         self.markdown = new_markdown
 
         self.rendered_html = convert_markdown_to_safe_html(new_markdown)
+        self.rendered_html = add_anchors_to_headings(self.rendered_html)
         self.last_edited_time = utc_now()
 
         repo = Repository(self.BASE_PATH)