From ea4926b5f9d2f36f6d9e190a5d7fa6b8541399b8 Mon Sep 17 00:00:00 2001 From: Deimos Date: Mon, 27 May 2019 14:20:30 -0600 Subject: [PATCH] Create (and link) anchors for wiki pages' headers This makes it so that all wiki pages' headings will have anchors and get replaced with links to themselves. We'll probably need some styling updates now, since having them look like large links isn't great. --- tildes/tests/test_html.py | 35 +++++++++++++++++++ tildes/tildes/lib/html.py | 32 +++++++++++++++++ tildes/tildes/models/group/group_wiki_page.py | 2 ++ 3 files changed, 69 insertions(+) create mode 100644 tildes/tests/test_html.py create mode 100644 tildes/tildes/lib/html.py diff --git a/tildes/tests/test_html.py b/tildes/tests/test_html.py new file mode 100644 index 0000000..f1a5409 --- /dev/null +++ b/tildes/tests/test_html.py @@ -0,0 +1,35 @@ +# Copyright (c) 2019 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +from bs4 import BeautifulSoup + +from tildes.lib.html import add_anchors_to_headings +from tildes.lib.markdown import convert_markdown_to_safe_html + + +def test_add_anchor_to_headings(): + """Ensure that a basic heading ends up with the expected id.""" + markdown = "# Some heading" + html = convert_markdown_to_safe_html(markdown) + html = add_anchors_to_headings(html) + + assert 'id="some_heading"' in html + + +def test_anchor_on_complex_heading(): + """Ensure that a more complex heading still gets the expected id.""" + markdown = "# This *heading* has **more formatting**" + html = convert_markdown_to_safe_html(markdown) + html = add_anchors_to_headings(html) + + assert 'id="this_heading_has_more_formatting"' in html + + +def test_heading_links_to_itself(): + """Ensure that a heading ends up containing a link to itself.""" + markdown = "## Important information" + html = convert_markdown_to_safe_html(markdown) + html = add_anchors_to_headings(html) + + soup = BeautifulSoup(html, features="html5lib") + assert soup.h2.a["href"] == "#" + 
soup.h2["id"] diff --git a/tildes/tildes/lib/html.py b/tildes/tildes/lib/html.py new file mode 100644 index 0000000..49920c4 --- /dev/null +++ b/tildes/tildes/lib/html.py @@ -0,0 +1,32 @@ +# Copyright (c) 2019 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Functions related to HTML parsing/modification.""" + +from bs4 import BeautifulSoup + +from tildes.lib.string import convert_to_url_slug + + +def add_anchors_to_headings(html: str) -> str: + """Replace all heading elements with ones with ids that link to themselves.""" + soup = BeautifulSoup(html, features="html5lib") + + headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]) + + for heading in headings: + # generate an anchor from the string contents of the heading + anchor = convert_to_url_slug("".join([string for string in heading.strings])) + + # create a link to that anchor, and put the heading's contents inside it + link = soup.new_tag("a", href=f"#{anchor}") + link.contents = heading.contents + + # put that link in a replacement same-level heading with the anchor as id + new_heading = soup.new_tag(heading.name, id=anchor) + new_heading.append(link) + + heading.replace_with(new_heading) + + # html5lib adds <html> and <body> tags around the fragment, strip them back out + return "".join([str(tag) for tag in soup.body.children]) diff --git a/tildes/tildes/models/group/group_wiki_page.py b/tildes/tildes/models/group/group_wiki_page.py index d369083..bcf6f5a 100644 --- a/tildes/tildes/models/group/group_wiki_page.py +++ b/tildes/tildes/models/group/group_wiki_page.py @@ -15,6 +15,7 @@ from sqlalchemy.sql.expression import text from tildes.lib.database import CIText from tildes.lib.datetime import utc_now +from tildes.lib.html import add_anchors_to_headings from tildes.lib.markdown import convert_markdown_to_safe_html from tildes.lib.string import convert_to_url_slug from tildes.models import DatabaseModel @@ -131,6 +132,7 @@ class GroupWikiPage(DatabaseModel): self.markdown = new_markdown 
self.rendered_html = convert_markdown_to_safe_html(new_markdown) + self.rendered_html = add_anchors_to_headings(self.rendered_html) self.last_edited_time = utc_now() repo = Repository(self.BASE_PATH)