Browse Source

Hide the inner <details> text from comment excerpts but include the <summary> text.

Closes tildes-community/tildes-cf#4

See merge request tildes-community/tildes-cf!13
develop
Bauke 3 weeks ago
committed by Andrew Shu
parent
commit
0a23202938
  1. 4
      tildes/consumers/topic_metadata_generator.py
  2. 10
      tildes/tests/test_comment.py
  3. 21
      tildes/tests/test_string.py
  4. 14
      tildes/tildes/lib/string.py
  5. 4
      tildes/tildes/models/comment/comment.py

4
tildes/consumers/topic_metadata_generator.py

@ -69,7 +69,9 @@ class TopicMetadataGenerator(EventStreamConsumer):
if not topic.rendered_html:
return {}
extracted_text = extract_text_from_html(topic.rendered_html)
extracted_text = extract_text_from_html(
topic.rendered_html, exclude_details_include_summary=True
)
# create a short excerpt by truncating the extracted string
excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")

10
tildes/tests/test_comment.py

@ -154,6 +154,16 @@ def test_comment_excerpt_excludes_del(topic, session_user):
assert comment.excerpt == "I really love it."
def test_comment_excerpt_excludes_details(topic, session_user):
    """Check that a comment excerpt leaves out the body of a <details> element.

    The text of the nested <summary> element must still appear in the excerpt.
    """
    raw_markdown = "<details>\n<summary>Spoilers!</summary>\n\nHide me!\n</details>"
    expected_excerpt = "Spoilers!"

    spoiler_comment = Comment(topic, session_user, raw_markdown)

    assert spoiler_comment.excerpt == expected_excerpt
def test_comment_tree(db, topic, session_user):
"""Ensure that building and pruning a comment tree works."""
all_comments = []

21
tildes/tests/test_string.py

@ -7,6 +7,7 @@ from tildes.lib.string import (
truncate_string,
truncate_string_at_char,
word_count,
extract_text_from_html,
)
@ -152,3 +153,23 @@ def test_basic_camelcase_to_snakecase():
def test_camelcase_to_snakecase_with_acronym():
"""Ensure CamelCase->snake_case works as expected with an acronym."""
assert camelcase_to_snakecase("SomeHTTPThing") == "some_http_thing"
def test_extract_text_from_html_include_details():
    """Check that <details> content is extracted in full when no option is passed."""
    # Each case pairs an HTML input with the text expected from a default call.
    cases = [
        (
            "<details><summary>Spoilers!</summary> <p>Don't hide me!</p></details>",
            "Spoilers! Don't hide me!",
        ),
        ("<details><p>Don't hide me!</p></details>", "Don't hide me!"),
    ]

    for html, expected_text in cases:
        assert extract_text_from_html(html) == expected_text
def test_extract_text_from_html_exclude_details():
    """Check the output when exclude_details_include_summary is enabled.

    With a <summary> present only its text is expected; without one, the
    expected output is the literal placeholder "Details".
    """
    cases = [
        ("<details><summary>Spoilers!</summary> <p>Hide me!</p></details>", "Spoilers!"),
        ("<details><p>Hide me!</p></details>", "Details"),
    ]

    for html, expected_text in cases:
        extracted = extract_text_from_html(html, exclude_details_include_summary=True)
        assert extracted == expected_text

14
tildes/tildes/lib/string.py

@ -226,7 +226,11 @@ def separate_string(original: str, separator: str, segment_size: int) -> str:
return separated
def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) -> str:
def extract_text_from_html(
html: str,
skip_tags: Optional[list[str]] = None,
exclude_details_include_summary: bool = False,
) -> str:
"""Extract plain text content from the elements inside an HTML string."""
def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
@ -242,6 +246,14 @@ def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) ->
if element.tag in skip_tags:
return
if element.tag == "details" and exclude_details_include_summary:
for subelement in element:
if subelement.tag == "summary":
yield from extract_text(subelement, skip_tags)
return
yield "Details"
return
if element.text:
yield element.text

4
tildes/tildes/models/comment/comment.py

@ -138,7 +138,9 @@ class Comment(DatabaseModel):
self.rendered_html = convert_markdown_to_safe_html(new_markdown)
extracted_text = extract_text_from_html(
self.rendered_html, skip_tags=["blockquote", "del"]
self.rendered_html,
skip_tags=["blockquote", "del"],
exclude_details_include_summary=True,
)
self.excerpt = truncate_string(
extracted_text, length=200, truncate_at_chars=" "

Loading…
Cancel
Save