diff --git a/tildes/alembic/versions/b9d9ae4c2286_add_comment_excerpt.py b/tildes/alembic/versions/b9d9ae4c2286_add_comment_excerpt.py
new file mode 100644
index 0000000..51ab171
--- /dev/null
+++ b/tildes/alembic/versions/b9d9ae4c2286_add_comment_excerpt.py
@@ -0,0 +1,60 @@
+"""Add comment excerpt
+
+Revision ID: b9d9ae4c2286
+Revises: b424479202f9
+Create Date: 2018-08-28 02:42:48.436246
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import Session
+
+from tildes.lib.string import extract_text_from_html, truncate_string
+
+
+# revision identifiers, used by Alembic.
+revision = "b9d9ae4c2286"
+down_revision = "b424479202f9"
+branch_labels = None
+depends_on = None
+
+
+Base = declarative_base()
+
+
+# declare a minimal comments table here - we don't want the migration to be potentially
+# impacted by future changes to the model, so we shouldn't import the real one
+class Comment(Base):
+    """Minimal stand-in for the comments table, with only the columns used here."""
+
+    __tablename__ = "comments"
+
+    comment_id = sa.Column(sa.Integer, primary_key=True)
+    is_deleted = sa.Column(sa.Boolean)
+    rendered_html = sa.Column(sa.Text)
+    excerpt = sa.Column(sa.Text)
+
+
+def upgrade():
+    """Add the excerpt column to comments and fill it in for non-deleted comments."""
+    op.add_column(
+        "comments", sa.Column("excerpt", sa.Text(), server_default="", nullable=False)
+    )
+
+    # generate excerpts for all existing (non-deleted) comments
+    # (Session is imported directly instead of being reached through sa.orm, which
+    # only resolves when some other import has already loaded sqlalchemy.orm)
+    session = Session(bind=op.get_bind())
+    comments = session.query(Comment).filter(Comment.is_deleted == False).all()
+    for comment in comments:
+        extracted_text = extract_text_from_html(comment.rendered_html)
+        comment.excerpt = truncate_string(
+            extracted_text, length=200, truncate_at_chars=" "
+        )
+    session.commit()
+
+
+def downgrade():
+    """Drop the excerpt column from comments."""
+    op.drop_column("comments", "excerpt")
diff --git a/tildes/consumers/topic_metadata_generator.py b/tildes/consumers/topic_metadata_generator.py
index 7075f9f..fc8fa92 100644
--- a/tildes/consumers/topic_metadata_generator.py
+++ b/tildes/consumers/topic_metadata_generator.py
@@ -3,11 +3,10 @@ from typing import
Sequence from amqpy import Message -from html5lib import HTMLParser import publicsuffix from tildes.lib.amqp import PgsqlQueueConsumer -from tildes.lib.string import simplify_string, truncate_string, word_count +from tildes.lib.string import extract_text_from_html, truncate_string, word_count from tildes.lib.url import get_domain_from_url from tildes.models.topic import Topic @@ -37,17 +36,9 @@ class TopicMetadataGenerator(PgsqlQueueConsumer): @staticmethod def _generate_text_metadata(topic: Topic) -> None: """Generate metadata for a text topic (word count and excerpt).""" - html_tree = HTMLParser().parseFragment(topic.rendered_html) + extracted_text = extract_text_from_html(topic.rendered_html) - # extract the text from all of the HTML elements - extracted_text = "".join( - [element_text for element_text in html_tree.itertext()] - ) - - # sanitize unicode, remove leading/trailing whitespace, etc. - extracted_text = simplify_string(extracted_text) - - # create a short excerpt by truncating the simplified string + # create a short excerpt by truncating the extracted string excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ") topic.content_metadata = { diff --git a/tildes/scss/modules/_comment.scss b/tildes/scss/modules/_comment.scss index ca40275..cf5d5c0 100644 --- a/tildes/scss/modules/_comment.scss +++ b/tildes/scss/modules/_comment.scss @@ -13,11 +13,6 @@ @media (min-width: $size-md) { padding: 0.2rem; } - - & > time { - margin-left: 0.4rem; - font-size: 0.6rem; - } } &:target > .comment-itself { @@ -33,12 +28,27 @@ margin-left: 0.2rem; } +.comment-posted-time { + margin-left: 0.4rem; + font-size: 0.6rem; +} + .comment-edited-time { font-size: 0.6rem; font-style: italic; margin-left: 0.4rem; } +.comment-excerpt { + display: none; + + font-style: italic; + margin-left: 0.4rem; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + .comment-nav-link { font-size: 0.6rem; margin-left: 0.4rem; @@ -116,9 +126,20 @@ 
border-bottom: 0; } - .comment-text, .post-buttons, .comment-replies, .comment-votes, .comment-tags { + .comment-edited-time, + .comment-nav-link, + .comment-posted-time, + .comment-replies, + .comment-text, + .comment-tags, + .comment-votes, + .post-buttons { display: none; } + + .comment-excerpt { + display: inline-block; + } } .is-comment-deleted, .is-comment-removed { diff --git a/tildes/tildes/lib/string.py b/tildes/tildes/lib/string.py index b830d03..96df9fc 100644 --- a/tildes/tildes/lib/string.py +++ b/tildes/tildes/lib/string.py @@ -5,6 +5,8 @@ from typing import Optional import unicodedata from urllib.parse import quote +from html5lib import HTMLParser + # regex for matching an entire word, handles words that include an apostrophe WORD_REGEX = re.compile(r"\w[\w'’]*") @@ -205,3 +207,14 @@ def separate_string(original: str, separator: str, segment_size: int) -> str: separated += char return separated + + +def extract_text_from_html(html: str) -> str: + """Extract plain text content from the elements inside an HTML string.""" + html_tree = HTMLParser().parseFragment(html) + + # extract the text from all of the HTML elements + extracted_text = "".join([element_text for element_text in html_tree.itertext()]) + + # sanitize unicode, remove leading/trailing whitespace, etc. 
+ return simplify_string(extracted_text) diff --git a/tildes/tildes/models/comment/comment.py b/tildes/tildes/models/comment/comment.py index 580049d..a58f48f 100644 --- a/tildes/tildes/models/comment/comment.py +++ b/tildes/tildes/models/comment/comment.py @@ -13,6 +13,7 @@ from sqlalchemy.sql.expression import text from tildes.lib.datetime import utc_now from tildes.lib.id import id_to_id36 from tildes.lib.markdown import convert_markdown_to_safe_html +from tildes.lib.string import extract_text_from_html, truncate_string from tildes.metrics import incr_counter from tildes.models import DatabaseModel from tildes.models.topic import Topic @@ -78,6 +79,7 @@ class Comment(DatabaseModel): last_edited_time: Optional[datetime] = Column(TIMESTAMP(timezone=True)) _markdown: str = deferred(Column("markdown", Text, nullable=False)) rendered_html: str = Column(Text, nullable=False) + excerpt: str = Column(Text, nullable=False, server_default="") num_votes: int = Column(Integer, nullable=False, server_default="0", index=True) user: User = relationship("User", lazy=False, innerjoin=True) @@ -101,6 +103,11 @@ class Comment(DatabaseModel): self._markdown = new_markdown self.rendered_html = convert_markdown_to_safe_html(new_markdown) + extracted_text = extract_text_from_html(self.rendered_html) + self.excerpt = truncate_string( + extracted_text, length=200, truncate_at_chars=" " + ) + if self.created_time and utc_now() - self.created_time > EDIT_GRACE_PERIOD: self.last_edited_time = utc_now() diff --git a/tildes/tildes/templates/macros/comments.jinja2 b/tildes/tildes/templates/macros/comments.jinja2 index e25c65b..52dfe3d 100644 --- a/tildes/tildes/templates/macros/comments.jinja2 +++ b/tildes/tildes/templates/macros/comments.jinja2 @@ -46,7 +46,7 @@ {% endif %} - {{ time_ago_responsive(comment.created_time) }} + {{ time_ago_responsive(comment.created_time, class_="comment-posted-time") }} {% if comment.last_edited_time %} @@ -69,6 +69,8 @@ {% if not is_individual_comment 
%}data-js-comment-parent-button{% endif %} >Parent {% endif %} + +
{{ comment.excerpt }}
{% if request.has_permission('view', comment) %} diff --git a/tildes/tildes/templates/macros/datetime.jinja2 b/tildes/tildes/templates/macros/datetime.jinja2 index 7c5887f..f734fb4 100644 --- a/tildes/tildes/templates/macros/datetime.jinja2 +++ b/tildes/tildes/templates/macros/datetime.jinja2 @@ -6,9 +6,9 @@ {%- endmacro %} -{% macro time_ago_responsive(datetime) -%} +{% macro time_ago_responsive(datetime, class_=None) -%}