From af66a76054e7c157202c5d82238c2b1ef906f66b Mon Sep 17 00:00:00 2001 From: Deimos Date: Tue, 12 Nov 2019 23:16:25 -0700 Subject: [PATCH] Add better control of content metadata by type Previously, the content metadata displayed next to a topic's content type (like "Article: 1800 words") was fairly generic and could result in strange data being displayed if a scraper fetched it for an inappropriate type (for example, displaying word count for videos). This creates an enum to hold all the different content metadata fields, and moves some logic into that class to handle deciding which fields to show for different types, and the formatting logic for values. --- tildes/tildes/enums.py | 66 +++++++++++++++++++++++++++- tildes/tildes/lib/link_metadata.py | 13 ------ tildes/tildes/models/topic/topic.py | 47 +++++++------------- tildes/tildes/views/api/web/topic.py | 19 +------- 4 files changed, 83 insertions(+), 62 deletions(-) delete mode 100644 tildes/tildes/lib/link_metadata.py diff --git a/tildes/tildes/enums.py b/tildes/tildes/enums.py index af440fa..57a66ae 100644 --- a/tildes/tildes/enums.py +++ b/tildes/tildes/enums.py @@ -4,7 +4,10 @@ """Contains Enum classes.""" import enum -from typing import Optional +from datetime import timedelta +from typing import Any, List, Optional + +from tildes.lib.datetime import utc_from_timestamp class CommentNotificationType(enum.Enum): @@ -80,6 +83,67 @@ class CommentLabelOption(enum.Enum): return None +class ContentMetadataFields(enum.Enum): + """Enum for the fields of content metadata stored and used (for topics).""" + + AUTHORS = enum.auto() + DESCRIPTION = enum.auto() + DOMAIN = enum.auto() + DURATION = enum.auto() + EXCERPT = enum.auto() + PUBLISHED = enum.auto() + TITLE = enum.auto() + WORD_COUNT = enum.auto() + + @property + def key(self) -> str: + """Return the key to store this field under.""" + return self.name.lower() + + @classmethod + def detail_fields_for_content_type( + cls, content_type: "TopicContentType", + ) -> List["ContentMetadataFields"]: + """Return a list of fields to display for detail about a particular type.""" + if content_type is TopicContentType.ARTICLE: + return [cls.WORD_COUNT, cls.PUBLISHED] + + if content_type is TopicContentType.TEXT: + return [cls.WORD_COUNT] + + if content_type is TopicContentType.VIDEO: + return [cls.DURATION, cls.PUBLISHED] + + return [] + + def format_value(self, value: Any) -> str: + """Format a value stored in this field into a string for display.""" + if self.name == "WORD_COUNT": + if value == 1: + return "1 word" + + return f"{value} words" + + if self.name == "DURATION": + delta = timedelta(seconds=value) + + # When converted to str, timedelta always includes hours and minutes, + # so we want to strip off all the excess zeros and/or colons. However, + # if it's less than a minute we'll need to add one back. + duration_str = str(delta).lstrip("0:") + if value < 60: + duration_str = f"0:{duration_str}" + + return duration_str + + if self.name == "PUBLISHED": + published = utc_from_timestamp(value) + date_str = published.strftime("%b %-d %Y") + return f"published {date_str}" + + return str(value) + + class FinancialEntryType(enum.Enum): """Enum for entry types in the Financials table.""" diff --git a/tildes/tildes/lib/link_metadata.py b/tildes/tildes/lib/link_metadata.py deleted file mode 100644 index ce6d0c5..0000000 --- a/tildes/tildes/lib/link_metadata.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 Tildes contributors -# SPDX-License-Identifier: AGPL-3.0-or-later - -"""Constants/classes/functions related to metadata generated from links.""" - -METADATA_KEYS = [ - "authors", - "description", - "duration", - "published", - "title", - "word_count", -] diff --git a/tildes/tildes/models/topic/topic.py b/tildes/tildes/models/topic/topic.py index ca9acc6..e3feb3a 100644 --- a/tildes/tildes/models/topic/topic.py +++ b/tildes/tildes/models/topic/topic.py @@ -26,7 +26,7 @@ from sqlalchemy.orm import deferred, relationship from sqlalchemy.sql.expression import text from titlecase import titlecase -from tildes.enums import TopicContentType, TopicType +from tildes.enums import ContentMetadataFields, TopicContentType, TopicType from tildes.lib.database import TagList from tildes.lib.datetime import utc_from_timestamp, utc_now from tildes.lib.id import id_to_id36 @@ -461,38 +461,25 @@ class Topic(DatabaseModel): @property def content_metadata_for_display(self) -> str: """Return a string of the content's metadata, suitable for display.""" + if not self.content_type: + return "" + metadata_strings = [] - # display word count (if we have it) with either type of topic - word_count = self.get_content_metadata("word_count") - if word_count is not None: - if word_count == 1: - metadata_strings.append("1 word") - else: - metadata_strings.append(f"{word_count} words") + fields = ContentMetadataFields.detail_fields_for_content_type(self.content_type) - if self.is_link_type: - # display the duration if we have it - duration = self.get_content_metadata("duration") - if duration: - duration_delta = timedelta(seconds=duration) - - # When converted to str, timedelta always includes hours and minutes, - # so we want to strip off all the excess zeros and/or colons. However, - # if it's less than a minute we'll need to add one back. - duration_str = str(duration_delta).lstrip("0:") - if duration < 60: - duration_str = f"0:{duration_str}" - - metadata_strings.append(duration_str) - - # display the published date if it's more than 3 days before the topic - published_timestamp = self.get_content_metadata("published") - if published_timestamp: - published = utc_from_timestamp(published_timestamp) - if self.created_time - published > timedelta(days=3): - date_str = published.strftime("%b %-d %Y") - metadata_strings.append(f"published {date_str}") + for field in fields: + value = self.get_content_metadata(field.key) + if not value: + continue + + # only show published date if it's more than 3 days before the topic + if field is ContentMetadataFields.PUBLISHED: + published = utc_from_timestamp(value) + if self.created_time - published < timedelta(days=3): + continue + + metadata_strings.append(field.format_value(value)) return ", ".join(metadata_strings) diff --git a/tildes/tildes/views/api/web/topic.py b/tildes/tildes/views/api/web/topic.py index 6708ea6..bdbcdc7 100644 --- a/tildes/tildes/views/api/web/topic.py +++ b/tildes/tildes/views/api/web/topic.py @@ -8,13 +8,10 @@ from marshmallow.fields import String from pyramid.httpexceptions import HTTPNotFound from pyramid.request import Request from pyramid.response import Response -from sqlalchemy import cast, Text -from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.exc import IntegrityError from webargs.pyramidparser import use_kwargs from tildes.enums import LogEventType -from tildes.lib.link_metadata import METADATA_KEYS from tildes.models.group import Group from tildes.models.log import LogTopic from tildes.models.topic import Topic, TopicBookmark, TopicVote @@ -393,22 +390,8 @@ def patch_topic_link(request: Request, link: str) -> dict: ) ) - # Wipe any old metadata from scrapers so we don't leave behind remnants - # (this probably really shouldn't be done here, but it's fine for now) - ( - request.query(Topic) - .filter(Topic.topic_id == topic.topic_id) - .update( - { - "content_metadata": Topic.content_metadata.op("-")( # type: ignore - cast(METADATA_KEYS, ARRAY(Text)) - ) - }, - synchronize_session=False, - ) - ) - topic.link = link + topic.content_metadata = None return Response(f'{topic.link}')