From 35256fb3fc022eaf1fd0fff794c97e3be14eab08 Mon Sep 17 00:00:00 2001 From: Deimos Date: Mon, 25 Nov 2019 18:12:45 -0700 Subject: [PATCH] Add SiteInfo to start generalizing site handling This moves some of the site-specific logic that was previously embedded in Topic for YouTube and Twitter links into a more general class named SiteInfo. This should be expanded more in the future, but will help for defining site names, ways of displaying their content creators, ability to define a content type on a per-site basis, and so on. --- tildes/tildes/enums.py | 3 ++ tildes/tildes/lib/site_info.py | 44 +++++++++++++++++++++++++++++ tildes/tildes/models/topic/topic.py | 28 ++++++++---------- 3 files changed, 58 insertions(+), 17 deletions(-) create mode 100644 tildes/tildes/lib/site_info.py diff --git a/tildes/tildes/enums.py b/tildes/tildes/enums.py index 2b87ced..0cd7bf5 100644 --- a/tildes/tildes/enums.py +++ b/tildes/tildes/enums.py @@ -118,6 +118,9 @@ class ContentMetadataFields(enum.Enum): def format_value(self, value: Any) -> str: """Format a value stored in this field into a string for display.""" + if self.name == "AUTHORS": + return ", ".join(value) + if self.name == "DURATION": delta = timedelta(seconds=value) diff --git a/tildes/tildes/lib/site_info.py b/tildes/tildes/lib/site_info.py new file mode 100644 index 0000000..ab3714d --- /dev/null +++ b/tildes/tildes/lib/site_info.py @@ -0,0 +1,44 @@ +# Copyright (c) 2019 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Library code related to displaying info about individual websites.""" + +from typing import List, Optional + +from tildes.enums import ContentMetadataFields, TopicContentType + + +class SiteInfo: + """Class containing various info about a particular site.""" + + def __init__( + self, + name: str, + show_author: bool = False, + content_type: Optional[TopicContentType] = None, + ) -> None: + """Initialize info for a site.""" + self.name = name + self.show_author = show_author + self.content_type = content_type + + def content_source(self, authors: Optional[List[str]] = None) -> str: + """Return a string representing the "source" of content on this site. + + If the site isn't one that needs to show its author, this is just its name. + """ + if self.show_author and authors: + authors_str = ContentMetadataFields.AUTHORS.format_value(authors) + return f"{self.name}: {authors_str}" + + return self.name + + +SITE_INFO_BY_DOMAIN = { + "twitter.com": SiteInfo( + "Twitter", show_author=True, content_type=TopicContentType.TWEET + ), + "youtube.com": SiteInfo( + "YouTube", show_author=True, content_type=TopicContentType.VIDEO + ), +} diff --git a/tildes/tildes/models/topic/topic.py b/tildes/tildes/models/topic/topic.py index 3c9aebd..3469e33 100644 --- a/tildes/tildes/models/topic/topic.py +++ b/tildes/tildes/models/topic/topic.py @@ -31,6 +31,7 @@ from tildes.lib.database import TagList from tildes.lib.datetime import utc_from_timestamp, utc_now from tildes.lib.id import id_to_id36 from tildes.lib.markdown import convert_markdown_to_safe_html +from tildes.lib.site_info import SITE_INFO_BY_DOMAIN from tildes.lib.string import convert_to_url_slug from tildes.lib.url import get_domain_from_url from tildes.metrics import incr_counter @@ -397,15 +398,13 @@ class Topic(DatabaseModel): if not self.is_link_type: raise ValueError("Non-link topics do not have a link source") - domain = self.link_domain - authors = self.get_content_metadata("authors") + # if there's no SiteInfo object for this domain, just return the domain itself + try: + site = SITE_INFO_BY_DOMAIN[self.link_domain] + except KeyError: + return self.link_domain - if domain == "twitter.com" and authors: - return f"Twitter: @{authors[0]}" - elif domain == "youtube.com" and authors: - return f"YouTube: {authors[0]}" - - return domain + return site.content_source(self.get_content_metadata("authors")) @property def is_spoiler(self) -> bool: @@ -455,15 +454,10 @@ class Topic(DatabaseModel): elif url_path.suffix.lower() in (".gif", ".jpeg", ".jpg", ".png"): return TopicContentType.IMAGE - # individual sites should be handled in a more general manner; fine for now - if self.link_domain == "youtube.com" and parsed_url.path == "/watch": - return TopicContentType.VIDEO - - try: - if self.link_domain == "twitter.com" and url_path.parts[2] == "status": - return TopicContentType.TWEET - except IndexError: - pass + # if the site has its own logic in a SiteInfo object, use that + site = SITE_INFO_BY_DOMAIN.get(self.link_domain) + if site: + return site.content_type # consider it an article if we picked up a word count of at least 200 word_count = self.get_content_metadata("word_count")