Add SiteInfo to start generalizing site handling

This moves some of the site-specific logic for YouTube and Twitter links that was previously embedded in Topic into a more general class named SiteInfo. This should be expanded more in the future, but it already helps with defining site names, how their content creators are displayed, the ability to define a content type on a per-site basis, and so on.
6 years ago · 35256fb3fc
3 changed files with 58 additions and 17 deletions
--- a/tildes/tildes/enums.py
+++ b/tildes/tildes/enums.py
@ -118,6 +118,9 @@ class ContentMetadataFields(enum.Enum):

    def format_value(self, value: Any) -> str:
        """Format a value stored in this field into a string for display."""
+        if self.name == "AUTHORS":
+            return ", ".join(value)
+
        if self.name == "DURATION":
            delta = timedelta(seconds=value)

--- a/tildes/tildes/lib/site_info.py
+++ b/tildes/tildes/lib/site_info.py
@ -0,0 +1,44 @@
+# Copyright (c) 2019 Tildes contributors <code@tildes.net>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Library code related to displaying info about individual websites."""
+
+from typing import List, Optional
+
+from tildes.enums import ContentMetadataFields, TopicContentType
+
+
+class SiteInfo:
+    """Display-related details for a single website."""
+
+    def __init__(
+        self,
+        name: str,
+        show_author: bool = False,
+        content_type: Optional[TopicContentType] = None,
+    ) -> None:
+        """Store the site's name, author-display flag, and content type."""
+        self.content_type = content_type
+        self.show_author = show_author
+        self.name = name
+
+    def content_source(self, authors: Optional[List[str]] = None) -> str:
+        """Build the string describing where this site's content came from.
+
+        The result is "<site name>: <authors>" when the site shows authors and some
+        were supplied; in every other case it is just the site name.
+        """
+        if not (self.show_author and authors):
+            return self.name
+
+        formatted_authors = ContentMetadataFields.AUTHORS.format_value(authors)
+        return f"{self.name}: {formatted_authors}"
+
+
+# Known sites with custom display info, keyed by the domain of the link.
+SITE_INFO_BY_DOMAIN = {
+    "twitter.com": SiteInfo(
+        name="Twitter", show_author=True, content_type=TopicContentType.TWEET
+    ),
+    "youtube.com": SiteInfo(
+        name="YouTube", show_author=True, content_type=TopicContentType.VIDEO
+    ),
+}
--- a/tildes/tildes/models/topic/topic.py
+++ b/tildes/tildes/models/topic/topic.py
@ -31,6 +31,7 @@ from tildes.lib.database import TagList
 from tildes.lib.datetime import utc_from_timestamp, utc_now
 from tildes.lib.id import id_to_id36
 from tildes.lib.markdown import convert_markdown_to_safe_html
+from tildes.lib.site_info import SITE_INFO_BY_DOMAIN
 from tildes.lib.string import convert_to_url_slug
 from tildes.lib.url import get_domain_from_url
 from tildes.metrics import incr_counter
@ -397,15 +398,13 @@ class Topic(DatabaseModel):
        if not self.is_link_type:
            raise ValueError("Non-link topics do not have a link source")

-        domain = self.link_domain
-        authors = self.get_content_metadata("authors")
+        # if there's no SiteInfo object for this domain, just return the domain itself
+        try:
+            site = SITE_INFO_BY_DOMAIN[self.link_domain]
+        except KeyError:
+            return self.link_domain

-        if domain == "twitter.com" and authors:
-            return f"Twitter: @{authors[0]}"
-        elif domain == "youtube.com" and authors:
-            return f"YouTube: {authors[0]}"
-
-        return domain
+        return site.content_source(self.get_content_metadata("authors"))

    @property
    def is_spoiler(self) -> bool:
@ -455,15 +454,10 @@ class Topic(DatabaseModel):
            elif url_path.suffix.lower() in (".gif", ".jpeg", ".jpg", ".png"):
                return TopicContentType.IMAGE

-            # individual sites should be handled in a more general manner; fine for now
-            if self.link_domain == "youtube.com" and parsed_url.path == "/watch":
-                return TopicContentType.VIDEO
-
-            try:
-                if self.link_domain == "twitter.com" and url_path.parts[2] == "status":
-                    return TopicContentType.TWEET
-            except IndexError:
-                pass
+            # if the site has its own logic in a SiteInfo object, use that
+            site = SITE_INFO_BY_DOMAIN.get(self.link_domain)
+            if site:
+                return site.content_type

            # consider it an article if we picked up a word count of at least 200
            word_count = self.get_content_metadata("word_count")