From 35256fb3fc022eaf1fd0fff794c97e3be14eab08 Mon Sep 17 00:00:00 2001
From: Deimos <deimos@tildes.net>
Date: Mon, 25 Nov 2019 18:12:45 -0700
Subject: [PATCH] Add SiteInfo to start generalizing site handling

This moves some of the site-specific logic that was previously embedded
in Topic for YouTube and Twitter links into a more general class named
SiteInfo. This should be expanded more in the future, but it already
helps with defining site names, how their content creators are
displayed, a per-site content type, and so on.
---
 tildes/tildes/enums.py              |  3 ++
 tildes/tildes/lib/site_info.py      | 44 +++++++++++++++++++++++++++++
 tildes/tildes/models/topic/topic.py | 28 ++++++++----------
 3 files changed, 58 insertions(+), 17 deletions(-)
 create mode 100644 tildes/tildes/lib/site_info.py

diff --git a/tildes/tildes/enums.py b/tildes/tildes/enums.py
index 2b87ced..0cd7bf5 100644
--- a/tildes/tildes/enums.py
+++ b/tildes/tildes/enums.py
@@ -118,6 +118,9 @@ class ContentMetadataFields(enum.Enum):
 
     def format_value(self, value: Any) -> str:
         """Format a value stored in this field into a string for display."""
+        if self.name == "AUTHORS":
+            return ", ".join(value)
+
         if self.name == "DURATION":
             delta = timedelta(seconds=value)
 
diff --git a/tildes/tildes/lib/site_info.py b/tildes/tildes/lib/site_info.py
new file mode 100644
index 0000000..ab3714d
--- /dev/null
+++ b/tildes/tildes/lib/site_info.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2019 Tildes contributors <code@tildes.net>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Library code related to displaying info about individual websites."""
+
+from typing import List, Optional
+
+from tildes.enums import ContentMetadataFields, TopicContentType
+
+
+class SiteInfo:
+    """Display info for one website: its name, author display, and content type."""
+
+    def __init__(
+        self,
+        name: str,
+        show_author: bool = False,
+        content_type: Optional[TopicContentType] = None,
+    ) -> None:
+        """Store the site's display name, author-display flag, and content type."""
+        self.name = name
+        self.show_author = show_author
+        self.content_type = content_type
+
+    def content_source(self, authors: Optional[List[str]] = None) -> str:
+        """Return a string representing the "source" of content on this site.
+
+        Gives "<name>: <authors>" if show_author and authors are set, else the name.
+        """
+        if self.show_author and authors:  # None or an empty list both fall through
+            authors_str = ContentMetadataFields.AUTHORS.format_value(authors)
+            return f"{self.name}: {authors_str}"
+
+        return self.name
+
+
+SITE_INFO_BY_DOMAIN = {  # maps a link's domain to the SiteInfo for that site
+    "twitter.com": SiteInfo(
+        "Twitter", show_author=True, content_type=TopicContentType.TWEET
+    ),
+    "youtube.com": SiteInfo(
+        "YouTube", show_author=True, content_type=TopicContentType.VIDEO
+    ),
+}
diff --git a/tildes/tildes/models/topic/topic.py b/tildes/tildes/models/topic/topic.py
index 3c9aebd..3469e33 100644
--- a/tildes/tildes/models/topic/topic.py
+++ b/tildes/tildes/models/topic/topic.py
@@ -31,6 +31,7 @@ from tildes.lib.database import TagList
 from tildes.lib.datetime import utc_from_timestamp, utc_now
 from tildes.lib.id import id_to_id36
 from tildes.lib.markdown import convert_markdown_to_safe_html
+from tildes.lib.site_info import SITE_INFO_BY_DOMAIN
 from tildes.lib.string import convert_to_url_slug
 from tildes.lib.url import get_domain_from_url
 from tildes.metrics import incr_counter
@@ -397,15 +398,13 @@ class Topic(DatabaseModel):
         if not self.is_link_type:
             raise ValueError("Non-link topics do not have a link source")
 
-        domain = self.link_domain
-        authors = self.get_content_metadata("authors")
+        # if there's no SiteInfo object for this domain, just return the domain itself
+        try:
+            site = SITE_INFO_BY_DOMAIN[self.link_domain]
+        except KeyError:
+            return self.link_domain
 
-        if domain == "twitter.com" and authors:
-            return f"Twitter: @{authors[0]}"
-        elif domain == "youtube.com" and authors:
-            return f"YouTube: {authors[0]}"
-
-        return domain
+        return site.content_source(self.get_content_metadata("authors"))
 
     @property
     def is_spoiler(self) -> bool:
@@ -455,15 +454,10 @@ class Topic(DatabaseModel):
             elif url_path.suffix.lower() in (".gif", ".jpeg", ".jpg", ".png"):
                 return TopicContentType.IMAGE
 
-            # individual sites should be handled in a more general manner; fine for now
-            if self.link_domain == "youtube.com" and parsed_url.path == "/watch":
-                return TopicContentType.VIDEO
-
-            try:
-                if self.link_domain == "twitter.com" and url_path.parts[2] == "status":
-                    return TopicContentType.TWEET
-            except IndexError:
-                pass
+            # if the site has its own logic in a SiteInfo object, use that
+            site = SITE_INFO_BY_DOMAIN.get(self.link_domain)
+            if site:
+                return site.content_type
 
             # consider it an article if we picked up a word count of at least 200
             word_count = self.get_content_metadata("word_count")