Browse Source

Add SiteInfo to start generalizing site handling

This moves some of the site-specific logic that was previously embedded
in Topic for YouTube and Twitter links into a more general class named
SiteInfo. This should be expanded more in the future, but it already helps
with defining site names, how each site's content creators are displayed,
a per-site content type, and so on.
merge-requests/110/head
Deimos 5 years ago
parent
commit
35256fb3fc
  1. 3
      tildes/tildes/enums.py
  2. 44
      tildes/tildes/lib/site_info.py
  3. 28
      tildes/tildes/models/topic/topic.py

3
tildes/tildes/enums.py

@ -118,6 +118,9 @@ class ContentMetadataFields(enum.Enum):
def format_value(self, value: Any) -> str:
"""Format a value stored in this field into a string for display."""
if self.name == "AUTHORS":
return ", ".join(value)
if self.name == "DURATION":
delta = timedelta(seconds=value)

44
tildes/tildes/lib/site_info.py

@ -0,0 +1,44 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Library code related to displaying info about individual websites."""
from typing import List, Optional
from tildes.enums import ContentMetadataFields, TopicContentType
class SiteInfo:
    """Holds display-related details about one specific website."""

    def __init__(
        self,
        name: str,
        show_author: bool = False,
        content_type: Optional[TopicContentType] = None,
    ) -> None:
        """Record the site's name, author-display flag, and content type."""
        # Human-readable name of the site (e.g. "Twitter").
        self.name = name
        # Whether content_source() should append the authors to the name.
        self.show_author = show_author
        # Content type associated with the site; None when not specified.
        self.content_type = content_type

    def content_source(self, authors: Optional[List[str]] = None) -> str:
        """Return a string describing the "source" of content on this site.

        The result is only the site's name unless the site is flagged to show
        its author and a non-empty list of authors was supplied. In that case
        it is the name, a colon, and the authors as formatted by the AUTHORS
        content metadata field.
        """
        # Guard clause: nothing to append, so the bare name is the source.
        if not (self.show_author and authors):
            return self.name

        formatted_authors = ContentMetadataFields.AUTHORS.format_value(authors)
        return f"{self.name}: {formatted_authors}"
# Registry of sites with their own SiteInfo, keyed by the link's domain.
SITE_INFO_BY_DOMAIN = {
    "twitter.com": SiteInfo(
        name="Twitter",
        show_author=True,
        content_type=TopicContentType.TWEET,
    ),
    "youtube.com": SiteInfo(
        name="YouTube",
        show_author=True,
        content_type=TopicContentType.VIDEO,
    ),
}

28
tildes/tildes/models/topic/topic.py

@ -31,6 +31,7 @@ from tildes.lib.database import TagList
from tildes.lib.datetime import utc_from_timestamp, utc_now
from tildes.lib.id import id_to_id36
from tildes.lib.markdown import convert_markdown_to_safe_html
from tildes.lib.site_info import SITE_INFO_BY_DOMAIN
from tildes.lib.string import convert_to_url_slug
from tildes.lib.url import get_domain_from_url
from tildes.metrics import incr_counter
@ -397,15 +398,13 @@ class Topic(DatabaseModel):
if not self.is_link_type:
raise ValueError("Non-link topics do not have a link source")
domain = self.link_domain
authors = self.get_content_metadata("authors")
# if there's no SiteInfo object for this domain, just return the domain itself
try:
site = SITE_INFO_BY_DOMAIN[self.link_domain]
except KeyError:
return self.link_domain
if domain == "twitter.com" and authors:
return f"Twitter: @{authors[0]}"
elif domain == "youtube.com" and authors:
return f"YouTube: {authors[0]}"
return domain
return site.content_source(self.get_content_metadata("authors"))
@property
def is_spoiler(self) -> bool:
@ -455,15 +454,10 @@ class Topic(DatabaseModel):
elif url_path.suffix.lower() in (".gif", ".jpeg", ".jpg", ".png"):
return TopicContentType.IMAGE
# individual sites should be handled in a more general manner; fine for now
if self.link_domain == "youtube.com" and parsed_url.path == "/watch":
return TopicContentType.VIDEO
try:
if self.link_domain == "twitter.com" and url_path.parts[2] == "status":
return TopicContentType.TWEET
except IndexError:
pass
# if the site has its own logic in a SiteInfo object, use that
site = SITE_INFO_BY_DOMAIN.get(self.link_domain)
if site:
return site.content_type
# consider it an article if we picked up a word count of at least 200
word_count = self.get_content_metadata("word_count")

Loading…
Cancel
Save