Browse Source

Add better control of content metadata by type

Previously, the content metadata displayed next to a topic's content
type (like "Article: 1800 words") was fairly generic and could result in
strange data being displayed if a scraper fetched it for an
inappropriate type (for example, displaying word count for videos).

This creates an enum to hold all the different content metadata fields,
and moves two pieces of logic into that class: deciding which fields to
show for each content type, and formatting the stored values for display.
merge-requests/85/head
Deimos 5 years ago
parent
commit
af66a76054
  1. 66
      tildes/tildes/enums.py
  2. 13
      tildes/tildes/lib/link_metadata.py
  3. 47
      tildes/tildes/models/topic/topic.py
  4. 19
      tildes/tildes/views/api/web/topic.py

66
tildes/tildes/enums.py

@ -4,7 +4,10 @@
"""Contains Enum classes."""
import enum
from typing import Optional
from datetime import timedelta
from typing import Any, List, Optional
from tildes.lib.datetime import utc_from_timestamp
class CommentNotificationType(enum.Enum):
@ -80,6 +83,67 @@ class CommentLabelOption(enum.Enum):
return None
class ContentMetadataFields(enum.Enum):
    """Enum for the fields of content metadata stored and used (for topics).

    Each member knows the key it is stored under (``key``) and how to render
    a stored value for display (``format_value``). The class also decides
    which fields are shown as detail for a given topic content type.
    """

    AUTHORS = enum.auto()
    DESCRIPTION = enum.auto()
    DOMAIN = enum.auto()
    DURATION = enum.auto()
    EXCERPT = enum.auto()
    PUBLISHED = enum.auto()
    TITLE = enum.auto()
    WORD_COUNT = enum.auto()

    @property
    def key(self) -> str:
        """Return the key to store this field under (the lowercased name)."""
        return self.name.lower()

    @classmethod
    def detail_fields_for_content_type(
        cls, content_type: "TopicContentType",
    ) -> List["ContentMetadataFields"]:
        """Return a list of fields to display for detail about a particular type.

        The order of the returned list is the order the fields are listed
        here; content types without an entry get an empty list.
        """
        if content_type is TopicContentType.ARTICLE:
            return [cls.WORD_COUNT, cls.PUBLISHED]

        if content_type is TopicContentType.TEXT:
            return [cls.WORD_COUNT]

        if content_type is TopicContentType.VIDEO:
            return [cls.DURATION, cls.PUBLISHED]

        return []

    def format_value(self, value: Any) -> str:
        """Format a value stored in this field into a string for display.

        - WORD_COUNT: "1 word" or "<n> words".
        - DURATION: ``value`` is a number of seconds, rendered clock-style
          without leading zero units, e.g. "0:05", "1:30", "1:00:00".
        - PUBLISHED: ``value`` is passed to ``utc_from_timestamp``
          (presumably a Unix timestamp - confirm against the scrapers) and
          rendered like "published Jan 5 2019".
        - Any other field: ``str(value)``.
        """
        if self is ContentMetadataFields.WORD_COUNT:
            if value == 1:
                return "1 word"

            return f"{value} words"

        if self is ContentMetadataFields.DURATION:
            delta = timedelta(seconds=value)

            # When converted to str, timedelta always includes hours and minutes,
            # so we want to strip off all the excess zeros and/or colons.
            duration_str = str(delta).lstrip("0:")

            # Below a minute the strip removes everything but the significant
            # seconds digits (including the tens-of-seconds zero), so pad the
            # seconds back to two digits before re-adding the "0:" minutes part.
            # Without the padding, 5 seconds rendered as "0:5" and 0 as "0:".
            if value < 60:
                duration_str = f"0:{duration_str.zfill(2)}"

            return duration_str

        if self is ContentMetadataFields.PUBLISHED:
            published = utc_from_timestamp(value)

            # Build the day without zero-padding via the attribute instead of
            # the "%-d" directive, which is a platform-specific extension.
            date_str = f"{published:%b} {published.day} {published:%Y}"
            return f"published {date_str}"

        return str(value)
class FinancialEntryType(enum.Enum):
"""Enum for entry types in the Financials table."""

13
tildes/tildes/lib/link_metadata.py

@ -1,13 +0,0 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Constants/classes/functions related to metadata generated from links."""
METADATA_KEYS = [
"authors",
"description",
"duration",
"published",
"title",
"word_count",
]

47
tildes/tildes/models/topic/topic.py

@ -26,7 +26,7 @@ from sqlalchemy.orm import deferred, relationship
from sqlalchemy.sql.expression import text
from titlecase import titlecase
from tildes.enums import TopicContentType, TopicType
from tildes.enums import ContentMetadataFields, TopicContentType, TopicType
from tildes.lib.database import TagList
from tildes.lib.datetime import utc_from_timestamp, utc_now
from tildes.lib.id import id_to_id36
@ -461,38 +461,25 @@ class Topic(DatabaseModel):
@property
def content_metadata_for_display(self) -> str:
"""Return a string of the content's metadata, suitable for display."""
if not self.content_type:
return ""
metadata_strings = []
# display word count (if we have it) with either type of topic
word_count = self.get_content_metadata("word_count")
if word_count is not None:
if word_count == 1:
metadata_strings.append("1 word")
else:
metadata_strings.append(f"{word_count} words")
fields = ContentMetadataFields.detail_fields_for_content_type(self.content_type)
if self.is_link_type:
# display the duration if we have it
duration = self.get_content_metadata("duration")
if duration:
duration_delta = timedelta(seconds=duration)
# When converted to str, timedelta always includes hours and minutes,
# so we want to strip off all the excess zeros and/or colons. However,
# if it's less than a minute we'll need to add one back.
duration_str = str(duration_delta).lstrip("0:")
if duration < 60:
duration_str = f"0:{duration_str}"
metadata_strings.append(duration_str)
# display the published date if it's more than 3 days before the topic
published_timestamp = self.get_content_metadata("published")
if published_timestamp:
published = utc_from_timestamp(published_timestamp)
if self.created_time - published > timedelta(days=3):
date_str = published.strftime("%b %-d %Y")
metadata_strings.append(f"published {date_str}")
for field in fields:
value = self.get_content_metadata(field.key)
if not value:
continue
# only show published date if it's more than 3 days before the topic
if field is ContentMetadataFields.PUBLISHED:
published = utc_from_timestamp(value)
if self.created_time - published < timedelta(days=3):
continue
metadata_strings.append(field.format_value(value))
return ", ".join(metadata_strings)

19
tildes/tildes/views/api/web/topic.py

@ -8,13 +8,10 @@ from marshmallow.fields import String
from pyramid.httpexceptions import HTTPNotFound
from pyramid.request import Request
from pyramid.response import Response
from sqlalchemy import cast, Text
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.exc import IntegrityError
from webargs.pyramidparser import use_kwargs
from tildes.enums import LogEventType
from tildes.lib.link_metadata import METADATA_KEYS
from tildes.models.group import Group
from tildes.models.log import LogTopic
from tildes.models.topic import Topic, TopicBookmark, TopicVote
@ -393,22 +390,8 @@ def patch_topic_link(request: Request, link: str) -> dict:
)
)
# Wipe any old metadata from scrapers so we don't leave behind remnants
# (this probably really shouldn't be done here, but it's fine for now)
(
request.query(Topic)
.filter(Topic.topic_id == topic.topic_id)
.update(
{
"content_metadata": Topic.content_metadata.op("-")( # type: ignore
cast(METADATA_KEYS, ARRAY(Text))
)
},
synchronize_session=False,
)
)
topic.link = link
topic.content_metadata = None
return Response(f'<a href="{topic.link}">{topic.link}</a>')

Loading…
Cancel
Save