From 18002b12a519fb9fd9da338da20bd9e97a62e41a Mon Sep 17 00:00:00 2001 From: Deimos Date: Fri, 15 Mar 2019 18:03:18 -0600 Subject: [PATCH] Wipe old metadata from link topics on link change This is a bit ugly (and probably the wrong spot to do it), but we need to wipe the old metadata before adding in new stuff from the re-scrape(s). Otherwise, if we do something like change a YouTube video to an article, the duration of the video will still be left in the metadata even though it's no longer relevant. --- tildes/tildes/lib/link_metadata.py | 13 +++++++++++++ tildes/tildes/views/api/web/topic.py | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 tildes/tildes/lib/link_metadata.py diff --git a/tildes/tildes/lib/link_metadata.py b/tildes/tildes/lib/link_metadata.py new file mode 100644 index 0000000..ce6d0c5 --- /dev/null +++ b/tildes/tildes/lib/link_metadata.py @@ -0,0 +1,13 @@ +# Copyright (c) 2019 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Constants/classes/functions related to metadata generated from links.""" + +METADATA_KEYS = [ + "authors", + "description", + "duration", + "published", + "title", + "word_count", +] diff --git a/tildes/tildes/views/api/web/topic.py b/tildes/tildes/views/api/web/topic.py index 47d0a82..6097cce 100644 --- a/tildes/tildes/views/api/web/topic.py +++ b/tildes/tildes/views/api/web/topic.py @@ -8,10 +8,13 @@ from marshmallow.fields import String from pyramid.httpexceptions import HTTPNotFound from pyramid.response import Response from pyramid.request import Request +from sqlalchemy import cast, Text +from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.exc import IntegrityError from webargs.pyramidparser import use_kwargs from tildes.enums import LogEventType +from tildes.lib.link_metadata import METADATA_KEYS from tildes.models.group import Group from tildes.models.log import LogTopic from tildes.models.topic import Topic, TopicBookmark, TopicVote @@ -350,6 +353,21 @@ def patch_topic_link(request: Request, link: str) -> dict: ) ) + # Wipe any old metadata from scrapers so we don't leave behind remnants + # (this probably really shouldn't be done here, but it's fine for now) + ( + request.query(Topic) + .filter(Topic.topic_id == topic.topic_id) + .update( + { + "content_metadata": Topic.content_metadata.op("-")( # type: ignore + cast(METADATA_KEYS, ARRAY(Text)) + ) + }, + synchronize_session=False, + ) + ) + topic.link = link return Response(f'{topic.link}')