diff --git a/tildes/tildes/lib/link_metadata.py b/tildes/tildes/lib/link_metadata.py
new file mode 100644
index 0000000..ce6d0c5
--- /dev/null
+++ b/tildes/tildes/lib/link_metadata.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2019 Tildes contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Constants/classes/functions related to metadata generated from links."""
+
+METADATA_KEYS = [
+    "authors",
+    "description",
+    "duration",
+    "published",
+    "title",
+    "word_count",
+]
diff --git a/tildes/tildes/views/api/web/topic.py b/tildes/tildes/views/api/web/topic.py
index 47d0a82..6097cce 100644
--- a/tildes/tildes/views/api/web/topic.py
+++ b/tildes/tildes/views/api/web/topic.py
@@ -8,10 +8,13 @@ from marshmallow.fields import String
 from pyramid.httpexceptions import HTTPNotFound
 from pyramid.response import Response
 from pyramid.request import Request
+from sqlalchemy import cast, Text
+from sqlalchemy.dialects.postgresql import ARRAY
 from sqlalchemy.exc import IntegrityError
 from webargs.pyramidparser import use_kwargs
 
 from tildes.enums import LogEventType
+from tildes.lib.link_metadata import METADATA_KEYS
 from tildes.models.group import Group
 from tildes.models.log import LogTopic
 from tildes.models.topic import Topic, TopicBookmark, TopicVote
@@ -350,6 +353,21 @@ def patch_topic_link(request: Request, link: str) -> dict:
         )
     )
 
+    # Wipe any old metadata from scrapers so we don't leave behind remnants
+    # (this probably really shouldn't be done here, but it's fine for now)
+    (
+        request.query(Topic)
+        .filter(Topic.topic_id == topic.topic_id)
+        .update(
+            {
+                "content_metadata": Topic.content_metadata.op("-")(  # type: ignore
+                    cast(METADATA_KEYS, ARRAY(Text))
+                )
+            },
+            synchronize_session=False,
+        )
+    )
+
     topic.link = link
 
     return Response(f'{topic.link}')