Browse Source

Wipe old metadata from link topics on link change

This is a bit ugly (and probably the wrong spot to do it), but we need
to wipe the old metadata before adding in new stuff from the
re-scrape(s). Otherwise, if we do something like change a YouTube video
to an article, the duration of the video will still be left in the
metadata even though it's no longer relevant.
merge-requests/64/head
Deimos 6 years ago
parent
commit
18002b12a5
  1. 13
      tildes/tildes/lib/link_metadata.py
  2. 18
      tildes/tildes/views/api/web/topic.py

13
tildes/tildes/lib/link_metadata.py

@ -0,0 +1,13 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Constants/classes/functions related to metadata generated from links."""
# Keys of the scraper-generated entries stored in a topic's content_metadata.
# When a topic's link is edited, exactly these keys are removed from the stored
# metadata so values from the previous link (e.g. a video's "duration") don't
# linger after the re-scrape.
#
# Kept as a plain list of strings: it is passed as-is to a SQL cast to a text
# array, so the container type matters.
METADATA_KEYS = [
    "authors",
    "description",
    "duration",
    "published",
    "title",
    "word_count",
]

18
tildes/tildes/views/api/web/topic.py

@ -8,10 +8,13 @@ from marshmallow.fields import String
from pyramid.httpexceptions import HTTPNotFound from pyramid.httpexceptions import HTTPNotFound
from pyramid.response import Response from pyramid.response import Response
from pyramid.request import Request from pyramid.request import Request
from sqlalchemy import cast, Text
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from webargs.pyramidparser import use_kwargs from webargs.pyramidparser import use_kwargs
from tildes.enums import LogEventType from tildes.enums import LogEventType
from tildes.lib.link_metadata import METADATA_KEYS
from tildes.models.group import Group from tildes.models.group import Group
from tildes.models.log import LogTopic from tildes.models.log import LogTopic
from tildes.models.topic import Topic, TopicBookmark, TopicVote from tildes.models.topic import Topic, TopicBookmark, TopicVote
@ -350,6 +353,21 @@ def patch_topic_link(request: Request, link: str) -> dict:
) )
) )
# Wipe any old metadata from scrapers so we don't leave behind remnants
# (this probably really shouldn't be done here, but it's fine for now)
(
request.query(Topic)
.filter(Topic.topic_id == topic.topic_id)
.update(
{
"content_metadata": Topic.content_metadata.op("-")( # type: ignore
cast(METADATA_KEYS, ARRAY(Text))
)
},
synchronize_session=False,
)
)
topic.link = link topic.link = link
return Response(f'<a href="{topic.link}">{topic.link}</a>') return Response(f'<a href="{topic.link}">{topic.link}</a>')

Loading…
Cancel
Save