Browse Source

YoutubeScraper: handle API returning blank result

merge-requests/55/head
Deimos 6 years ago
parent
commit
1537785c2d
  1. 4
      tildes/consumers/topic_youtube_scraper.py
  2. 1
      tildes/tildes/scrapers/__init__.py
  3. 10
      tildes/tildes/scrapers/exceptions.py
  4. 8
      tildes/tildes/scrapers/youtube_scraper.py

4
tildes/consumers/topic_youtube_scraper.py

@ -18,7 +18,7 @@ from tildes.lib.amqp import PgsqlQueueConsumer
from tildes.lib.datetime import utc_now from tildes.lib.datetime import utc_now
from tildes.models.scraper import ScraperResult from tildes.models.scraper import ScraperResult
from tildes.models.topic import Topic from tildes.models.topic import Topic
from tildes.scrapers import YoutubeScraper
from tildes.scrapers import ScraperError, YoutubeScraper
# don't rescrape the same url inside this time period # don't rescrape the same url inside this time period
@ -62,7 +62,7 @@ class TopicYoutubeScraper(PgsqlQueueConsumer):
if not result: if not result:
try: try:
result = self.scraper.scrape_url(topic.link) result = self.scraper.scrape_url(topic.link)
except (HTTPError, Timeout):
except (HTTPError, ScraperError, Timeout):
return return
self.db_session.add(result) self.db_session.add(result)

1
tildes/tildes/scrapers/__init__.py

@ -1,4 +1,5 @@
"""Contains scrapers.""" """Contains scrapers."""
from .embedly_scraper import EmbedlyScraper from .embedly_scraper import EmbedlyScraper
from .exceptions import ScraperError
from .youtube_scraper import YoutubeScraper from .youtube_scraper import YoutubeScraper

10
tildes/tildes/scrapers/exceptions.py

@ -0,0 +1,10 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Exception classes related to scraping."""
class ScraperError(Exception):
    """Exception class for an error while scraping.

    Raised by a scraper when a request completes but the response does not
    contain usable data (for example, the YouTube API returning no items for
    a video ID). Callers can catch it alongside network-level errors to skip
    the result.
    """

8
tildes/tildes/scrapers/youtube_scraper.py

@ -13,6 +13,7 @@ import requests
from tildes.enums import ScraperType from tildes.enums import ScraperType
from tildes.models.scraper import ScraperResult from tildes.models.scraper import ScraperResult
from .exceptions import ScraperError
# Only parses the subset of ISO8601 durations that YouTube uses # Only parses the subset of ISO8601 durations that YouTube uses
@ -67,7 +68,12 @@ class YoutubeScraper:
) )
response.raise_for_status() response.raise_for_status()
return ScraperResult(url, ScraperType.YOUTUBE, response.json()["items"][0])
try:
video_data = response.json()["items"][0]
except (KeyError, IndexError):
raise ScraperError(f"No data returned for video with ID {video_id}")
return ScraperResult(url, ScraperType.YOUTUBE, video_data)
@staticmethod @staticmethod
def get_metadata_from_result(result: ScraperResult) -> Dict[str, Any]: def get_metadata_from_result(result: ScraperResult) -> Dict[str, Any]:

Loading…
Cancel
Save