From 32bcbf1f95422deb42ec5bbab4d61b82c194f38e Mon Sep 17 00:00:00 2001
From: Deimos
Date: Sun, 30 Sep 2018 22:30:54 -0600
Subject: [PATCH] Add timeout to Embedly scraper

---
 tildes/consumers/topic_embedly_extractor.py | 4 ++--
 tildes/tildes/scrapers/embedly_scraper.py   | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tildes/consumers/topic_embedly_extractor.py b/tildes/consumers/topic_embedly_extractor.py
index 68ca723..fa4209c 100644
--- a/tildes/consumers/topic_embedly_extractor.py
+++ b/tildes/consumers/topic_embedly_extractor.py
@@ -9,7 +9,7 @@ from typing import Sequence
 
 from amqpy import Message
 from pyramid.paster import bootstrap
-from requests.exceptions import HTTPError
+from requests.exceptions import HTTPError, Timeout
 from sqlalchemy import cast, desc, func
 from sqlalchemy.dialects.postgresql import JSONB
 
@@ -61,7 +61,7 @@ class TopicEmbedlyExtractor(PgsqlQueueConsumer):
         if not result:
             try:
                 result = self.scraper.scrape_url(topic.link)
-            except HTTPError:
+            except (HTTPError, Timeout):
                 return
 
             self.db_session.add(result)
diff --git a/tildes/tildes/scrapers/embedly_scraper.py b/tildes/tildes/scrapers/embedly_scraper.py
index c99985a..fc0a4c5 100644
--- a/tildes/tildes/scrapers/embedly_scraper.py
+++ b/tildes/tildes/scrapers/embedly_scraper.py
@@ -23,7 +23,9 @@ class EmbedlyScraper:
         """Scrape a url and return the result."""
         params: Dict[str, Any] = {"key": self.api_key, "format": "json", "url": url}
 
-        response = requests.get("https://api.embedly.com/1/extract", params=params)
+        response = requests.get(
+            "https://api.embedly.com/1/extract", params=params, timeout=5
+        )
         response.raise_for_status()
 
         return ScraperResult(url, ScraperType.EMBEDLY, response.json())