diff --git a/tildes/consumers/topic_embedly_extractor.py b/tildes/consumers/topic_embedly_extractor.py index 68ca723..fa4209c 100644 --- a/tildes/consumers/topic_embedly_extractor.py +++ b/tildes/consumers/topic_embedly_extractor.py @@ -9,7 +9,7 @@ from typing import Sequence from amqpy import Message from pyramid.paster import bootstrap -from requests.exceptions import HTTPError +from requests.exceptions import HTTPError, Timeout from sqlalchemy import cast, desc, func from sqlalchemy.dialects.postgresql import JSONB @@ -61,7 +61,7 @@ class TopicEmbedlyExtractor(PgsqlQueueConsumer): if not result: try: result = self.scraper.scrape_url(topic.link) - except HTTPError: + except (HTTPError, Timeout): return self.db_session.add(result) diff --git a/tildes/tildes/scrapers/embedly_scraper.py b/tildes/tildes/scrapers/embedly_scraper.py index c99985a..fc0a4c5 100644 --- a/tildes/tildes/scrapers/embedly_scraper.py +++ b/tildes/tildes/scrapers/embedly_scraper.py @@ -23,7 +23,9 @@ class EmbedlyScraper: """Scrape a url and return the result.""" params: Dict[str, Any] = {"key": self.api_key, "format": "json", "url": url} - response = requests.get("https://api.embedly.com/1/extract", params=params) + response = requests.get( + "https://api.embedly.com/1/extract", params=params, timeout=5 + ) response.raise_for_status() return ScraperResult(url, ScraperType.EMBEDLY, response.json())