def is_applicable(self, url: str) -> bool:
    """Report whether this scraper should be used for the given url.

    Every url is considered applicable except links to YouTube video pages
    (host ``www.youtube.com`` or ``youtube.com`` with the path ``/watch``).
    """
    url_parts = urlparse(url)

    on_youtube = url_parts.hostname in ("www.youtube.com", "youtube.com")
    is_video_page = url_parts.path == "/watch"

    # YouTube videos are left to the dedicated scraper for them
    return not (on_youtube and is_video_page)