Browse Source

Convert mobile Wikipedia links to standard ones

merge-requests/55/head
Deimos 6 years ago
parent
commit
115784453b
  1. 17
      tildes/tests/test_url_transform.py
  2. 20
      tildes/tildes/lib/url_transform.py

17
tildes/tests/test_url_transform.py

@ -42,3 +42,20 @@ def test_reddit_tracking_removed():
cleaned_url = apply_url_transformations(url)
assert cleaned_url == "https://www.reddit.com/r/tildes/comments/8k14is/_/?sort=new"
def test_wikipedia_mobile_conversion():
    """Check that a mobile Wikipedia article link becomes the standard-site link."""
    mobile_url = "https://en.m.wikipedia.org/wiki/Tilde"
    expected_url = "https://en.wikipedia.org/wiki/Tilde"

    assert apply_url_transformations(mobile_url) == expected_url
def test_wikipedia_mobile_homepage_not_converted():
    """Check that the mobile Wikipedia homepage link is returned unchanged."""
    homepage = "https://en.m.wikipedia.org"

    # The bare domain and its trailing-slash form must both pass through untouched.
    candidates = [homepage, homepage + "/"]
    for candidate in candidates:
        assert apply_url_transformations(candidate) == candidate

20
tildes/tildes/lib/url_transform.py

@ -69,6 +69,11 @@ def _apply_all_transformations(parsed_url: ParseResult) -> ParseResult:
return parsed_url
def has_path(parsed_url: ParseResult) -> bool:
    """Return True when the parsed url's path is something other than "" or "/".

    An empty path and a lone trailing slash are both treated as "no path".
    """
    path = parsed_url.path
    return path != "" and path != "/"
class UrlTransformer(ABC):
"""Abstract base class for url transformers.
@ -148,3 +153,18 @@ class RedditTrackingRemover(UrlTransformer):
query_params.pop("sh", None)
return parsed_url._replace(query=urlencode(query_params, doseq=True))
class WikipediaMobileConverter(UrlTransformer):
    """Convert links to Wikipedia mobile version to the standard version.

    A mobile hostname ends in ".m.wikipedia.org" (e.g. "en.m.wikipedia.org"); the
    transformation swaps that suffix for ".wikipedia.org" so the link points at the
    standard site.
    """

    # Hostname suffix identifying the mobile site, and the suffix that replaces it.
    _MOBILE_SUFFIX = ".m.wikipedia.org"
    _STANDARD_SUFFIX = ".wikipedia.org"

    @classmethod
    def is_applicable(cls, parsed_url: ParseResult) -> bool:
        """Return whether this transformation should be applied to the url.

        Applies only when the hostname ends with the mobile suffix and the url has a
        real path, so a link to the mobile homepage itself is left alone.
        """
        hostname = parsed_url.hostname

        # urlparse reports hostname as None for urls without a network location;
        # such urls can never be mobile Wikipedia links.
        if not hostname:
            return False

        return hostname.endswith(cls._MOBILE_SUFFIX) and has_path(parsed_url)

    @classmethod
    def apply_transformation(cls, parsed_url: ParseResult) -> ParseResult:
        """Apply the actual transformation process to the url.

        Returns a copy of the parsed url whose hostname has the mobile suffix swapped
        for the standard one. Any userinfo ("user:pass@") and port (":8080") present
        in the original netloc are carried over unchanged. A url whose hostname does
        not end with the mobile suffix is returned as-is.
        """
        hostname = parsed_url.hostname or ""
        if not hostname.endswith(cls._MOBILE_SUFFIX):
            # NOTE(review): presumably only reached after is_applicable() returned
            # True; kept as a safe no-op in case it is called directly.
            return parsed_url

        # Swap only the trailing suffix (the part is_applicable checked), rather than
        # every occurrence of the mobile marker inside the hostname.
        new_hostname = hostname[: -len(cls._MOBILE_SUFFIX)] + cls._STANDARD_SUFFIX

        # netloc is "[userinfo@]host[:port]". Rebuild it around the new hostname so
        # the userinfo and port survive. Splitting on ":" is safe here because a
        # hostname ending in the mobile suffix cannot be a bracketed IPv6 literal.
        userinfo, at_sign, host_and_port = parsed_url.netloc.rpartition("@")
        _, colon, port = host_and_port.partition(":")
        new_netloc = f"{userinfo}{at_sign}{new_hostname}{colon}{port}"

        return parsed_url._replace(netloc=new_netloc)
Loading…
Cancel
Save