From 50fec63afa61587a5fcec1ab0c1d0f3b83c13783 Mon Sep 17 00:00:00 2001
From: Deimos
Date: Thu, 10 Jan 2019 23:45:33 -0700
Subject: [PATCH] Add ability to exempt urls from transformations

This is a bit flimsy, but when I started looking at applying the
existing transformations to old posts, I found the Paradox forums as an
example of links that became broken after they were processed (because
"fixing" their links ends up breaking them). This will give a way to
exempt any other domains or urls that end up being a problem, though
over the long term it would probably be better to make this
database-based instead of code-based.
---
 tildes/tests/test_url_transform.py |  7 +++++++
 tildes/tildes/lib/url_transform.py | 13 +++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/tildes/tests/test_url_transform.py b/tildes/tests/test_url_transform.py
index 85b7e26..23db521 100644
--- a/tildes/tests/test_url_transform.py
+++ b/tildes/tests/test_url_transform.py
@@ -79,3 +79,10 @@ def test_youtube_unshortened():
     transformed_url = apply_url_transformations(url)
 
     assert transformed_url == "https://www.youtube.com/watch?v=YbJOTdZBX1g&t=1"
+
+
+def test_exempt_url_not_transformed():
+    """Ensure that an exempt url doesn't get transformed."""
+    url = "https://forum.paradoxplaza.com/forum/index.php?forums/518/"
+
+    assert apply_url_transformations(url) == url
diff --git a/tildes/tildes/lib/url_transform.py b/tildes/tildes/lib/url_transform.py
index 053632c..6b38bc8 100644
--- a/tildes/tildes/lib/url_transform.py
+++ b/tildes/tildes/lib/url_transform.py
@@ -30,6 +30,9 @@ def apply_url_transformations(url: str) -> str:
     """
     parsed_url = urlparse(url)
 
+    if _is_exempt_from_transformations(parsed_url):
+        return url
+
     try:
         parsed_url = _apply_all_transformations(parsed_url)
     except UrlTransformationLoopError:
@@ -39,6 +42,16 @@ def apply_url_transformations(url: str) -> str:
     return urlunparse(parsed_url)
 
 
+def _is_exempt_from_transformations(parsed_url: ParseResult) -> bool:
+    """Return whether this url should be exempt from the transformation process."""
+
+    # Paradox forums use an invalid url scheme that will break if processed
+    if parsed_url.hostname == "forum.paradoxplaza.com":
+        return True
+
+    return False
+
+
 def _apply_all_transformations(parsed_url: ParseResult) -> ParseResult:
     """Apply all relevant UrlTransformer transformations to the url."""
     # Used to keep track of which transformations are restarting the process, so we