Add ability to exempt urls from transformations

This is a bit flimsy, but when I started looking at applying the existing transformations to old posts, I found that links to the Paradox forums became broken after they were processed (because "fixing" their links ends up breaking them). This gives a way to exempt any other domains or urls that end up being a problem, though over the long term it would probably be better to make this database-based instead of code-based.
7 years ago · 50fec63afa
2 changed files with 20 additions and 0 deletions
--- a/tildes/tests/test_url_transform.py
+++ b/tildes/tests/test_url_transform.py
@ -79,3 +79,10 @@ def test_youtube_unshortened():
    transformed_url = apply_url_transformations(url)

    assert transformed_url == "https://www.youtube.com/watch?v=YbJOTdZBX1g&t=1"
+
+
+def test_exempt_url_not_transformed():
+    """Ensure that an exempt url doesn't get transformed."""
+    # The forum.paradoxplaza.com hostname is exempted, so the url is expected to
+    # come back from apply_url_transformations exactly as it went in.
+    url = "https://forum.paradoxplaza.com/forum/index.php?forums/518/"
+
+    assert apply_url_transformations(url) == url
--- a/tildes/tildes/lib/url_transform.py
+++ b/tildes/tildes/lib/url_transform.py
@ -30,6 +30,9 @@ def apply_url_transformations(url: str) -> str:
    """
    parsed_url = urlparse(url)

+    if _is_exempt_from_transformations(parsed_url):
+        return url
+
    try:
        parsed_url = _apply_all_transformations(parsed_url)
    except UrlTransformationLoopError:
@ -39,6 +42,16 @@ def apply_url_transformations(url: str) -> str:
    return urlunparse(parsed_url)


+def _is_exempt_from_transformations(parsed_url: ParseResult) -> bool:
+    """Return whether this url should be exempt from the transformation process.
+
+    Exemptions are hard-coded here as hostname checks: the result is True when
+    the parsed url's hostname is "forum.paradoxplaza.com" and False for
+    everything else.
+    """
+
+    # Paradox forums use an invalid url scheme that will break if processed
+    # NOTE(review): "scheme" presumably refers to the overall url layout (such as
+    # "index.php?forums/518/") rather than the "https" scheme itself - confirm.
+    if parsed_url.hostname == "forum.paradoxplaza.com":
+        return True
+
+    return False
+
+
 def _apply_all_transformations(parsed_url: ParseResult) -> ParseResult:
    """Apply all relevant UrlTransformer transformations to the url."""
    # Used to keep track of which transformations are restarting the process, so we