From a13b5b01769c08da1a4474dd3f9919ebb3f49074 Mon Sep 17 00:00:00 2001 From: Deimos Date: Thu, 10 Jan 2019 16:17:36 -0700 Subject: [PATCH] Retain blank query params in url transformations --- tildes/tests/test_url_transform.py | 12 ++++++++++++ tildes/tildes/lib/url_transform.py | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tildes/tests/test_url_transform.py b/tildes/tests/test_url_transform.py index e942dae..85b7e26 100644 --- a/tildes/tests/test_url_transform.py +++ b/tildes/tests/test_url_transform.py @@ -4,6 +4,18 @@ from tildes.lib.url_transform import apply_url_transformations +def test_blank_query_params_kept(): + """Ensure that query params with no value make it through the process. + + Some sites treat the presence of blank params differently from their absence, so + we don't want to remove them (which urllib's parse_qs and parse_qsl do by default). + """ + url = "http://example.com/path?one=1&two=2&blank=&three=3" + transformed_url = apply_url_transformations(url) + + assert transformed_url == url + + def test_remove_utm_query_params(): """Ensure that utm query params are removed but others are left.""" url = "http://example.com/path?utm_source=tildes&utm_campaign=test&something=ok" diff --git a/tildes/tildes/lib/url_transform.py b/tildes/tildes/lib/url_transform.py index ce5a817..053632c 100644 --- a/tildes/tildes/lib/url_transform.py +++ b/tildes/tildes/lib/url_transform.py @@ -118,7 +118,7 @@ class UtmQueryParamRemover(UrlTransformer): @classmethod def apply_transformation(cls, parsed_url: ParseResult) -> ParseResult: """Apply the actual transformation process to the url.""" - query_params = parse_qs(parsed_url.query) + query_params = parse_qs(parsed_url.query, keep_blank_values=True) cleaned_params = { param: value @@ -154,7 +154,7 @@ class RedditTrackingRemover(UrlTransformer): @classmethod def apply_transformation(cls, parsed_url: ParseResult) -> ParseResult: """Apply the actual transformation process to the url.""" - 
query_params = parse_qs(parsed_url.query) + query_params = parse_qs(parsed_url.query, keep_blank_values=True) query_params.pop("st", None) query_params.pop("sh", None) @@ -195,7 +195,7 @@ class YoutubeUnshortener(UrlTransformer): video_id = parsed_url.path.strip("/") # use parse_qsl() and insert() here so the v= is always the first query param - query_params = parse_qsl(parsed_url.query) + query_params = parse_qsl(parsed_url.query, keep_blank_values=True) query_params.insert(0, ("v", video_id)) return parsed_url._replace(