Browse Source

Remove utm_ query params from link topics

Adds the start of a "url transformation" system, initially only removing
any utm_ parameters from link topics.
merge-requests/53/head
Deimos 6 years ago
parent
commit
861e5f9589
  1. 20
      tildes/tests/test_url_transform.py
  2. 34
      tildes/tildes/lib/url_transform.py
  3. 3
      tildes/tildes/models/topic/topic.py

20
tildes/tests/test_url_transform.py

@ -0,0 +1,20 @@
# Copyright (c) 2018 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
from tildes.lib.url_transform import apply_url_transformations
def test_remove_utm_query_params():
    """Check that utm_* query params are stripped while other params survive."""
    original = "http://example.com/path?utm_source=tildes&utm_campaign=test&something=ok"
    expected = "http://example.com/path?something=ok"

    assert apply_url_transformations(original) == expected
def test_non_utm_params_unaffected():
    """Check that a url with no utm_* query params comes back unchanged."""
    original = "http://example.com/path?one=x&two=y&three=z"

    assert apply_url_transformations(original) == original

34
tildes/tildes/lib/url_transform.py

@ -0,0 +1,34 @@
# Copyright (c) 2018 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Functions related to transforming URLs (sanitization, cleanup, etc.)."""
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
def apply_url_transformations(url: str) -> str:
    """Run a url through every transformation defined in this module.

    Callers should normally import and use only this function; reach for an
    individual transformation directly only when a specific subset is needed.
    """
    # Each transformation takes a url string and returns the transformed url.
    for transformation in (remove_utm_query_params,):
        url = transformation(url)

    return url
def remove_utm_query_params(url: str) -> str:
    """Remove any utm_* query parameters from a url.

    The query string is parsed, every parameter whose (decoded) name starts with
    "utm_" is dropped, and the remaining parameters are re-encoded into the url.

    Parameters with empty values (e.g. "?empty=&a=1") are kept; a bare name with
    no "=" (e.g. "?flag") is kept as well, but is re-encoded as "flag=".

    Note that the query is rebuilt from the parsed values, so repeated parameters
    are grouped together under their first occurrence and values are re-encoded
    by urlencode.
    """
    parsed = urlparse(url)

    # keep_blank_values=True is required here: by default parse_qs silently
    # discards parameters with empty values, which would remove non-utm params
    # that this function is supposed to leave alone.
    query_params = parse_qs(parsed.query, keep_blank_values=True)

    cleaned_params = {
        param: values
        for param, values in query_params.items()
        if not param.startswith("utm_")
    }

    # doseq=True expands each list of values back into repeated "name=value" pairs
    parsed = parsed._replace(query=urlencode(cleaned_params, doseq=True))

    return urlunparse(parsed)

3
tildes/tildes/models/topic/topic.py

@ -32,6 +32,7 @@ from tildes.lib.id import id_to_id36
from tildes.lib.markdown import convert_markdown_to_safe_html
from tildes.lib.string import convert_to_url_slug
from tildes.lib.url import get_domain_from_url, is_tweet
from tildes.lib.url_transform import apply_url_transformations
from tildes.metrics import incr_counter
from tildes.models import DatabaseModel
from tildes.models.group import Group
@ -207,7 +208,7 @@ class Topic(DatabaseModel):
"""Create a new link topic."""
new_topic = cls._create_base_topic(group, author, title)
new_topic.topic_type = TopicType.LINK
new_topic.link = link
new_topic.link = apply_url_transformations(link)
new_topic.original_url = link
incr_counter("topics", type="link")

Loading…
Cancel
Save