From 5968fcd139bfb8b64076d93c74c4245256e67d3e Mon Sep 17 00:00:00 2001 From: Deimos Date: Tue, 7 Aug 2018 19:04:26 -0600 Subject: [PATCH] Add handling for "curly" apostrophes There was some special handling of apostrophes in two string-related functions: the one for generating url slugs, as well as the one for doing a word count. Neither of these was handling "curly" apostrophes (Unicode character U+2019) properly before, so they've both been updated now. --- tildes/tests/test_string.py | 13 +++++++++++++ tildes/tildes/lib/string.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tildes/tests/test_string.py b/tildes/tests/test_string.py index e7ab4ef..d7b637e 100644 --- a/tildes/tests/test_string.py +++ b/tildes/tests/test_string.py @@ -76,6 +76,13 @@ def test_url_slug_with_punctuation(): assert convert_to_url_slug(original) == expected +def test_url_slug_with_apostrophes(): + """Ensure url slugs don't replace apostrophes with underscores.""" + original = "Here's what we don’t want as underscores" + expected = "heres_what_we_dont_want_as_underscores" + assert convert_to_url_slug(original) == expected + + def test_url_slug_truncation(): """Ensure a simple url slug truncates as expected.""" original = "Here's another string to truncate." @@ -119,6 +126,12 @@ def test_word_count_with_apostrophes(): assert word_count(string) == 9 +def test_word_count_with_curly_apostrophes(): + """Ensure curly apostrophes don't mess up the word count.""" + string = "It’s not always false that apostrophes aren’t counted properly." 
+ assert word_count(string) == 9 + + def test_word_count_with_lots_of_punctuation(): """Ensure word count works properly with lots of punctuation.""" string = ( diff --git a/tildes/tildes/lib/string.py b/tildes/tildes/lib/string.py index 0da54e8..0cd818b 100644 --- a/tildes/tildes/lib/string.py +++ b/tildes/tildes/lib/string.py @@ -7,7 +7,7 @@ from urllib.parse import quote # regex for matching an entire word, handles words that include an apostrophe -WORD_REGEX = re.compile(r"\w[\w']*") +WORD_REGEX = re.compile(r"\w[\w'’]*") def word_count(string: str) -> int: @@ -20,7 +20,7 @@ def convert_to_url_slug(original: str, max_length: int = 100) -> str: slug = original.lower() # remove apostrophes so contractions don't get broken up by underscores - slug = slug.replace("'", '') + slug = re.sub("['’]", '', slug) # replace all remaining non-word characters with underscores slug = re.sub(r'\W+', '_', slug)