From 5968fcd139bfb8b64076d93c74c4245256e67d3e Mon Sep 17 00:00:00 2001
From: Deimos <deimos@tildes.net>
Date: Tue, 7 Aug 2018 19:04:26 -0600
Subject: [PATCH] Add handling for "curly" apostrophes

Two string-related functions handle apostrophes specially: the one that
generates url slugs and the one that does a word count. Neither of them
handled "curly" apostrophes (Unicode character U+2019) properly, so both
have been updated to do so.
---
 tildes/tests/test_string.py | 13 +++++++++++++
 tildes/tildes/lib/string.py |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tildes/tests/test_string.py b/tildes/tests/test_string.py
index e7ab4ef..d7b637e 100644
--- a/tildes/tests/test_string.py
+++ b/tildes/tests/test_string.py
@@ -76,6 +76,13 @@ def test_url_slug_with_punctuation():
     assert convert_to_url_slug(original) == expected
 
 
+def test_url_slug_with_apostrophes():
+    """Ensure url slugs don't replace apostrophes with underscores."""
+    original = "Here's what we don’t want as underscores"
+    expected = "heres_what_we_dont_want_as_underscores"
+    assert convert_to_url_slug(original) == expected
+
+
 def test_url_slug_truncation():
     """Ensure a simple url slug truncates as expected."""
     original = "Here's another string to truncate."
@@ -119,6 +126,12 @@ def test_word_count_with_apostrophes():
     assert word_count(string) == 9
 
 
+def test_word_count_with_curly_apostrophes():
+    """Ensure curly apostrophes don't mess up the word count."""
+    string = "It’s not always false that apostrophes aren’t counted properly."
+    assert word_count(string) == 9
+
+
 def test_word_count_with_lots_of_punctuation():
     """Ensure word count works properly with lots of punctuation."""
     string = (
diff --git a/tildes/tildes/lib/string.py b/tildes/tildes/lib/string.py
index 0da54e8..0cd818b 100644
--- a/tildes/tildes/lib/string.py
+++ b/tildes/tildes/lib/string.py
@@ -7,7 +7,7 @@ from urllib.parse import quote
 
 
 # regex for matching an entire word, handles words that include an apostrophe
-WORD_REGEX = re.compile(r"\w[\w']*")
+WORD_REGEX = re.compile(r"\w[\w'’]*")
 
 
 def word_count(string: str) -> int:
@@ -20,7 +20,7 @@ def convert_to_url_slug(original: str, max_length: int = 100) -> str:
     slug = original.lower()
 
     # remove apostrophes so contractions don't get broken up by underscores
-    slug = slug.replace("'", '')
+    slug = re.sub("['’]", '', slug)
 
     # replace all remaining non-word characters with underscores
     slug = re.sub(r'\W+', '_', slug)