Browse Source

Handle zero width joiner unicode chars for proper emoji support

merge-requests/128/head
Flashynuff 3 years ago
parent
commit
2c4058ede3
  1. tildes/tests/test_markdown_field.py (2 lines changed)
  2. tildes/tests/test_simplestring_field.py (8 lines changed)
  3. tildes/tests/test_title.py (12 lines changed)
  4. tildes/tildes/lib/string.py (5 lines changed)
  5. tildes/tildes/schemas/fields.py (9 lines changed)

2
tildes/tests/test_markdown_field.py

@@ -54,7 +54,7 @@ def test_empty_string():
def test_all_whitespace_string():
"""Ensure a string that's all whitespace chars fails validation."""
with raises(ValidationError):
-validate_string(" \n \n\r\n \t ")
+validate_string(" \n \n\u200D\r\n \t ")
def test_carriage_returns_stripped():

8
tildes/tests/test_simplestring_field.py

@@ -52,7 +52,7 @@ def test_empty_string():
def test_all_whitespace_string():
"""Ensure a string that's entirely whitespace fails validation."""
with raises(ValidationError):
-process_string("\n \t \r\n ")
+process_string("\n \t \u200D\r\n ")
def test_normal_string_untouched():
@@ -77,6 +77,12 @@ def test_control_chars_removed():
assert result == "I can be sneaky and add problemchars."
def test_zero_width_joiners_kept_and_collapsed():
"""Ensure the zero width joiner char is kept and handled like spaces"""
original = "🤷\u200D\u200D\u200D\u200d"
assert process_string(original) == "🤷\u200D\u200d"
def test_leading_trailing_spaces_removed():
"""Ensure leading/trailing spaces are removed from the string."""
original = " Centered! "

12
tildes/tests/test_title.py

@@ -35,7 +35,7 @@ def test_empty_title_invalid(title_schema):
def test_whitespace_only_title_invalid(title_schema):
"""Ensure a whitespace-only title is invalid."""
with raises(ValidationError):
-title_schema.load({"title": " \n "})
+title_schema.load({"title": " \u200D\n "})
def test_whitespace_trimmed(title_schema):
@@ -78,3 +78,13 @@ def test_unicode_control_chars_removed(title_schema):
title = "nothing\u0000strange\u0085going\u009con\u007fhere"
result = title_schema.load({"title": title})
assert result["title"] == "nothingstrangegoingonhere"
def test_zero_width_joiners_kept(title_schema):
"""Test that emojis are parsed correctly"""
title = "🤷🤷‍♂️🤷‍♀️🤷🏻🤷🏻‍♀️🤷🏻‍♂️🤷🏼🤷🏼‍♀️🤷🏼‍♂️🤷🏽🤷🏽‍♀️🤷🏽‍♂️🤷🏾🤷🏾‍♀️🤷🏾‍♂️🤷🏿🤷🏿‍♀️🤷🏿‍♂️"
result = title_schema.load({"title": title})
assert (
result["title"]
== "🤷🤷‍♂️🤷‍♀️🤷🏻🤷🏻‍♀️🤷🏻‍♂️🤷🏼🤷🏼‍♀️🤷🏼‍♂️🤷🏽🤷🏽‍♀️🤷🏽‍♂️🤷🏾🤷🏾‍♀️🤷🏾‍♂️🤷🏿🤷🏿‍♀️🤷🏿‍♂️"
)

5
tildes/tildes/lib/string.py

@@ -168,6 +168,9 @@ def simplify_string(original: str) -> str:
# replace consecutive spaces with a single space
simplified = re.sub(r"\s{2,}", " ", simplified)
# replace consecutive ZWJ with a single ZWJ
simplified = re.sub(r"\u200D{2,}", "\u200D", simplified)
# remove any remaining leading/trailing whitespace
simplified = simplified.strip()
@@ -189,6 +192,8 @@ def _sanitize_characters(original: str) -> str:
# newlines, which are replaced with normal spaces
if char == "\n":
final_characters.append(" ")
elif char == "\u200D":
final_characters.append("\u200D")
else:
# any other type of character, just keep it
final_characters.append(char)

9
tildes/tildes/schemas/fields.py

@@ -113,7 +113,7 @@ class Markdown(Field):
"""Validate the value is acceptable for a markdown field."""
super()._validate(value)
-if value.isspace():
+if value.replace("\u200D", " ").isspace():
raise ValidationError("Cannot be entirely whitespace.")
def _deserialize(
@@ -153,6 +153,13 @@ class SimpleString(Field):
super().__init__(validate=Length(min=1, max=max_length), **kwargs)
def _validate(self, value: str) -> None:
"""Validate the value is acceptable for a simple string field."""
super()._validate(value)
if value.replace("\u200D", " ").isspace():
raise ValidationError("Cannot be entirely whitespace.")
def _deserialize(
self,
value: str,

Loading…
Cancel
Save