diff --git a/tildes/tildes/lib/string.py b/tildes/tildes/lib/string.py index d012807..74e3181 100644 --- a/tildes/tildes/lib/string.py +++ b/tildes/tildes/lib/string.py @@ -189,19 +189,18 @@ def _sanitize_characters(original: str) -> str: # newlines, which are replaced with normal spaces if char == "\n": final_characters.append(" ") - elif char == "\u200D": - final_length = len(final_characters) - # only check for the ZWJ if it's between two characters - if final_length <= index < len(original) - 1: - char_before_category = unicodedata.category( - final_characters[final_length - 1] - ) - char_after_category = unicodedata.category(original[index + 1]) - # only keep the ZWJ if it's between two symbol characters - if char_before_category.startswith( - "S" - ) and char_after_category.startswith("S"): - final_characters.append("\u200D") + + # Keep zero-width joiner only if it's between two symbol characters, so we + # don't break certain emoji variants + if char == "\u200D": + try: + before_category = unicodedata.category(final_characters[-1]) + after_category = unicodedata.category(original[index + 1]) + except IndexError: + continue + + if before_category.startswith("S") and after_category.startswith("S"): + final_characters.append(char) else: # any other type of character, just keep it final_characters.append(char)