From 6f7618d1a10707f9b12d93b390051fb3b2644dcc Mon Sep 17 00:00:00 2001 From: Deimos Date: Sun, 14 Mar 2021 18:36:11 -0600 Subject: [PATCH] Adjust zero-width joiner check to fix IndexError There was the potential for an IndexError here, with a string that started with a zero-width joiner and had at least one more character afterwards. --- tildes/tildes/lib/string.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tildes/tildes/lib/string.py b/tildes/tildes/lib/string.py index d012807..74e3181 100644 --- a/tildes/tildes/lib/string.py +++ b/tildes/tildes/lib/string.py @@ -189,19 +189,18 @@ def _sanitize_characters(original: str) -> str: # newlines, which are replaced with normal spaces if char == "\n": final_characters.append(" ") - elif char == "\u200D": - final_length = len(final_characters) - # only check for the ZWJ if it's between two characters - if final_length <= index < len(original) - 1: - char_before_category = unicodedata.category( - final_characters[final_length - 1] - ) - char_after_category = unicodedata.category(original[index + 1]) - # only keep the ZWJ if it's between two symbol characters - if char_before_category.startswith( - "S" - ) and char_after_category.startswith("S"): - final_characters.append("\u200D") + + # Keep zero-width joiner only if it's between two symbol characters, so we + # don't break certain emoji variants + if char == "\u200D": + try: + before_category = unicodedata.category(final_characters[-1]) + after_category = unicodedata.category(original[index + 1]) + except IndexError: + continue + + if before_category.startswith("S") and after_category.startswith("S"): + final_characters.append(char) else: # any other type of character, just keep it final_characters.append(char)