Browse Source

Adjust zero-width joiner check to fix IndexError

The old check could raise an IndexError on a string that started with a
zero-width joiner and had at least one more character after it: no
characters had been kept yet, so there was no preceding character to look up.
merge-requests/128/merge
Deimos 4 years ago
parent
commit
6f7618d1a1
  1. 25
      tildes/tildes/lib/string.py

25
tildes/tildes/lib/string.py

@@ -189,19 +189,18 @@ def _sanitize_characters(original: str) -> str:
# newlines, which are replaced with normal spaces # newlines, which are replaced with normal spaces
if char == "\n": if char == "\n":
final_characters.append(" ") final_characters.append(" ")
elif char == "\u200D":
final_length = len(final_characters)
# only check for the ZWJ if it's between two characters
if final_length <= index < len(original) - 1:
char_before_category = unicodedata.category(
final_characters[final_length - 1]
)
char_after_category = unicodedata.category(original[index + 1])
# only keep the ZWJ if it's between two symbol characters
if char_before_category.startswith(
"S"
) and char_after_category.startswith("S"):
final_characters.append("\u200D")
# Keep zero-width joiner only if it's between two symbol characters, so we
# don't break certain emoji variants
if char == "\u200D":
try:
before_category = unicodedata.category(final_characters[-1])
after_category = unicodedata.category(original[index + 1])
except IndexError:
continue
if before_category.startswith("S") and after_category.startswith("S"):
final_characters.append(char)
else: else:
# any other type of character, just keep it # any other type of character, just keep it
final_characters.append(char) final_characters.append(char)

Loading…
Cancel
Save