from collections import Counter
from unicodedata import category
def find_words(full_text: str) -> list[str]:
    """Split ``full_text`` into words made of letters and underscores.

    A word is a maximal run of characters that are either ``"_"`` or have a
    Unicode general category starting with ``"L"``. Every other character
    (digits, whitespace, punctuation, ...) acts as a separator and is
    dropped.

    Args:
        full_text: The text to scan.

    Returns:
        The words in the order they appear; duplicates are kept.
    """
    found: list[str] = []
    # Index where the word being scanned began, or None between words.
    run_start: int | None = None
    for position, symbol in enumerate(full_text):
        if symbol == "_" or category(symbol).startswith("L"):
            if run_start is None:
                run_start = position
        elif run_start is not None:
            # First separator after a word: emit the run that just ended.
            found.append(full_text[run_start:position])
            run_start = None
    # The text may end in the middle of a word.
    if run_start is not None:
        found.append(full_text[run_start:])
    return found
def find_autocompletions(
    expected_keywords: list[str], full_text: str, original_word: str
) -> list[str]:
    """Return completion candidates for ``original_word``.

    Candidates are the words of ``full_text`` (as split by ``find_words``)
    that differ from ``original_word``. ``expected_keywords`` is consulted
    only as a fallback, when the text yields no such word at all.

    Args:
        expected_keywords: Fallback candidates used when the text contains
            no word other than ``original_word``.
        full_text: Text whose words are offered as completions.
        original_word: The prefix being completed.

    Returns:
        The unique candidates that start with ``original_word``, ordered by
        descending number of occurrences, then descending length, then
        ascending string order.
    """
    candidates = [
        word for word in find_words(full_text) if word != original_word
    ]
    if not candidates:
        candidates = expected_keywords
    # Count every match in one pass; calling list.count() inside the sort
    # key would rescan the whole list once per unique word.
    occurrences = Counter(
        word for word in candidates if word.startswith(original_word)
    )
    return sorted(
        occurrences, key=lambda word: (-occurrences[word], -len(word), word)
    )
# Self-checks that run when the module is loaded (assert statements are
# skipped when Python runs with -O).

# Underscores and non-ASCII letters stay inside words, so the expected
# result is simply the sample split on its spaces.
assert find_words("Hello this is a variable_name 用户数量") == (
    "Hello this is a variable_name 用户数量".split()
)

# Candidates are taken from the words of the text; the "!" is dropped.
assert find_autocompletions([], "Long text!", "te") == ["text"]