Removing Python dependencies

Any thoughts on ways to remove the Python-specific parts (list comprehensions, `sorted`, `unicodedata`) from the following code so it can be converted seamlessly to Mojo?
from unicodedata import category


def find_words(full_text: str) -> list[str]:
    """Split *full_text* into words made of Unicode letters and underscores.

    A word is a maximal run of characters that are either ``_`` or whose
    Unicode general category starts with ``L`` (the letter categories).
    Every other character (digits, whitespace, punctuation, ...) acts as a
    separator and is dropped.

    Returns the words in order of appearance; duplicates are kept.
    """
    words_list: list[str] = []
    current_word = ""

    for char in full_text:
        is_unicode_letter: bool = char == "_" or category(char).startswith("L")
        if is_unicode_letter:
            current_word += char
            continue

        # Separator: flush the word collected so far, if there is one.
        if current_word:
            words_list.append(current_word)
            current_word = ""

    # The text may end in the middle of a word, with no trailing separator.
    if current_word:
        words_list.append(current_word)

    return words_list


def find_autocompletions(
    expected_keywords: list[str], full_text: str, original_word: str
) -> list[str]:
    """Return autocompletion candidates for the partially typed *original_word*.

    Candidates are the words of *full_text* (as split by ``find_words``)
    that start with *original_word*, excluding *original_word* itself. If
    the text yields no candidate, *expected_keywords* is searched for the
    same prefix instead.

    The result contains each candidate once, ordered by number of
    occurrences (most frequent first), then by length (longest first), then
    alphabetically.
    """
    candidates: list[str] = []
    for word in find_words(full_text):
        if word != original_word and word.startswith(original_word):
            candidates.append(word)

    # Fall back to the keywords whenever the text has no match at all, not
    # only when the text contains no other words.
    if not candidates:
        for keyword in expected_keywords:
            if keyword.startswith(original_word):
                candidates.append(keyword)

    # Tally occurrences in a single pass so the sort key does not have to
    # rescan the candidate list for every element.
    occurrences: dict[str, int] = {}
    for word in candidates:
        occurrences[word] = occurrences.get(word, 0) + 1

    return sorted(
        occurrences, key=lambda word: (-occurrences[word], -len(word), word)
    )


# Smoke test: ASCII words, an underscore identifier and a CJK word are all
# extracted, with the spaces between them acting as separators.
assert find_words("Hello this is a variable_name 用户数量") == (
    ["Hello", "this", "is", "a", "variable_name", "用户数量"]
)
# Smoke test: "text" is the only word in the text that starts with "te".
assert find_autocompletions([], "Long text!", "te") == ["text"]
from unicodedata import category


def find_words(full_text: str) -> list[str]:
    """Split *full_text* into words made of Unicode letters and underscores.

    A word is a maximal run of characters that are either ``_`` or whose
    Unicode general category starts with ``L`` (the letter categories).
    Every other character (digits, whitespace, punctuation, ...) acts as a
    separator and is dropped.

    Returns the words in order of appearance; duplicates are kept.
    """
    words_list: list[str] = []
    current_word = ""

    for char in full_text:
        is_unicode_letter: bool = char == "_" or category(char).startswith("L")
        if is_unicode_letter:
            current_word += char
            continue

        # Separator: flush the word collected so far, if there is one.
        if current_word:
            words_list.append(current_word)
            current_word = ""

    # The text may end in the middle of a word, with no trailing separator.
    if current_word:
        words_list.append(current_word)

    return words_list


def find_autocompletions(
    expected_keywords: list[str], full_text: str, original_word: str
) -> list[str]:
    """Return autocompletion candidates for the partially typed *original_word*.

    Candidates are the words of *full_text* (as split by ``find_words``)
    that start with *original_word*, excluding *original_word* itself. If
    the text yields no candidate, *expected_keywords* is searched for the
    same prefix instead.

    The result contains each candidate once, ordered by number of
    occurrences (most frequent first), then by length (longest first), then
    alphabetically.
    """
    candidates: list[str] = []
    for word in find_words(full_text):
        if word != original_word and word.startswith(original_word):
            candidates.append(word)

    # Fall back to the keywords whenever the text has no match at all, not
    # only when the text contains no other words.
    if not candidates:
        for keyword in expected_keywords:
            if keyword.startswith(original_word):
                candidates.append(keyword)

    # Tally occurrences in a single pass so the sort key does not have to
    # rescan the candidate list for every element.
    occurrences: dict[str, int] = {}
    for word in candidates:
        occurrences[word] = occurrences.get(word, 0) + 1

    return sorted(
        occurrences, key=lambda word: (-occurrences[word], -len(word), word)
    )


# Smoke test: ASCII words, an underscore identifier and a CJK word are all
# extracted, with the spaces between them acting as separators.
assert find_words("Hello this is a variable_name 用户数量") == (
    ["Hello", "this", "is", "a", "variable_name", "用户数量"]
)
# Smoke test: "text" is the only word in the text that starts with "te".
assert find_autocompletions([], "Long text!", "te") == ["text"]
2 Replies
benny
benny7mo ago
I'm on my phone, so the indentation looks weird. What is the intended functionality of this?
Moosems / Three chickens
It searches for all words in a given piece of text (a word being an unbroken run of Unicode letters) and checks whether any of them would match for autocomplete (that is, whether a word starts with the portion typed so far). If nothing already in the file matches, it also checks against the built-in keywords, which are given as a list.
Want results from more Discord servers?
Add your server