Modular•12mo ago

Removing Python dependencies

Any thoughts on ways to remove the Python-specific parts (list comprehensions, `sorted`, `unicodedata`) from the following so it can be converted seamlessly to Mojo?

from unicodedata import category


def find_words(full_text: str) -> list[str]:
    """Split ``full_text`` into words.

    A word is a maximal run of characters that are either an underscore or
    have a Unicode general category beginning with "L" (a letter). Every
    other character acts as a separator and is dropped.
    """
    found: list[str] = []
    pending: list[str] = []

    for ch in full_text:
        if ch == "_" or category(ch).startswith("L"):
            pending.append(ch)
        elif pending:
            # A separator closes the word collected so far.
            found.append("".join(pending))
            pending = []

    # Flush a word that runs up to the end of the text.
    if pending:
        found.append("".join(pending))

    return found


def find_autocompletions(
    expected_keywords: list[str], full_text: str, original_word: str
) -> list[str]:
    """Return autocompletion candidates for a partially typed word.

    Candidates are the words of ``full_text`` (as split by ``find_words``)
    that start with ``original_word``, excluding ``original_word`` itself.
    When the text yields no candidate, ``expected_keywords`` is searched
    with the same prefix test instead.

    Args:
        expected_keywords: Fallback words used when the text has no match.
        full_text: Text whose words are offered as completions.
        original_word: The typed prefix to complete.

    Returns:
        The distinct candidates, ordered by descending number of
        occurrences, then descending length, then ascending string order.
    """
    candidates = [
        word
        for word in find_words(full_text)
        if word != original_word and word.startswith(original_word)
    ]

    # Fall back to the keywords whenever the text offers no match, not only
    # when the text contains no other words at all.
    if not candidates:
        candidates = [
            word for word in expected_keywords if word.startswith(original_word)
        ]

    # Count each candidate once up front rather than calling list.count()
    # for every sort key, which is quadratic in the number of candidates.
    occurrences: dict[str, int] = {}
    for word in candidates:
        occurrences[word] = occurrences.get(word, 0) + 1

    return sorted(
        occurrences,
        key=lambda word: (-occurrences[word], -len(word), word),
    )


# Words are maximal runs of Unicode letters and underscores, so spaces split
# them while "variable_name" and the CJK run each stay a single word.
assert find_words("Hello this is a variable_name 用户数量") == [
    "Hello",
    "this",
    "is",
    "a",
    "variable_name",
    "用户数量",
]
# "text" is the only word in the input that starts with the prefix "te".
assert find_autocompletions([], "Long text!", "te") == ["text"]

from unicodedata import category


def find_words(full_text: str) -> list[str]:
    """Split ``full_text`` into words.

    A word is a maximal run of characters that are either an underscore or
    have a Unicode general category beginning with "L" (a letter). Every
    other character acts as a separator and is dropped.
    """
    found: list[str] = []
    pending: list[str] = []

    for ch in full_text:
        if ch == "_" or category(ch).startswith("L"):
            pending.append(ch)
        elif pending:
            # A separator closes the word collected so far.
            found.append("".join(pending))
            pending = []

    # Flush a word that runs up to the end of the text.
    if pending:
        found.append("".join(pending))

    return found


def find_autocompletions(
    expected_keywords: list[str], full_text: str, original_word: str
) -> list[str]:
    """Return autocompletion candidates for a partially typed word.

    Candidates are the words of ``full_text`` (as split by ``find_words``)
    that start with ``original_word``, excluding ``original_word`` itself.
    When the text yields no candidate, ``expected_keywords`` is searched
    with the same prefix test instead.

    Args:
        expected_keywords: Fallback words used when the text has no match.
        full_text: Text whose words are offered as completions.
        original_word: The typed prefix to complete.

    Returns:
        The distinct candidates, ordered by descending number of
        occurrences, then descending length, then ascending string order.
    """
    candidates = [
        word
        for word in find_words(full_text)
        if word != original_word and word.startswith(original_word)
    ]

    # Fall back to the keywords whenever the text offers no match, not only
    # when the text contains no other words at all.
    if not candidates:
        candidates = [
            word for word in expected_keywords if word.startswith(original_word)
        ]

    # Count each candidate once up front rather than calling list.count()
    # for every sort key, which is quadratic in the number of candidates.
    occurrences: dict[str, int] = {}
    for word in candidates:
        occurrences[word] = occurrences.get(word, 0) + 1

    return sorted(
        occurrences,
        key=lambda word: (-occurrences[word], -len(word), word),
    )


# Words are maximal runs of Unicode letters and underscores, so spaces split
# them while "variable_name" and the CJK run each stay a single word.
assert find_words("Hello this is a variable_name 用户数量") == [
    "Hello",
    "this",
    "is",
    "a",
    "variable_name",
    "用户数量",
]
# "text" is the only word in the input that starts with the prefix "te".
assert find_autocompletions([], "Long text!", "te") == ["text"]

2 Replies

benny•12mo ago

I'm on my phone, so the indenting is weird. What is the intended functionality of this?

moosems_yeehawOP•12mo ago

It searches for all words in a given piece of text (all non-separated Unicode letters) and checks whether any of them would match for autocomplete (i.e. whether they start with the typed portion of a word). If nothing already in the file matches, it also checks against the built-in keywords, which are given as a list.

Gaming

Programming

Removing Python dependencies

Did you find this page helpful?