from gensim.models.keyedvectors import KeyedVectors import pickle from nltk.tokenize import word_tokenize import gradio as gr # Use gensim Keyvectors to read the embbedings wordvectors_file_vec = 'smaller_model_spa.txt' smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec) with open('stop_words.pkl', 'rb') as f: stop_words = pickle.load(f) def filter_words(x): word_tokens = x.split(' ') # shitty tokenization because ntlk tokenize on hf is working weeeird filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words] return filtered_sentence def reverse_dictionary(definicion): words = filter_words(definicion) list_similar = smaller_model.most_similar_cosmul(positive= words) return list_to_html(list_similar) def list_to_html(lst, title="Results"): html_str = f"