"""Gradio demo: a Spanish reverse dictionary built on Word2Vec embeddings.

The user types a definition; the stop words are removed from it and the
remaining words are used as a positive query against the embedding model.
The most similar vocabulary entries are shown as an HTML list.
"""
import html
import pickle

import gradio as gr
import nltk
from gensim.models.keyedvectors import KeyedVectors
from nltk.tokenize import word_tokenize

# `word_tokenize` needs the Punkt tokenizer data. Newer NLTK releases look for
# the 'punkt_tab' resource instead of 'punkt', so both are requested; a
# download that is not needed by the installed version is simply unused.
nltk.download('punkt')
nltk.download('punkt_tab')

# Use gensim KeyedVectors to read the embeddings.
wordvectors_file_vec = 'smaller_model_spa.txt'
smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)

# NOTE(security): pickle.load can execute arbitrary code. This is acceptable
# only as long as 'stop_words.pkl' is a trusted file shipped with the app.
with open('stop_words.pkl', 'rb') as f:
    stop_words = pickle.load(f)


def filter_words(x):
    """Tokenize *x* as Spanish text and drop the stop words.

    Args:
        x: The text to tokenize.

    Returns:
        The tokens, in order and with their original casing, whose
        lower-cased form is not in ``stop_words``.
    """
    word_tokens = word_tokenize(x, language="spanish")
    return [w for w in word_tokens if w.lower() not in stop_words]


def reverse_dictionary(definicion):
    """Return an HTML fragment with the words that best match a definition.

    Args:
        definicion: Free-text definition typed by the user.

    Returns:
        The HTML produced by ``list_to_html`` for the entries returned by
        ``smaller_model.most_similar_cosmul``, or an empty "No results" list
        when no word of the definition is in the model vocabulary.
    """
    # Querying a key that is missing from the vocabulary raises KeyError, so
    # keep only known tokens. A token that is unknown as typed is retried in
    # lower case (e.g. a capitalised first word of the sentence).
    known_words = []
    for word in filter_words(definicion):
        if word in smaller_model:
            known_words.append(word)
        elif word.lower() in smaller_model:
            known_words.append(word.lower())

    # An empty query (blank input, only stop words, only unknown tokens) has
    # nothing to be similar to; report that instead of calling the model.
    if not known_words:
        return list_to_html([], title="No results")

    list_similar = smaller_model.most_similar_cosmul(positive=known_words)
    return list_to_html(list_similar)


def list_to_html(lst, title="Results"):
    """Render *lst* as an HTML heading followed by a bulleted list.

    Args:
        lst: Items to display. A two-element tuple/list is treated as a
            ``(word, score)`` pair and only the word is shown; any other item
            is shown via ``str()``.
        title: Heading text placed above the list.

    Returns:
        A string containing the HTML fragment.
    """
    rows = []
    for item in lst:
        if isinstance(item, (tuple, list)) and len(item) == 2:
            item = item[0]
        # Escape so that vocabulary entries cannot inject markup.
        rows.append(f"<li>{html.escape(str(item))}</li>")
    items_html = "\n".join(rows)
    html_str = (
        f"<div>\n<h3>{html.escape(str(title))}</h3>\n"
        f"<ul>\n{items_html}\n</ul>\n</div>"
    )
    return html_str


title = "Diccionario inverso en español"
description = "Un diccionario inverso utilizando embeddings Word2Vec de SBWCA y filtrando palabras de Wikcionario. Creado como demo para Gradio y HuggingFace Spaces."
examples = ['angustia porque se te olvido algo']

# Current Gradio releases expose components at the top level (gr.Textbox,
# gr.HTML); the gr.inputs / gr.outputs namespaces exist only in old releases.
# Prefer the top-level classes and fall back to the legacy ones.
_textbox_cls = getattr(gr, "Textbox", None) or gr.inputs.Textbox
_html_cls = getattr(gr, "HTML", None) or gr.outputs.HTML

gr.Interface(
    fn=reverse_dictionary,
    inputs=_textbox_cls(lines=5, placeholder="Enter your text here..."),
    outputs=_html_cls(),
    title=title,
    description=description,
    examples=examples,
).launch(share=True)