Spaces:
Runtime error
Runtime error
File size: 1,797 Bytes
26166f1 73e6565 26166f1 effe832 26166f1 a6e2157 26166f1 ec25b10 26166f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from gensim.models.keyedvectors import KeyedVectors
import pickle
from nltk.tokenize import word_tokenize
import gradio as gr
# Load the word embeddings through gensim's KeyedVectors. The loader is
# called with its defaults, i.e. the file is read as word2vec text format.
wordvectors_file_vec = 'smaller_model_spa.txt'
smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)

# Stop words are stored as a pickle shipped alongside the app.
# NOTE(review): pickle.load executes arbitrary code from the file; this is
# only acceptable while 'stop_words.pkl' is a trusted, repo-owned artifact.
with open('stop_words.pkl', 'rb') as stop_words_file:
    stop_words = pickle.load(stop_words_file)
def filter_words(x, stopwords=None):
    """Tokenize a definition and drop its stop words.

    The text is split on any run of whitespace (spaces, tabs, newlines), so
    repeated, leading or trailing whitespace never produces empty tokens.
    Each token is lowercased only for the stop-word comparison; the tokens
    that survive keep their original casing.

    Args:
        x: Free-text definition typed by the user. ``None`` or an empty
            string yields an empty list.
        stopwords: Optional collection of words to discard. Tokens are
            lowercased before the membership test, so entries should be
            lowercase. Defaults to the module-level ``stop_words``.

    Returns:
        list[str]: The tokens of ``x`` that are not stop words, in their
        original order.
    """
    if not x:
        return []
    if stopwords is None:
        stopwords = stop_words
    # Plain whitespace tokenization is deliberate: the original author noted
    # that NLTK's tokenizer behaved oddly on HuggingFace Spaces.
    return [w for w in x.split() if w.lower() not in stopwords]
def reverse_dictionary(definicion):
    """Return an HTML list of the words that best match a definition.

    The definition is tokenized with ``filter_words``; the tokens present in
    the embedding vocabulary are then used as the positive set of a
    ``most_similar_cosmul`` query on ``smaller_model``.

    Args:
        definicion: Free-text definition entered by the user.

    Returns:
        str: HTML produced by ``list_to_html`` for the query result, or a
        short HTML message when no token of the definition is in the
        vocabulary.
    """
    known_words = []
    for word in filter_words(definicion):
        # Prefer the spelling as typed; fall back to lowercase so that a
        # capitalised first word is not discarded needlessly.
        for candidate in (word, word.lower()):
            if candidate in smaller_model:
                known_words.append(candidate)
                break

    # Per the gensim docs the query is expected to raise on an
    # out-of-vocabulary key or an empty positive list, so neither case is
    # allowed to reach it.
    if not known_words:
        return "<p>No se encontraron palabras conocidas en la definición.</p>"

    list_similar = smaller_model.most_similar_cosmul(positive=known_words)
    return list_to_html(list_similar)
def list_to_html(lst, title="Results"):
    """Render ``(word, score)`` pairs as an HTML heading plus bullet list.

    Args:
        lst: Iterable of ``(word, score)`` pairs. ``score`` must accept the
            ``.2f`` format specification.
        title: Heading placed above the list. It is inserted verbatim (not
            escaped), so it may contain markup.

    Returns:
        str: ``<h3>title</h3><ul>...</ul>`` with one ``<li>`` per pair and
        scores rounded to two decimals. An empty ``lst`` gives an empty
        ``<ul></ul>``.
    """
    from html import escape

    # Words are escaped so that entries containing '&', '<' or '>' are shown
    # literally instead of being parsed as markup.
    items = "".join(
        f"<li><b>{escape(str(word))}</b>: {score:.2f}</li>"
        for word, score in lst
    )
    return f"<h3>{title}</h3><ul>{items}</ul>"
# Static texts shown on the demo page (user-facing strings, kept as written).
title = "Diccionario inverso en español"
description = "Un diccionario inverso utilizando embeddings Word2Vec de SBWCA y filtrando palabras de Wikcionario. Creado como demo para Gradio y HuggingFace Spaces."
examples = [
    'angustia porque se te olvido algo',
    'actor mujer',
    'tardanza o lentitud con que se hace algo',
    'miedo a las alturas',
    'vehículo que anda bajo el agua',
    'grupo de lobos que andan juntos',
]

# The legacy gr.inputs / gr.outputs namespaces were deprecated in Gradio 3
# and removed in Gradio 4; the top-level components are their replacements.
demo = gr.Interface(
    fn=reverse_dictionary,
    inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
    outputs=gr.HTML(),
    title=title,
    description=description,
    examples=examples,
)
demo.launch()
|