# Ilde's picture
# added examples
# a6e2157
# raw
# history blame
# 1.8 kB
import html
import pickle

import gradio as gr
from gensim.models.keyedvectors import KeyedVectors
from nltk.tokenize import word_tokenize
# Use gensim's KeyedVectors to read the embeddings (word2vec text format).
wordvectors_file_vec = 'smaller_model_spa.txt'
smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)

# Load the stop-word collection that filter_words() checks tokens against.
# NOTE: pickle.load can execute arbitrary code while deserializing; this is
# assumed to be a trusted file bundled with the app. Never point it at
# user-supplied or downloaded data.
with open('stop_words.pkl', 'rb') as f:
    stop_words = pickle.load(f)
def filter_words(x):
    """Split *x* on whitespace and drop stop words.

    Args:
        x: Text to tokenize.

    Returns:
        List of the tokens of ``x`` (original casing preserved) whose
        lowercase form is not in the module-level ``stop_words``.
    """
    # Plain whitespace splitting is kept on purpose: the original note says
    # nltk's tokenizer misbehaves on Hugging Face. split() without an argument
    # collapses runs of whitespace, ignores leading/trailing whitespace and
    # also splits on newlines/tabs, so no empty-string tokens are produced
    # (split(' ') yields '' for every extra space).
    word_tokens = x.split()
    return [w for w in word_tokens if w.lower() not in stop_words]
def reverse_dictionary(definicion):
    """Return an HTML list of the words that best match a definition.

    Args:
        definicion: Free-text definition to look up.

    Returns:
        HTML built by ``list_to_html`` from the ``(word, score)`` pairs that
        ``smaller_model.most_similar_cosmul`` returns for the filtered words,
        or an empty result list when no usable query word remains.
    """
    # Keep only tokens the model has a vector for: a single out-of-vocabulary
    # token makes most_similar_cosmul raise and would fail the whole query.
    words = [w for w in filter_words(definicion) if w in smaller_model]
    if not words:
        # Empty input, only stop words, or only unknown words: nothing to
        # query with, so return an empty result list instead of raising.
        return list_to_html([])
    list_similar = smaller_model.most_similar_cosmul(positive=words)
    return list_to_html(list_similar)
def list_to_html(lst, title="Results"):
    """Render ``(word, score)`` pairs as an HTML heading plus a bullet list.

    Args:
        lst: Iterable of ``(word, score)`` pairs; ``score`` must support the
            ``.2f`` format specifier.
        title: Heading text placed inside the ``<h3>`` element.

    Returns:
        One HTML string of the form ``<h3>title</h3><ul>...</ul>`` with one
        ``<li>`` per pair and the score shown with two decimals.
    """
    # Escape the text parts so characters such as '<' or '&' are displayed
    # literally instead of being interpreted as markup.
    items = "".join(
        f"<li><b>{html.escape(str(word))}</b>: {score:.2f}</li>"
        for word, score in lst
    )
    return f"<h3>{html.escape(str(title))}</h3><ul>{items}</ul>"
# Static text handed to the Gradio interface below (user-facing, in Spanish).
title = "Diccionario inverso en español"
description = "Un diccionario inverso utilizando embeddings Word2Vec de SBWCA y filtrando palabras de Wikcionario. Creado como demo para Gradio y HuggingFace Spaces."
# Sample definitions passed as the interface's `examples` argument.
examples = ['angustia porque se te olvido algo',
            'actor mujer',
            'tardanza o lentitud con que se hace algo',
            'miedo a las alturas',
            'vehículo que anda bajo el agua',
            'grupo de lobos que andan juntos']
# Wire reverse_dictionary to a 5-line text input and an HTML output, then
# launch the app. This runs at import time (there is no __main__ guard).
# NOTE(review): gr.inputs / gr.outputs look like legacy namespaces in newer
# Gradio releases — confirm against the Gradio version this app is pinned to
# before upgrading.
gr.Interface(fn = reverse_dictionary,
             inputs = gr.inputs.Textbox(lines=5, placeholder="Enter your text here..."),
             outputs= gr.outputs.HTML(),
             title = title,
             description = description,
             examples = examples).launch()