Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*-
"""ABSTRACTGEN_ES FINAL.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF

# installs
"""
# --- Environment setup (notebook-only) ---------------------------------------
# The commands below are IPython/Colab shell escapes. A leading "!" is not
# valid Python syntax, so they are kept as comments (the same treatment the
# notebook export already applied to the "%cd" magic further down). Run them
# manually in a shell or notebook cell, or declare the packages in the
# deployment's dependency manifest, before executing this module.
#
# ! pip install gpt_2_simple
# ! pip install tensorflow-estimator==1.15.1
# ! pip install gradio
# ! pip install huggingface_hub
# ! pip install easynmt
# ! pip install -U sentence-transformers
# !curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
# !sudo apt-get install git-lfs
# !git lfs install
# !git clone https://huggingface.co/franz96521/AbstractGeneratorES

# Commented out IPython magic to ensure Python compatibility.
# %cd '/content/AbstractGeneratorES'
"""# Init"""
import gpt_2_simple as gpt2 | |
import os | |
import tensorflow as tf | |
import pandas as pd | |
import re | |
# Size tag of the base GPT-2 model handled by gpt_2_simple.
model_name = "124M"

# Download the base model only when ./models/<model_name> is not on disk yet.
if not os.path.isdir(os.path.join("models", model_name)):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)

# Locations of the fine-tuned weights and the training data, relative to the
# working directory. The "TrainigData" spelling is kept verbatim because it is
# a runtime path (presumably matching the on-disk folder name - verify).
path = 'AbstractGenerator/'
checkpoint_dir = f'{path}weights/'
data_path = f'{path}TrainigData/'

# Per-language training file names and their full paths.
file_name_en, file_name_es = 'en', 'es'
file_path_en = f'{data_path}{file_name_en}'
file_path_es = f'{data_path}{file_name_es}'

# Sample delimiter tokens; `sufix` is used to truncate generated text.
prefix, sufix = '<|startoftext|>', '<|endoftext|>'
import gradio as gr | |
import random | |
from easynmt import EasyNMT | |
from sentence_transformers import SentenceTransformer, util | |
def generateAbstract(text):
    """Generate an abstract for *text* with the fine-tuned GPT-2 checkpoint.

    Every call resets the default TensorFlow graph, opens a new session,
    loads the ``run1`` checkpoint from ``checkpoint_dir`` and asks
    gpt_2_simple for a single sample, using ``str(text) + "\\nABSTRACT"`` as
    the prompt and ``sufix`` as the truncation marker.

    Args:
        text: Document body used as the generation prompt; coerced with
            ``str``.

    Returns:
        The first (and only) sample returned by ``gpt2.generate``.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A new session is opened on every call, so release this one even
        # when loading or generation fails; otherwise sessions pile up.
        sess.close()
def removeAbstract(text):
    """Split *text* into ``(abstract, body)`` at the introduction heading.

    The split point is the first occurrence of ``"Introducción"``; when that
    spelling is absent, the first occurrence of ``"INTRODUCCIÓN"`` is used
    instead. The heading itself stays at the start of the body.

    Args:
        text: Full document text.

    Returns:
        An ``(abstract, body)`` tuple of strings. When neither heading is
        present the whole text is returned as the body with an empty
        abstract, so callers can always unpack the result.
    """
    # Order matters: the title-case spelling takes precedence over upper-case.
    for heading in ("Introducción", "INTRODUCCIÓN"):
        position = text.find(heading)
        if position != -1:
            return text[:position], text[position:]
    # No heading found: nothing can be identified as the abstract.
    return "", text
def generated_similarity(type_of_input, cn_text):
    """Generate an abstract and compare it with the document's own abstract.

    English input is first translated to Spanish with EasyNMT. The text is
    then split with ``removeAbstract``, an abstract is generated from the
    body with ``generateAbstract``, and both abstracts are embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` to compute a cosine
    similarity.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Full document text, including its original abstract.

    Returns:
        A report string containing the body, the original abstract, the
        generated abstract and the similarity as a percentage rounded to
        three decimals. For any other ``type_of_input`` (including ``None``,
        i.e. nothing selected) a short explanatory message is returned.
    """
    if type_of_input not in ("English", "Spanish"):
        return "Unsupported input language: select English or Spanish."

    if type_of_input == "English":
        # English input is translated to Spanish before further processing.
        cn_text = EasyNMT('opus-mt').translate(cn_text, target_lang='es')

    # Guard against a missing split result: treat the whole text as body.
    parts = removeAbstract(cn_text)
    abstract_original, body = parts if parts is not None else ("", cn_text)

    # generateAbstract resets the TF default graph itself, so no reset is
    # needed here.
    generated_Abstract = generateAbstract(body)

    # Embed both abstracts and compare them.
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = model.encode(abstract_original, convert_to_tensor=True)
    embedding_2 = model.encode(generated_Abstract, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
    similarity_pct = float(round(similarity.item() * 100, 3))

    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{body}\n\n"
        "ABSTRACT ORIGINAL\n\n"
        f"{abstract_original}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{generated_Abstract}\n\n"
        f"SIMILARIDAD DE ABSTRACT: {similarity_pct}%\n"
    )
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that does not contain one.

    English input is first translated to Spanish with EasyNMT; the
    (translated) text is then passed to ``generateAbstract``.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Document text without an abstract.

    Returns:
        A report string containing the (possibly translated) input text and
        the generated abstract. For any other ``type_of_input`` (including
        ``None``, i.e. nothing selected) a short explanatory message is
        returned.
    """
    if type_of_input not in ("English", "Spanish"):
        return "Unsupported input language: select English or Spanish."

    if type_of_input == "English":
        # English input is translated to Spanish before generation.
        cn_text = EasyNMT('opus-mt').translate(cn_text, target_lang='es')

    # generateAbstract resets the TF default graph itself, so no reset is
    # needed here.
    generated_Abstract = generateAbstract(cn_text)

    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{cn_text}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{generated_Abstract}\n\n"
    )
# Gradio UI: two tabs, each with a language selector, an input textbox, an
# output textbox and a "Run" button wired to one of the callbacks above.
# Components are created from the top-level ``gr`` namespace; the
# ``gr.inputs`` / ``gr.outputs`` namespaces are legacy API that newer Gradio
# releases no longer provide (NOTE(review): confirm against the installed
# Gradio version).
block = gr.Blocks(theme="dark")
with block:
    # Tab 1: full document (abstract + body) -> generated abstract + similarity.
    with gr.Tab("Full text and text similarity"):
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    # Tab 2: text without an abstract -> generated abstract only.
    # The component names are deliberately rebound here, as in the original.
    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Enter the text for which an abstract should be generated:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug=True)