Spaces:

miracFence
/

generator_es_test

Runtime error

File size: 5,734 Bytes

# -*- coding: utf-8 -*-
"""ABSTRACTGEN_ES FINAL.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF

# installs
"""

import os

# Runtime bootstrap: this script was exported from a notebook that installed
# its dependencies inline, so the same shell commands are replayed at start-up.
# The return codes of os.system() are not inspected, so a failing command does
# not stop the script.
os.system('pip install gpt_2_simple')
# NOTE: a former `pip install os.system` step was removed here. `os.system` is
# a standard-library function, not a distribution; asking pip for it can only
# fail or pull in an unrelated package with that name.
os.system('pip install gradio')
os.system('pip install huggingface_hub')
os.system('pip install easynmt')
os.system('pip install sentence-transformers')
# NOTE(review): this pipes a remote script straight into `sudo bash`, i.e. it
# runs unreviewed code with root privileges. Consider installing git-lfs from
# a pinned package source instead.
os.system('curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash')
os.system('apt-get install git-lfs')
os.system('git lfs install')
# Clone the model repository into the current working directory; the rest of
# the script reads from the resulting AbstractGeneratorES folder.
os.system('git clone https://huggingface.co/franz96521/AbstractGeneratorES ')
# The working directory is intentionally left unchanged: a `cd` issued through
# os.system() would only affect its own short-lived subshell.
print(os.getcwd())
print(os.listdir())
# Commented out IPython magic to ensure Python compatibility.
# %cd '/content/AbstractGeneratorES'

"""# Init"""

import gpt_2_simple as gpt2
import os
import tensorflow as tf
import pandas as pd
import re

# Download the base GPT-2 model only when its folder under "models" is missing.
model_name = "124M"
_base_model_dir = os.path.join("models", model_name)
if not os.path.isdir(_base_model_dir):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)

# Locations inside the cloned AbstractGeneratorES repository, resolved against
# the directory the script is started from. Every directory string keeps its
# trailing slash because the values below are built by plain concatenation.
path = f"{os.getcwd()}/AbstractGeneratorES/AbstractGenerator/"
checkpoint_dir = f"{path}weights/"
# NOTE(review): 'TrainigData' looks like a typo but presumably matches the
# folder name in the cloned repository; verify before renaming.
data_path = f"{path}TrainigData/"

# File names (and their full paths) for the English and Spanish data.
file_name_en = 'en'
file_path_en = f"{data_path}{file_name_en}"

file_name_es = 'es'
file_path_es = f"{data_path}{file_name_es}"

# GPT-2 sample delimiters; `sufix` is the marker at which generated text is
# truncated.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'

import gradio as gr
import random
from easynmt import EasyNMT

from sentence_transformers import SentenceTransformer, util

def generateAbstract(text):
    """Generate one abstract for ``text`` with the fine-tuned GPT-2 checkpoint.

    Every call resets the default TensorFlow graph, opens a new session, loads
    the ``run1`` checkpoint found under ``checkpoint_dir`` and samples once
    from a prompt made of ``text``, a newline and the word ``ABSTRACT``.

    Args:
        text: Article body used as the prompt; it is converted with ``str()``.

    Returns:
        The single generated sample as a string, truncated at ``sufix``.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A session is created per request; close it so its resources are
        # released instead of accumulating across calls.
        sess.close()
def removeAbstract(text):
    """Split an article into ``(abstract, body)`` at the introduction heading.

    The first occurrence of ``"Introducción"`` is the preferred split point;
    when that spelling is absent the first ``"INTRODUCCIÓN"`` is used instead.

    Args:
        text: Full article text, expected to contain the abstract followed by
            the introduction heading and the rest of the article.

    Returns:
        A 2-tuple ``(abstract, body)`` where ``body`` starts at the heading.
        If neither heading is present, the whole text is treated as the body
        and ``("", text)`` is returned, so callers can always unpack two
        values (previously the function fell through and returned ``None``).
    """
    p = text.find("Introducción")
    p2 = text.find("INTRODUCCIÓN")
    print(p, p2)
    # Candidates are tried in priority order: title case first, then upper case.
    for cut in (p, p2):
        if cut != -1:
            return (text[:cut], text[cut:])
    return ("", text)

def _similarity_report(body, abstract_original, generated_abstract, score, indent):
    """Build the report text; ``indent`` prefixes every line after the first."""
    return (
        f"TEXTO SIN ABSTRACT\n\n"
        f"{indent}{body}\n\n"
        f"{indent}ABSTRACT ORIGINAL\n\n"
        f"{indent}{abstract_original}\n\n"
        f"{indent}ABSTRACT GENERADO\n\n"
        f"{indent}{generated_abstract}\n\n"
        f"{indent}SIMILARIDAD DE ABSTRACT: {float(round(score * 100, 3))}%\n"
        f"{indent}"
    )


def generated_similarity(type_of_input, cn_text):
    """Regenerate an article's abstract and compare it with the original one.

    The text is split into abstract and body with ``removeAbstract``, a new
    abstract is generated from the body with ``generateAbstract``, and both
    abstracts are embedded with a SentenceTransformer model to compute their
    cosine similarity.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``. English text is first
            translated to Spanish with EasyNMT.
        cn_text: Full article text, abstract included.

    Returns:
        A report string containing the body, the original abstract, the
        generated abstract and the similarity as a percentage, or ``None``
        when ``type_of_input`` is neither of the two supported values.
    """
    if type_of_input == "English":
        tf.compat.v1.reset_default_graph()
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')
        print(cn_text)
        # The English report has always been indented by eight spaces and the
        # Spanish one by four; that output is kept as it was.
        indent = " " * 8
    elif type_of_input == "Spanish":
        indent = " " * 4
    else:
        return None

    # removeAbstract may yield None when no introduction heading is found;
    # treat that as "no abstract, everything is body" instead of failing on
    # the tuple unpacking.
    split = removeAbstract(cn_text)
    if split is None:
        split = ("", cn_text)
    abstract_original, body = split

    tf.compat.v1.reset_default_graph()
    generated_abstract = generateAbstract(body)

    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    # Compute one embedding per abstract and compare them.
    embedding_1 = model.encode(abstract_original, convert_to_tensor=True)
    embedding_2 = model.encode(generated_abstract, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)

    return _similarity_report(
        body, abstract_original, generated_abstract, similarity.item(), indent
    )
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that does not contain one.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``. English text is
            translated to Spanish with EasyNMT before generation.
        cn_text: Article text without an abstract.

    Returns:
        A report string with the (possibly translated) text followed by the
        generated abstract, or ``None`` for any other ``type_of_input``.
    """
    if type_of_input not in ("English", "Spanish"):
        return None

    tf.compat.v1.reset_default_graph()
    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    summary = generateAbstract(cn_text)
    return f'''TEXTO SIN ABSTRACT\n
    {cn_text}\n
    ABSTRACT GENERADO\n
    {summary}\n
    '''

# Gradio UI: two tabs with the same layout (language selector, input box,
# output box, Run button) wired to different handlers.
# Components are created from the top-level `gr` namespace. The legacy
# `gr.inputs.*` / `gr.outputs.*` wrappers used previously are deprecated and
# no longer exist in current Gradio releases, which matters because the
# start-up `pip install gradio` is unpinned.
block = gr.Blocks()

with block:
    gr.Markdown("<h1>ABSTRACTGEN_ES</h1>")
    with gr.Tab("Full text and text similarity"):
        gr.Markdown("Choose language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Choose language:")
        # The names are rebound for the second tab; the click handler above
        # already captured the first tab's components.
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug=True)