cdgranadillo's picture
Create app.py
b58669a
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sheet2api import Sheet2APIClient
from datetime import datetime
from custom_functions import preprocessing_text
# Endpoint of the Sheet2API sheet; generate_summary appends one timestamp row
# per request through this client.
SHEET2API_URL = 'https://sheet2api.com/v1/hwp4AVQlOawy/summarizer'
client = Sheet2APIClient(api_url=SHEET2API_URL)
_MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Compiled once at import time; a raw string avoids the invalid-escape warning
# that the plain '\s+' literal triggers on recent Python versions.
_WHITESPACE_RE = re.compile(r"\s+")

# Holds the (tokenizer, model) pair after the first request so the weights
# are not reloaded on every call.
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading it on first use only."""
    if "pair" not in _summarizer_cache:
        # use_fast=False is kept from the original code (slow tokenizer).
        tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME, use_fast=False)
        model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
        _summarizer_cache["pair"] = (tokenizer, model)
    return _summarizer_cache["pair"]


def generate_summary(text):
    """Summarize *text* with the mT5 XLSum model and return the summary.

    Every call first appends a row holding the current timestamp through the
    module-level ``client``. The text is then passed through
    ``preprocessing_text``, whitespace-normalized, tokenized (truncated or
    padded to 512 tokens) and summarized with beam search.

    Args:
        text: The raw text entered by the user.

    Returns:
        The generated summary as a string, or an empty string when nothing is
        left to summarize after cleaning.
    """
    # NOTE(review): an exception raised here aborts the whole request --
    # confirm whether usage logging should be best-effort instead.
    client.create_row(row={'Timestamp': str(datetime.now())})

    clean_text = preprocessing_text(text)
    # Collapse every run of whitespace (newlines included) into one space.
    normalized = _WHITESPACE_RE.sub(' ', clean_text.strip())
    if not normalized:
        # Nothing to summarize; skip the model instead of decoding pure padding.
        return ""

    tokenizer, model = _load_summarizer()

    input_ids = tokenizer(
        [normalized],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # generate() returns one sequence per input; only one input was passed.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
# Gradio UI: a multi-line input box feeds generate_summary and the returned
# summary is shown in a second, smaller text box.
input_box = gr.Textbox(lines=10, placeholder="Insert the text here")
output_box = gr.Textbox(lines=4)
demo = gr.Interface(fn=generate_summary, inputs=input_box, outputs=output_box)

# Start serving the interface as soon as the script is executed.
demo.launch()