# File size: 1,450 Bytes
# b58669a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
from datetime import datetime
from functools import lru_cache

import gradio as gr
from sheet2api import Sheet2APIClient
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from custom_functions import preprocessing_text

# Sheet2API endpoint of the spreadsheet that generate_summary() writes a
# timestamp row to on every call.
SHEET2API_URL = 'https://sheet2api.com/v1/hwp4AVQlOawy/summarizer'
client = Sheet2APIClient(api_url=SHEET2API_URL)

# Hugging Face checkpoint used for summarization.
_MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Compiled once at import time. The raw string avoids the invalid-escape
# warning that the non-raw literal '\s+' triggers on recent Python versions.
_WHITESPACE_RE = re.compile(r"\s+")


@lru_cache(maxsize=1)
def _load_summarizer():
    """Load the tokenizer and model once and reuse them on later calls.

    Loading the checkpoint is by far the most expensive step, so it must not
    be repeated for every request. If loading raises, nothing is cached and
    the next call retries.

    Returns:
        A ``(tokenizer, model)`` tuple for ``_MODEL_NAME``.
    """
    # The slow (non-fast) tokenizer is requested explicitly, as in the
    # original code.
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME, use_fast=False)
    model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
    return tokenizer, model


def _collapse_whitespace(text):
    """Strip *text* and replace each whitespace run (newlines included) with one space."""
    return _WHITESPACE_RE.sub(" ", text.strip())


def generate_summary(text):
    """Summarize *text* with the mT5 XLSum model and return the summary.

    Each call first appends a row holding the current local timestamp to the
    usage spreadsheet through the module-level ``client``; an error raised by
    that call propagates to the caller.

    Args:
        text: Raw input text. It is passed through ``preprocessing_text``
            (defined in ``custom_functions``; its exact behavior is not
            visible here) and then whitespace-normalized.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    # Usage logging: one row per request.
    client.create_row(row={'Timestamp': str(datetime.now())})

    clean_text = preprocessing_text(text)

    tokenizer, model = _load_summarizer()

    # Input is padded/truncated to exactly 512 tokens; only the token ids are
    # handed to generate().
    input_ids = tokenizer(
        [_collapse_whitespace(clean_text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # Beam search with 4 beams, at most 84 output tokens, and no repeated
    # bigrams; [0] selects the single sequence of the one-element batch.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    return summary


# Gradio UI: a 10-line input textbox feeding generate_summary, whose result
# is shown in a 4-line output textbox.
summary_input = gr.Textbox(lines=10, placeholder="Insert the text here")
summary_output = gr.Textbox(lines=4)

demo = gr.Interface(
    fn=generate_summary,
    inputs=summary_input,
    outputs=summary_output,
)

# Start the web app as soon as the module is executed.
demo.launch()