Spaces:

Einmalumdiewelt
/

German_text_summarization

Build error

App Files Files Community

Einmalumdiewelt committed on Aug 10, 2022

Commit

2241007

1 Parent(s): c38dbf5

Update app.py

Browse files

added sliders and different models

Files changed (1) hide show

app.py +30 -15

app.py CHANGED Viewed

@@ -3,14 +3,22 @@ import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-tokenizer = AutoTokenizer.from_pretrained("Einmalumdiewelt/T5-Base_GNAD")
-model = AutoModelForSeq2SeqLM.from_pretrained("Einmalumdiewelt/T5-Base_GNAD")
-device = "cpu"
-#"cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-def summarize(inputs):
   #define model inputs
   inputs = tokenizer(
         inputs,
@@ -19,7 +27,7 @@ def summarize(inputs):
         padding="max_length",
         return_tensors='pt').to(device)
   #generate preds
-  preds = model.generate(**inputs,max_length=200,min_length=100)
   #we decode the predictions to store them
   decoded_predictions = tokenizer.batch_decode(preds, skip_special_tokens=True)
   #return
@@ -35,17 +43,24 @@ examples = [["summarize: Maschinelles Lernen ist ein Oberbegriff für die „kü
 #            title=title,
 #            description=description,
 #            examples=examples)
-txt=gr.Textbox(lines=30, label="German", placeholder="Paste your German text in here")
-out=gr.Textbox(lines=10, label="Summary")
 interface = gr.Interface(summarize,
             inputs=txt,
-            outputs=out,
             title=title,
             description=description,
-            examples=examples)
-interface.launch(share=True)

 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+def summarize(inputs,model=model,summary_length=200):
+  if model=="T5-base":
+        tokenizer = AutoTokenizer.from_pretrained("Einmalumdiewelt/T5-Base_GNAD")
+        model = AutoModelForSeq2SeqLM.from_pretrained("Einmalumdiewelt/T5-Base_GNAD")
+  elif model =="Google pegasus":
+        tokenizer = AutoTokenizer.from_pretrained("Einmalumdiewelt/PegasusXSUM_GNAD")
+        model = AutoModelForSeq2SeqLM.from_pretrained("Einmalumdiewelt/PegasusXSUM_GNAD")
+  elif model =="Facebook bart-large":
+        tokenizer = AutoTokenizer.from_pretrained("Einmalumdiewelt/BART_large_CNN_GNAD")
+        model = AutoModelForSeq2SeqLM.from_pretrained("Einmalumdiewelt/BART_large_CNN_GNAD")
+  device = "CPU"
+  #"cuda" if torch.cuda.is_available() else "CPU"
+  model.to(device)
   #define model inputs
   inputs = tokenizer(
         inputs,
         padding="max_length",
         return_tensors='pt').to(device)
   #generate preds
+  preds = model.generate(**inputs,max_length=summary_length,min_length=30)
   #we decode the predictions to store them
   decoded_predictions = tokenizer.batch_decode(preds, skip_special_tokens=True)
   #return
 #            title=title,
 #            description=description,
 #            examples=examples)
+txt=gr.Textbox(lines=15, label="I want to summarize this:", placeholder="Paste your German text in here. Don't forget to add the prefix "summarize: " for T5-base architecture.")
+out=gr.Textbox(lines=5, label="Here's your summary:")
 interface = gr.Interface(summarize,
+            [
+            # input
             inputs=txt,
+            # Selection of models for inference
+            gr.Dropdown(["T5-base", "Google pegasus", "Facebook bart-large"]),
+            # Length of summaries
+            gr.Slider(50, 250, step=50, label="summary length", value=150),
+            # ouptut
+            outputs=out
+            ],
             title=title,
             description=description,
+            examples=examples)
+# launch interface
+if __name__ == "__main__":
+    interface.launch(share=True)