Spaces:
Sleeping
Sleeping
Final commit
Browse files- app.py +82 -0
- rouge_scores.csv +5 -0
- summaries_results.csv +0 -0
- text-summarization.ipynb +0 -0
app.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
2 |
+
import gradio as gr
|
3 |
+
import re
|
4 |
+
|
5 |
+
# Identifier handed to from_pretrained() for both the model and its tokenizer.
# NOTE(review): presumably a local checkpoint directory shipped with the app;
# confirm it is not meant to be a Hub repo id.
model_link = "text_summary_model"
model = AutoModelForSeq2SeqLM.from_pretrained(model_link)
tokenizer = AutoTokenizer.from_pretrained(model_link)

# Summarization pipeline built once at import time and reused for every request.
pipe = pipeline('summarization', model=model, tokenizer=tokenizer)
# Generation settings forwarded to each pipeline call by dummy_summarize().
gen_kwargs = {'length_penalty': 0.8, 'num_beams': 8, "min_length": 30}
|
11 |
+
|
12 |
+
|
13 |
+
def dummy_summarize(text):
    """Return a summary of *text* produced by the module-level pipeline.

    The input is first passed through ``clean_text``. If nothing is left
    after cleaning, an empty string is returned without calling the model.

    Args:
        text: Raw text entered by the user.

    Returns:
        The ``summary_text`` field of the first pipeline result, or ``''``
        when the cleaned input is empty.
    """
    text = clean_text(text)
    if not text:
        # Nothing to summarize; skip the (expensive) model call entirely.
        return ''
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the forward pass fail on over-long text.
    return pipe(text, truncation=True, **gen_kwargs)[0]['summary_text']
|
16 |
+
|
17 |
+
|
18 |
+
def clean_text(text):
    """Normalize raw input into single-spaced, printable-ASCII text.

    Steps, in order: drop byte order marks, turn every run of whitespace
    (newlines, carriage returns, tabs, non-breaking spaces, ...) into one
    space, delete backslashes, delete every character outside printable
    ASCII (0x20-0x7E), then collapse the spaces left behind.

    Whitespace is collapsed *before* characters are stripped so that words
    separated only by a tab or a non-ASCII space are not fused together.

    Args:
        text: The string to clean. ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned string, with no leading or trailing whitespace.
    """
    if not text:
        return ''
    # Remove Byte Order Marks (BOM)
    text = text.replace('\ufeff', '')
    # Turn every whitespace run (including \n, \r, \t and Unicode spaces)
    # into a single space while those characters can still be recognised.
    text = ' '.join(text.split())
    # Remove backslashes
    text = text.replace('\\', '')
    # Remove non-ASCII and non-printable characters in one pass: everything
    # outside the printable ASCII range 0x20-0x7E.
    text = re.sub(r'[^\x20-\x7E]+', '', text)
    # Stripping characters can leave doubled or edge spaces; normalize again.
    return ' '.join(text.split())
|
34 |
+
|
35 |
+
|
36 |
+
# Custom HTML for the heading and short usage hint shown with the interface.
title_html = """
<h1 style="font-size: 40px; text-align: center; color: white;">Text Summarization</h1>
<p style="text-align: center; color: white; font-size:20px">Enter text to summarize it using a pretrained model.</p>
"""
|
41 |
+
|
42 |
+
# Stylesheet passed to the Gradio interface: blue page background with
# darker blue, white-bordered text boxes and buttons.
css = """
body {
background-color: #007BFF;
color: white;
}

.gr-textbox textarea {
background-color: #0056b3;
color: white;
border: 2px solid white;
}

.gr-button {
background-color: #0056b3;
color: white;
border: 2px solid white;
}

.gr-button:hover {
background-color: #004080;
}

.gr-textbox input {
background-color: #0056b3;
color: white;
border: 2px solid white;
}
"""
|
70 |
+
|
71 |
+
# Create a Gradio interface with large input and output textboxes
interface = gr.Interface(
    fn=dummy_summarize,
    inputs=gr.Textbox(lines=15, placeholder="Enter text here..."),
    outputs=gr.Textbox(lines=15, placeholder="Summary will appear here..."),
    description=title_html,
    theme="compact",  # Use compact theme to reduce padding
    css=css,
)
|
80 |
+
|
81 |
+
# Launch the interface only when executed as a script, so importing this
# module (e.g. from a test) does not start a web server.
if __name__ == "__main__":
    interface.launch()
|
rouge_scores.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
metric,score
|
2 |
+
rouge1,"Score(precision=0.5240761726639174, recall=0.5164609406411964, fmeasure=0.48813699709755287)"
|
3 |
+
rouge2,"Score(precision=0.2750003463132431, recall=0.26328277298441694, fmeasure=0.2498681871552253)"
|
4 |
+
rougeL,"Score(precision=0.4338563417029981, recall=0.42507261340158575, fmeasure=0.40241707690768413)"
|
5 |
+
rougeLsum,"Score(precision=0.4330077769824946, recall=0.42511422355384887, fmeasure=0.4021251370419605)"
|
summaries_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
text-summarization.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|