shrey-14 committed on
Commit
a274d7c
·
verified ·
1 Parent(s): 242eda4

Final commit

Browse files
Files changed (4) hide show
  1. app.py +82 -0
  2. rouge_scores.csv +5 -0
  3. summaries_results.csv +0 -0
  4. text-summarization.ipynb +0 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
2
+ import gradio as gr
3
+ import re
4
+
5
# Identifier handed to `from_pretrained` for both the model and its tokenizer.
# NOTE(review): presumably a local checkpoint directory next to app.py - confirm.
model_link = "text_summary_model"

# Load the seq2seq weights first, then the matching tokenizer.
model = AutoModelForSeq2SeqLM.from_pretrained(model_link)
tokenizer = AutoTokenizer.from_pretrained(model_link)

# Summarization pipeline built around the objects loaded above.
pipe = pipeline(
    'summarization',
    model=model,
    tokenizer=tokenizer,
)

# Generation settings forwarded to `pipe` on every summarization request.
gen_kwargs = dict(
    length_penalty=0.8,
    num_beams=8,
    min_length=30,
)
11
+
12
+
13
def dummy_summarize(text):
    """Return a model-generated summary of *text*.

    The input is passed through ``clean_text`` and then handed to the
    module-level summarization pipeline ``pipe`` together with the
    generation settings in ``gen_kwargs``.

    Args:
        text: Raw text entered by the user. ``None`` or an empty string is
            accepted and yields an empty summary.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when
        there is nothing to summarize.
    """
    # Nothing was submitted: skip cleaning and inference entirely.
    if not text:
        return ""

    text = clean_text(text)

    # Cleaning can strip everything (e.g. whitespace-only or non-ASCII-only
    # input); running beam search with a minimum length on an empty prompt
    # would only produce noise.
    if not text:
        return ""

    # Ask the pipeline to truncate inputs longer than the model's maximum
    # length instead of failing on them. Settings in gen_kwargs take
    # precedence if they ever define `truncation` themselves.
    call_kwargs = {'truncation': True, **gen_kwargs}
    return pipe(text, **call_kwargs)[0]['summary_text']
16
+
17
+
18
def clean_text(text) -> str:
    """Normalize raw user text into a single line of printable ASCII.

    Steps, in order:
      1. drop byte order marks,
      2. collapse every run of whitespace (newlines, carriage returns, tabs,
         non-breaking spaces, ...) into a single space,
      3. remove backslashes,
      4. remove non-ASCII characters,
      5. remove remaining non-printable control characters,
      6. collapse whitespace again and trim the ends.

    Args:
        text: The text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text; ``""`` when the input is empty or ``None``.
    """
    if not text:
        return ""

    # Remove Byte Order Marks (BOM)
    text = text.replace('\ufeff', '')

    # Turn all whitespace into plain spaces *before* stripping characters.
    # Tabs, form feeds and Unicode spaces would otherwise be deleted by the
    # regexes below, gluing the neighbouring words together.
    text = ' '.join(text.split())

    # Remove backslashes
    text = text.replace('\\', '')
    # Remove Non-ASCII characters
    text = re.sub(r'[^\x00-\x7F]+', '', text)
    # Remove Non-printable characters
    text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)

    # Removing characters can leave doubled or dangling spaces behind, so
    # normalize whitespace once more.
    return ' '.join(text.split())
34
+
35
+
36
# Heading and short instructions shown above the form (passed as `description`).
title_html = """
<h1 style="font-size: 40px; text-align: center; color: white;">Text Summarization</h1>
<p style="text-align: center; color: white; font-size:20px">Enter text to summarize it using a pretrained model.</p>
"""

# Stylesheet handed to Gradio: blue page background, darker blue inputs and
# buttons with white text and borders.
css = """
body {
background-color: #007BFF;
color: white;
}

.gr-textbox textarea {
background-color: #0056b3;
color: white;
border: 2px solid white;
}

.gr-button {
background-color: #0056b3;
color: white;
border: 2px solid white;
}

.gr-button:hover {
background-color: #004080;
}

.gr-textbox input {
background-color: #0056b3;
color: white;
border: 2px solid white;
}
"""

# Large multi-line boxes for the source text and for the generated summary.
_source_box = gr.Textbox(lines=15, placeholder="Enter text here...")
_summary_box = gr.Textbox(lines=15, placeholder="Summary will appear here...")

# Wire the summarizer to the two text boxes.
interface = gr.Interface(
    fn=dummy_summarize,
    inputs=_source_box,
    outputs=_summary_box,
    description=title_html,
    theme="compact",  # Use compact theme to reduce padding
    css=css,
)

# Start serving the app.
interface.launch()
rouge_scores.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ metric,score
2
+ rouge1,"Score(precision=0.5240761726639174, recall=0.5164609406411964, fmeasure=0.48813699709755287)"
3
+ rouge2,"Score(precision=0.2750003463132431, recall=0.26328277298441694, fmeasure=0.2498681871552253)"
4
+ rougeL,"Score(precision=0.4338563417029981, recall=0.42507261340158575, fmeasure=0.40241707690768413)"
5
+ rougeLsum,"Score(precision=0.4330077769824946, recall=0.42511422355384887, fmeasure=0.4021251370419605)"
summaries_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
text-summarization.ipynb ADDED
The diff for this file is too large to render. See raw diff