manuel-calzolari committed on
Commit
1b6419e
·
1 Parent(s): 1f89864

Upload assessment

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pdf filter=lfs diff=lfs merge=lfs -text
Article 11 Hidden Technical Debt in Machine Learning Systems.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a67da09a8bd5ba9a3577176e30aa2fbd88534e6baf0bc31522b4999f643d2a1
3
+ size 165614
Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8187b33070ee9d6ccdca29bbcd9494875099927ff8c66d45b210dc612eae7a
3
+ size 3080026
Article 7 Efficient Estimation of Word Representations in Vector Space.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a44d7e22d2005752271c9cc1929c6462d4c8270916b063977992a883e3a54362
3
+ size 228716
app.py CHANGED
@@ -1,7 +1,130 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://huggingface.co/spaces/manuel-calzolari/assessment3_part2
2
+
3
+ # Import modules
4
+ import re
5
+ import fitz
6
+ import scipy
7
+ import torch
8
+ from transformers import pipeline
9
  import gradio as gr
10
 
11
+ # Path of the audio file to save
12
+ TTS_AUDIO_PATH = "tts.wav"
13
+
14
+
15
def extract_text(pdf_path):
    """
    Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file; it is passed straight to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order.
    """
    with fitz.open(pdf_path) as pdf:
        # Join once instead of growing a string page by page; the generator is
        # fully consumed before the document is closed.
        return "".join(page.get_text() for page in pdf)
24
+
25
+
26
def get_abstract(text):
    """
    Get the abstract out of the full text of a paper:
    - Remove the text before the abstract
    - Remove the text after the abstract (after the next title)
    - Remove new lines

    This works for some tested PDFs but obviously may not work with every
    possible layout.

    Args:
        text: Full text of the document, with lines separated by "\\n".

    Returns:
        The abstract as a single line of text.

    Raises:
        IndexError: If no line consisting only of "Abstract" or "ABSTRACT"
            is found in the text.
    """
    # Everything after the first stand-alone "Abstract" heading line
    parts = re.split(r"\n(?:Abstract|ABSTRACT)\n", text)
    if len(parts) < 2:
        # Same exception type as the plain index lookup would raise, but with
        # a message that explains what went wrong.
        raise IndexError("no 'Abstract' heading found in the text")

    # Cut at the next short line that looks like a title (e.g. "1 Introduction")
    abstract = re.split(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n", parts[1])[0]

    # Re-join words hyphenated across a line break, then flatten to one line
    return abstract.replace("-\n", "").replace("\n", " ")
40
+
41
+
42
# Summarization pipeline, created on first use and then reused so the model is
# not reloaded on every request.
_SUMMARIZER = None


def generate_summary(abstract):
    """
    Generate a one-line summary of an abstract.

    Use a model trained to generate one-line summary based on abstract of papers
    See: https://huggingface.co/snrspeaks/t5-one-line-summary

    Args:
        abstract: Text of the abstract to summarize.

    Returns:
        The summary text, guaranteed to end with a full stop.
    """
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            model="snrspeaks/t5-one-line-summary",
            device="cuda:0" if torch.cuda.is_available() else "cpu",
        )

    summary = _SUMMARIZER(
        abstract,
        max_length=64,  # Maximum length of the summary
    )[0]["summary_text"]

    # If it's not already there, add a full stop at the end of the summary
    return summary if summary.endswith(".") else summary + "."
63
+
64
+
65
# Text-to-speech pipeline, created on first use and then reused so the model
# is not reloaded on every request.
_SYNTHESISER = None


def generate_speech(summary):
    """
    Generate the speech for a summary (TTS model).

    See: https://huggingface.co/suno/bark-small
    Note 1: Some PyTorch warnings are emitted but it seems to work.
    Note 2: Sometimes (not always) this TTS model adds spurious sounds or words
    at the end (or more rarely at the beginning) of the speech related to the
    text being provided.

    Args:
        summary: Text to be read aloud.

    Returns:
        The output of the text-to-speech pipeline; ``save_speech`` reads its
        "audio" and "sampling_rate" entries.
    """
    global _SYNTHESISER
    if _SYNTHESISER is None:
        _SYNTHESISER = pipeline(
            "text-to-speech",
            model="suno/bark-small",
            device="cuda:0" if torch.cuda.is_available() else "cpu",
        )

    return _SYNTHESISER(
        summary,
        forward_params={"do_sample": True},  # From the bark-small usage example
    )
86
+
87
+
88
def save_speech(speech, audio_path):
    """
    Save the speech to a WAV file (from the bark-small usage example).

    Args:
        speech: Mapping with a "sampling_rate" entry and an "audio" array.
        audio_path: Path of the WAV file to write.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to make ``scipy.io.wavfile`` available on every SciPy version.
    from scipy.io import wavfile

    wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )
97
+
98
+
99
def synthesis(pdf_path):
    """
    Run the whole pipeline on a PDF: extract the abstract, summarize it in one
    sentence, synthesize the summary as speech and save it as a WAV file.

    Args:
        pdf_path: Path of the uploaded PDF file.

    Returns:
        A ``(summary, audio_path)`` tuple. If the text cannot be extracted or
        the abstract cannot be located, an error message and ``None`` are
        returned instead.
    """
    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # Report any extraction failure to the UI, but let KeyboardInterrupt
        # and SystemExit propagate (a bare ``except:`` would swallow them).
        return "ERROR: ABSTRACT NOT FOUND!!!", None

    # Generate the summary
    summary = generate_summary(abstract)
    # Generate the speech of the summary
    speech = generate_speech(summary)
    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)

    return summary, TTS_AUDIO_PATH
115
+
116
 
117
# Assemble the Gradio interface (one PDF upload in, summary text and audio out)
# and start serving it
summary_tts = gr.Interface(
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    fn=synthesis,
    inputs=gr.File(
        label="Upload PDF with abstract",
        type="filepath",
        file_types=[".pdf"],
    ),
    outputs=[
        gr.Text(label="Summary"),
        gr.Audio(label="Summary TTS"),
    ],
    # Sample papers offered as one-click examples in the UI
    examples=[
        "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
        "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
        "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
    ],
)
summary_tts.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ PyMuPDF
2
+ transformers
3
+ scipy
4
+ torch
5
+ # gradio is automatically made available in Hugging Face Spaces