Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,12 @@
|
|
|
|
|
|
1 |
# ================================================================
|
2 |
-
#
|
3 |
-
#
|
4 |
-
#
|
5 |
# ================================================================
|
6 |
-
#
|
7 |
-
# -------------------------
|
8 |
-
# PDF
|
9 |
-
# -------------------------
|
10 |
|
|
|
11 |
# To read the PDF
|
12 |
import PyPDF2
|
13 |
# To analyze the PDF layout and extract text
|
@@ -20,9 +19,19 @@ from PIL import Image
|
|
20 |
from pdf2image import convert_from_path
|
21 |
# To perform OCR to extract text from images
|
22 |
import pytesseract
|
|
|
23 |
# To remove the additional created files
|
24 |
import os
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# -----------------------------------------------------------------------------
|
27 |
# Create a function to extract text
|
28 |
|
@@ -263,12 +272,7 @@ pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
|
263 |
#
|
264 |
# =======================================
|
265 |
def sentence_to_audio(fileobj):
|
266 |
-
|
267 |
-
import torch
|
268 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
269 |
-
from transformers import pipeline
|
270 |
-
import numpy as np
|
271 |
-
import scipy
|
272 |
|
273 |
|
274 |
# text mining from pdf
|
@@ -308,38 +312,19 @@ def sentence_to_audio(fileobj):
|
|
308 |
scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
|
309 |
return "s_2_s.wav",summary_text
|
310 |
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
# processor = AutoProcessor.from_pretrained("suno/bark-small")
|
315 |
-
# model = AutoModel.from_pretrained("suno/bark-small")
|
316 |
-
# inputs = processor(
|
317 |
-
# text=summary_text,
|
318 |
-
# return_tensors="pt",
|
319 |
-
# )
|
320 |
-
# speech_values = model.generate(**inputs, do_sample=True)
|
321 |
-
# sampling_rate = model.generation_config.sample_rate
|
322 |
-
# return sampling_rate, speech_values.cpu().numpy().squeeze(),summary_text
|
323 |
-
|
324 |
-
|
325 |
-
# ============================================================================================
|
326 |
-
|
327 |
-
|
328 |
-
# =======================================
|
329 |
-
|
330 |
-
import gradio as gr
|
331 |
-
from transformers import pipeline, AutoProcessor, AutoModel
|
332 |
-
from transformers import pipeline
|
333 |
-
|
334 |
# ===========================================================
|
335 |
|
336 |
#summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
|
337 |
-
#sentence_to_audio(summary_txt)
|
338 |
|
339 |
pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
|
340 |
#pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
341 |
pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
|
342 |
|
343 |
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
345 |
demo.launch(share=True)
|
|
|
1 |
+
# https://huggingface.co/spaces/FlavioBF/AI_in_production_PRJs
|
2 |
+
|
3 |
# ================================================================
|
4 |
+
#
|
5 |
+
# import
|
6 |
+
#
|
7 |
# ================================================================
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
#PDF PROCESSING
|
10 |
# To read the PDF
|
11 |
import PyPDF2
|
12 |
# To analyze the PDF layout and extract text
|
|
|
19 |
from pdf2image import convert_from_path
|
20 |
# To perform OCR to extract text from images
|
21 |
import pytesseract
|
22 |
+
|
23 |
# To remove the additional created files
|
24 |
import os
|
25 |
|
26 |
+
#SUMMARIZATION AND AUDIO PROCESSING
|
27 |
+
import torch
|
28 |
+
import numpy as np
|
29 |
+
import scipy
|
30 |
+
import gradio as gr
|
31 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
32 |
+
from transformers import pipeline, AutoProcessor, AutoModel
|
33 |
+
from transformers import pipeline
|
34 |
+
|
35 |
# -----------------------------------------------------------------------------
|
36 |
# Create a function to extract text
|
37 |
|
|
|
272 |
#
|
273 |
# =======================================
|
274 |
def sentence_to_audio(fileobj):
|
275 |
+
|
|
|
|
|
|
|
|
|
|
|
276 |
|
277 |
|
278 |
# text mining from pdf
|
|
|
312 |
scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
|
313 |
return "s_2_s.wav",summary_text
|
314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
# ===========================================================
|
316 |
|
317 |
#summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
|
|
|
318 |
|
319 |
pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
|
320 |
#pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
321 |
pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
|
322 |
|
323 |
|
324 |
+
|
325 |
+
#iface = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio",gr.Textbox(lines=4,label="one sentence summ.")],title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)")
|
326 |
+
#iface.launch(share=True)
|
327 |
+
|
328 |
+
|
329 |
+
# Build the Gradio UI: one uploaded file in; the generated audio file and the
# one-sentence summary text out.
# Fix: the outputs list previously read `"audio",,gr.Textbox(...)`; the doubled
# comma is a SyntaxError that stops the whole module from loading.
demo = gr.Interface(
    fn=sentence_to_audio,
    inputs="file",
    # Two output components, matching the ("s_2_s.wav", summary_text) pair
    # returned by sentence_to_audio.
    outputs=["audio", gr.Textbox(lines=4, label="one sentence summ.")],
    examples=[pdf_path, pdf_path2],
    title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)",
)
|
330 |
demo.launch(share=True)
|