FlavioBF committed on
Commit
7d9efc2
·
1 Parent(s): aeb226b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -38
app.py CHANGED
@@ -1,13 +1,12 @@
 
 
1
  # ================================================================
2
- # TESTING VERSION
3
- # ALL-IN-ONE CELL VERSION
4
- # OF THE PROGRAM
5
  # ================================================================
6
- #
7
- # -------------------------
8
- # PDF
9
- # -------------------------
10
 
 
11
  # To read the PDF
12
  import PyPDF2
13
  # To analyze the PDF layout and extract text
@@ -20,9 +19,19 @@ from PIL import Image
20
  from pdf2image import convert_from_path
21
  # To perform OCR to extract text from images
22
  import pytesseract
 
23
  # To remove the additional created files
24
  import os
25
 
 
 
 
 
 
 
 
 
 
26
  # -----------------------------------------------------------------------------
27
  # Create a function to extract text
28
 
@@ -263,12 +272,7 @@ pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
263
  #
264
  # =======================================
265
  def sentence_to_audio(fileobj):
266
- from transformers import pipeline, AutoProcessor, AutoModel
267
- import torch
268
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
269
- from transformers import pipeline
270
- import numpy as np
271
- import scipy
272
 
273
 
274
  # text mining from pdf
@@ -308,38 +312,19 @@ def sentence_to_audio(fileobj):
308
  scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
309
  return "s_2_s.wav",summary_text
310
 
311
-
312
-
313
-
314
- # processor = AutoProcessor.from_pretrained("suno/bark-small")
315
- # model = AutoModel.from_pretrained("suno/bark-small")
316
- # inputs = processor(
317
- # text=summary_text,
318
- # return_tensors="pt",
319
- # )
320
- # speech_values = model.generate(**inputs, do_sample=True)
321
- # sampling_rate = model.generation_config.sample_rate
322
- # return sampling_rate, speech_values.cpu().numpy().squeeze(),summary_text
323
-
324
-
325
- # ============================================================================================
326
-
327
-
328
- # =======================================
329
-
330
- import gradio as gr
331
- from transformers import pipeline, AutoProcessor, AutoModel
332
- from transformers import pipeline
333
-
334
  # ===========================================================
335
 
336
  #summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
337
- #sentence_to_audio(summary_txt)
338
 
339
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
340
  #pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
341
  pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
342
 
343
 
344
- demo = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio","text"],examples=[pdf_path,pdf_path2])
 
 
 
 
 
345
  demo.launch(share=True)
 
1
+ # https://huggingface.co/spaces/FlavioBF/AI_in_production_PRJs
2
+
3
  # ================================================================
4
+ #
5
+ # import
6
+ #
7
  # ================================================================
 
 
 
 
8
 
9
+ #PDF PROCESSING
10
  # To read the PDF
11
  import PyPDF2
12
  # To analyze the PDF layout and extract text
 
19
  from pdf2image import convert_from_path
20
  # To perform OCR to extract text from images
21
  import pytesseract
22
+
23
  # To remove the additional created files
24
  import os
25
 
26
+ #SUMMARIZATION AND AUDIO PROCESSING
27
+ import torch
28
+ import numpy as np
29
+ import scipy
30
+ import gradio as gr
31
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
32
+ from transformers import pipeline, AutoProcessor, AutoModel
33
+ from transformers import pipeline
34
+
35
  # -----------------------------------------------------------------------------
36
  # Create a function to extract text
37
 
 
272
  #
273
  # =======================================
274
  def sentence_to_audio(fileobj):
275
+
 
 
 
 
 
276
 
277
 
278
  # text mining from pdf
 
312
  scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
313
  return "s_2_s.wav",summary_text
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  # ===========================================================
316
 
317
  #summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
 
318
 
319
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
320
  #pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
321
  pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
322
 
323
 
324
+
325
+ #iface = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio",gr.Textbox(lines=4,label="one sentence summ.")],title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)")
326
+ #iface.launch(share=True)
327
+
328
+
329
+ demo = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio",gr.Textbox(lines=4,label="one sentence summ.")],examples=[pdf_path,pdf_path2],title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)")
330
  demo.launch(share=True)