FlavioBF committed on
Commit
ca856e4
·
1 Parent(s): 01162d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -269,7 +269,35 @@ from transformers import pipeline, AutoProcessor, AutoModel
269
  # =======================================
270
  #
271
  # =======================================
272
- def sentence_to_audio(summary_txt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # Sentence 2 Speech
274
  processor = AutoProcessor.from_pretrained("suno/bark-small")
275
  model = AutoModel.from_pretrained("suno/bark-small")
@@ -282,42 +310,18 @@ def sentence_to_audio(summary_txt):
282
  return sampling_rate, speech_values.cpu().numpy().squeeze()
283
 
284
 
285
- #text_per_page = read_pdf(pdf_path)
286
- #text_per_page.keys()
287
- #page_1 = text_per_page['Page_0']
288
-
289
  # ============================================================================================
290
 
291
- # picking up the abstract from the first page content
292
- #flag=False
293
- #abstract_sect=""
294
-
295
- #for i in range(len(page_1)):
296
- # if page_1[0][i].strip()=="Abstract":
297
- # flag=True
298
- # if page_1[0][i].strip()=="1 Introduction":
299
- # flag = False
300
- # if flag:
301
- # # abstract_sect contains the Abstract section content
302
- # abstract_sect+=page_1[0][i]
303
-
304
-
305
- #from transformers import pipeline
306
- #
307
- #summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
308
- #summary=(summarizer(abstract_sect))
309
- #summary_text=summary[0].get("summary_text")
310
- #print(summary_text)
311
 
312
 
313
  # ===========================================================
314
 
315
- summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
316
 
317
  sentence_to_audio(summary_txt)
318
 
319
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
320
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
321
 
322
- demo = gr.Interface(fn=sentence_to_audio, inputs="file", outputs="audio",examples=[pdf_path,pdf_path2])
323
  demo.launch(share=True)
 
269
  # =======================================
270
  #
271
  # =======================================
272
+ def sentence_to_audio(fileobj):
273
+
274
+
275
+ from transformers import pipeline
276
+
277
+ # text mining from pdf
278
+ text_per_page = read_pdf(fileobj.name)
279
+ text_per_page.keys()
280
+ page_1 = text_per_page['Page_0']
281
+
282
+
283
+ # picking up the abstract from the first page content
284
+ flag=False
285
+ abstract_sect=""
286
+
287
+ for i in range(len(page_1)):
288
+ if page_1[0][i].strip()=="Abstract":
289
+ flag=True
290
+ if page_1[0][i].strip()=="1 Introduction":
291
+ flag = False
292
+ if flag:
293
+ # abstract_sect contains the Abstract section content
294
+ abstract_sect+=page_1[0][i]
295
+
296
+ # abstract summarization
297
+ summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
298
+ summary=(summarizer(abstract_sect))
299
+ summary_text=summary[0].get("summary_text")
300
+
301
  # Sentence 2 Speech
302
  processor = AutoProcessor.from_pretrained("suno/bark-small")
303
  model = AutoModel.from_pretrained("suno/bark-small")
 
310
  return sampling_rate, speech_values.cpu().numpy().squeeze()
311
 
312
 
 
 
 
 
313
  # ============================================================================================
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
 
317
  # ===========================================================
318
 
319
+ #summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
320
 
321
  sentence_to_audio(summary_txt)
322
 
323
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
324
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
325
 
326
+ demo = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio","text"],examples=[pdf_path,pdf_path2])
327
  demo.launch(share=True)