FlavioBF committed on
Commit
236e761
·
1 Parent(s): 95c12dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -53
app.py CHANGED
@@ -8,28 +8,6 @@
8
  # PDF
9
  # -------------------------
10
 
11
- #!pip install PyPDF2
12
- #!pip install pdfminer.six
13
- #!pip install pdfplumber
14
- #!pip install pdf2image
15
- #!pip install Pillow
16
- #!pip install pytesseract
17
- #!pip install poppler-utils
18
- #!pip install tesseract-ocr
19
- #!pip install libtesseract-dev
20
-
21
- #!pip install fastapi
22
- #!pip install -q torch
23
- #!pip install -q transformers
24
- #!pip install -q gradio
25
- #!pip install ffmpeg
26
-
27
-
28
- #!apt-get install poppler-utils
29
- #!apt install tesseract-ocr
30
- #!apt install libtesseract-dev
31
-
32
-
33
  # To read the PDF
34
  import PyPDF2
35
  # To analyze the PDF layout and extract text
@@ -281,35 +259,6 @@ pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-lea
281
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
282
 
283
 
284
- text_per_page = read_pdf(pdf_path)
285
-
286
- text_per_page.keys()
287
-
288
-
289
- page_1 = text_per_page['Page_0']
290
-
291
- # ============================================================================================
292
-
293
- # picking up the abstract from the first page content
294
- flag=False
295
- abstract_sect=""
296
-
297
- for i in range(len(page_1)):
298
- if page_1[0][i].strip()=="Abstract":
299
- flag=True
300
- if page_1[0][i].strip()=="1 Introduction":
301
- flag = False
302
- if flag:
303
- # abstract_sect contains the Abstract section content
304
- abstract_sect+=page_1[0][i]
305
-
306
-
307
- from transformers import pipeline
308
-
309
- summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
310
- summary=(summarizer(abstract_sect))
311
- summary_text=summary[0].get("summary_text")
312
- print(summary_text)
313
 
314
 
315
 
@@ -333,8 +282,39 @@ def sentence_to_audio(summary_txt):
333
  return sampling_rate, speech_values.cpu().numpy().squeeze()
334
 
335
 
336
- #summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
337
- sentence_to_audio(summary_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
340
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
 
8
  # PDF
9
  # -------------------------
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # To read the PDF
12
  import PyPDF2
13
  # To analyze the PDF layout and extract text
 
259
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
260
 
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
 
264
 
 
282
  return sampling_rate, speech_values.cpu().numpy().squeeze()
283
 
284
 
285
+ text_per_page = read_pdf(pdf_path)
286
+ text_per_page.keys()
287
+ page_1 = text_per_page['Page_0']
288
+
289
+ # ============================================================================================
290
+
291
+ # picking up the abstract from the first page content
292
+ #flag=False
293
+ #abstract_sect=""
294
+
295
+ #for i in range(len(page_1)):
296
+ # if page_1[0][i].strip()=="Abstract":
297
+ # flag=True
298
+ # if page_1[0][i].strip()=="1 Introduction":
299
+ # flag = False
300
+ # if flag:
301
+ # # abstract_sect contains the Abstract section content
302
+ # abstract_sect+=page_1[0][i]
303
+
304
+
305
+ #from transformers import pipeline
306
+ #
307
+ #summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
308
+ #summary=(summarizer(abstract_sect))
309
+ #summary_text=summary[0].get("summary_text")
310
+ #print(summary_text)
311
+
312
+
313
+ # ===========================================================
314
+
315
+ summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
316
+
317
+ sentence_to_audio(summary_txt)
318
 
319
  pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
320
  pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")