Spaces:

FlavioBF
/

AI_in_production_PRJs

Runtime error

App Files Community

FlavioBF committed on Dec 10, 2023

Commit

236e761

1 Parent(s): 95c12dd

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -53

app.py CHANGED Viewed

@@ -8,28 +8,6 @@
 # PDF
 # -------------------------
-#!pip install PyPDF2
-#!pip install pdfminer.six
-#!pip install pdfplumber
-#!pip install pdf2image
-#!pip install Pillow
-#!pip install pytesseract
-#!pip install poppler-utils
-#!pip install tesseract-ocr
-#!pip install libtesseract-dev
-#!pip install fastapi
-#!pip install -q torch
-#!pip install -q transformers
-#!pip install -q gradio
-#!pip install ffmpeg
-#!apt-get install poppler-utils
-#!apt install tesseract-ocr
-#!apt install libtesseract-dev
 # To read the PDF
 import PyPDF2
 # To analyze the PDF layout and extract text
@@ -281,35 +259,6 @@ pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-lea
 pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
-text_per_page = read_pdf(pdf_path)
-text_per_page.keys()
-page_1 = text_per_page['Page_0']
-# ============================================================================================
-# picking up the abstract from the first page content
-flag=False
-abstract_sect=""
-for i in range(len(page_1)):
-  if page_1[0][i].strip()=="Abstract":
-    flag=True
-  if page_1[0][i].strip()=="1 Introduction":
-    flag = False
-  if flag:
-    # abstract_sect contains the Abstract section content
-    abstract_sect+=page_1[0][i]
-from transformers import pipeline
-summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
-summary=(summarizer(abstract_sect))
-summary_text=summary[0].get("summary_text")
-print(summary_text)
@@ -333,8 +282,39 @@ def sentence_to_audio(summary_txt):
     return sampling_rate, speech_values.cpu().numpy().squeeze()
-#summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
-sentence_to_audio(summary_text)
 pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
 pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")

 # PDF
 # -------------------------
 # To read the PDF
 import PyPDF2
 # To analyze the PDF layout and extract text
 pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
     return sampling_rate, speech_values.cpu().numpy().squeeze()
+text_per_page = read_pdf(pdf_path)
+text_per_page.keys()
+page_1 = text_per_page['Page_0']
+# ============================================================================================
+# picking up the abstract from the first page content
+#flag=False
+#abstract_sect=""
+#for i in range(len(page_1)):
+#  if page_1[0][i].strip()=="Abstract":
+#    flag=True
+#  if page_1[0][i].strip()=="1 Introduction":
+#    flag = False
+#  if flag:
+#    # abstract_sect contains the Abstract section content
+#    abstract_sect+=page_1[0][i]
+#from transformers import pipeline
+#
+#summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
+#summary=(summarizer(abstract_sect))
+#summary_text=summary[0].get("summary_text")
+#print(summary_text)
+# ===========================================================
+summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
+sentence_to_audio(summary_txt)
 pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
 pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")