kuroiikimono committed on
Commit
42a4d93
·
verified ·
1 Parent(s): 430a98a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -13,7 +13,8 @@ from streamlit.runtime.scriptrunner import add_script_run_ctx
13
  #import streamlit.components.v1 as components
14
  from streamlit.runtime import get_instance
15
 
16
- from pypdf import PdfReader
 
17
  import glob
18
  import logging
19
 
@@ -168,19 +169,23 @@ This translation app is useful for people who want to translate something or wan
168
  f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
169
  )
170
 
171
- doc = PdfReader(PDF[0])
 
172
  # meta = doc.metadata
173
  page_count = len(doc.pages)
174
 
175
  book = [] # PDF text data pool
176
  progressbar1 = st.empty()
177
  my_bar1 = progressbar1.progress(0)
178
- for index, page in enumerate(doc.pages):
179
- page_text = page.extract_text()
 
 
180
  book.append((index, page_text))
181
  done = int(((index + 1) / page_count) * 100)
182
  my_bar1.progress(done,
183
  text=f"Reading Page Number : {index + 1}")
 
184
  st.session_state.book = book
185
  my_bar1.empty()
186
  if os.path.isfile(
 
13
  #import streamlit.components.v1 as components
14
  from streamlit.runtime import get_instance
15
 
16
+ #from pypdf import PdfReader
17
+ import fitz # pymupdf
18
  import glob
19
  import logging
20
 
 
169
  f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
170
  )
171
 
172
+ #doc = PdfReader(PDF[0])
173
+ doc = fitz.open(PDF[0])
174
  # meta = doc.metadata
175
  page_count = len(doc.pages)
176
 
177
  book = [] # PDF text data pool
178
  progressbar1 = st.empty()
179
  my_bar1 = progressbar1.progress(0)
180
+ #for index, page in enumerate(doc.pages):
181
+ for index, page in enumerate(doc):
182
+ #page_text = page.extract_text()
183
+ page_text = page.get_text()
184
  book.append((index, page_text))
185
  done = int(((index + 1) / page_count) * 100)
186
  my_bar1.progress(done,
187
  text=f"Reading Page Number : {index + 1}")
188
+ doc.close()
189
  st.session_state.book = book
190
  my_bar1.empty()
191
  if os.path.isfile(