kuroiikimono committed on
Commit
381a898
·
verified ·
1 Parent(s): ea583ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -13,7 +13,8 @@ from streamlit.runtime.scriptrunner import add_script_run_ctx
13
  #import streamlit.components.v1 as components
14
  from streamlit.runtime import get_instance
15
 
16
- from pypdf import PdfReader
 
17
  import glob
18
  import logging
19
 
@@ -167,19 +168,23 @@ This translation app is useful for people who want to translate something or wan
167
  f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
168
  )
169
 
170
- doc = PdfReader(PDF[0])
 
171
  # meta = doc.metadata
172
- page_count = len(doc.pages)
173
-
174
  book = [] # PDF text data pool
175
  progressbar1 = st.empty()
176
  my_bar1 = progressbar1.progress(0)
177
- for index, page in enumerate(doc.pages):
178
- page_text = page.extract_text()
 
 
179
  book.append((index, page_text))
180
  done = int(((index + 1) / page_count) * 100)
181
  my_bar1.progress(done,
182
  text=f"Reading Page Number : {index + 1}")
 
183
  st.session_state.book = book
184
  my_bar1.empty()
185
  if os.path.isfile(
 
13
  #import streamlit.components.v1 as components
14
  from streamlit.runtime import get_instance
15
 
16
+ #from pypdf import PdfReader
17
+ import fitz
18
  import glob
19
  import logging
20
 
 
168
  f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
169
  )
170
 
171
+ #doc = PdfReader(PDF[0])
172
+ doc = fitz.open(PDF[0])
173
  # meta = doc.metadata
174
+ #page_count = len(doc.pages)
175
+ page_count = len(doc)
176
  book = [] # PDF text data pool
177
  progressbar1 = st.empty()
178
  my_bar1 = progressbar1.progress(0)
179
+ #for index, page in enumerate(doc.pages):
180
+ for index, page in enumerate(doc):
181
+ #page_text = page.extract_text()
182
+ page_text = page.get_text(sort=True)
183
  book.append((index, page_text))
184
  done = int(((index + 1) / page_count) * 100)
185
  my_bar1.progress(done,
186
  text=f"Reading Page Number : {index + 1}")
187
+ doc.close()
188
  st.session_state.book = book
189
  my_bar1.empty()
190
  if os.path.isfile(