Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,8 @@ from streamlit.runtime.scriptrunner import add_script_run_ctx
|
|
13 |
#import streamlit.components.v1 as components
|
14 |
from streamlit.runtime import get_instance
|
15 |
|
16 |
-
from pypdf import PdfReader
|
|
|
17 |
import glob
|
18 |
import logging
|
19 |
|
@@ -168,19 +169,23 @@ This translation app is useful for people who want to translate something or wan
|
|
168 |
f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
|
169 |
)
|
170 |
|
171 |
-
doc = PdfReader(PDF[0])
|
|
|
172 |
# meta = doc.metadata
|
173 |
page_count = len(doc.pages)
|
174 |
|
175 |
book = [] # PDF text data pool
|
176 |
progressbar1 = st.empty()
|
177 |
my_bar1 = progressbar1.progress(0)
|
178 |
-
for index, page in enumerate(doc.pages):
|
179 |
-
page_text = page.extract_text()
|
|
|
|
|
180 |
book.append((index, page_text))
|
181 |
done = int(((index + 1) / page_count) * 100)
|
182 |
my_bar1.progress(done,
|
183 |
text=f"Reading Page Number : {index + 1}")
|
|
|
184 |
st.session_state.book = book
|
185 |
my_bar1.empty()
|
186 |
if os.path.isfile(
|
|
|
13 |
#import streamlit.components.v1 as components
|
14 |
from streamlit.runtime import get_instance
|
15 |
|
16 |
+
#from pypdf import PdfReader
|
17 |
+
import fitz # pymupdf
|
18 |
import glob
|
19 |
import logging
|
20 |
|
|
|
169 |
f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
|
170 |
)
|
171 |
|
172 |
+
#doc = PdfReader(PDF[0])
|
173 |
+
doc = fitz.open(PDF[0])
|
174 |
# meta = doc.metadata
|
175 |
page_count = len(doc.pages)
|
176 |
|
177 |
book = [] # PDF text data pool
|
178 |
progressbar1 = st.empty()
|
179 |
my_bar1 = progressbar1.progress(0)
|
180 |
+
#for index, page in enumerate(doc.pages):
|
181 |
+
for index, page in enumerate(doc):
|
182 |
+
#page_text = page.extract_text()
|
183 |
+
page_text = page.get_text()
|
184 |
book.append((index, page_text))
|
185 |
done = int(((index + 1) / page_count) * 100)
|
186 |
my_bar1.progress(done,
|
187 |
text=f"Reading Page Number : {index + 1}")
|
188 |
+
doc.close()
|
189 |
st.session_state.book = book
|
190 |
my_bar1.empty()
|
191 |
if os.path.isfile(
|