Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,8 @@ from streamlit.runtime.scriptrunner import add_script_run_ctx
|
|
13 |
#import streamlit.components.v1 as components
|
14 |
from streamlit.runtime import get_instance
|
15 |
|
16 |
-
from pypdf import PdfReader
|
|
|
17 |
import glob
|
18 |
import logging
|
19 |
|
@@ -167,19 +168,23 @@ This translation app is useful for people who want to translate something or wan
|
|
167 |
f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
|
168 |
)
|
169 |
|
170 |
-
doc = PdfReader(PDF[0])
|
|
|
171 |
# meta = doc.metadata
|
172 |
-
page_count = len(doc.pages)
|
173 |
-
|
174 |
book = [] # PDF text data pool
|
175 |
progressbar1 = st.empty()
|
176 |
my_bar1 = progressbar1.progress(0)
|
177 |
-
for index, page in enumerate(doc.pages):
|
178 |
-
|
|
|
|
|
179 |
book.append((index, page_text))
|
180 |
done = int(((index + 1) / page_count) * 100)
|
181 |
my_bar1.progress(done,
|
182 |
text=f"Reading Page Number : {index + 1}")
|
|
|
183 |
st.session_state.book = book
|
184 |
my_bar1.empty()
|
185 |
if os.path.isfile(
|
|
|
13 |
#import streamlit.components.v1 as components
|
14 |
from streamlit.runtime import get_instance
|
15 |
|
16 |
+
#from pypdf import PdfReader
|
17 |
+
import fitz
|
18 |
import glob
|
19 |
import logging
|
20 |
|
|
|
168 |
f"removefolder/{temp_dir}/upload_folder_{st.session_state.count}/{uploadedfilename}.pdf"
|
169 |
)
|
170 |
|
171 |
+
#doc = PdfReader(PDF[0])
|
172 |
+
doc = fitz.open(PDF[0])
|
173 |
# meta = doc.metadata
|
174 |
+
#page_count = len(doc.pages)
|
175 |
+
page_count = len(doc)
|
176 |
book = [] # PDF text data pool
|
177 |
progressbar1 = st.empty()
|
178 |
my_bar1 = progressbar1.progress(0)
|
179 |
+
#for index, page in enumerate(doc.pages):
|
180 |
+
for index, page in enumerate(doc):
|
181 |
+
#page_text = page.extract_text()
|
182 |
+
page_text = page.get_text(sort=True)
|
183 |
book.append((index, page_text))
|
184 |
done = int(((index + 1) / page_count) * 100)
|
185 |
my_bar1.progress(done,
|
186 |
text=f"Reading Page Number : {index + 1}")
|
187 |
+
doc.close()
|
188 |
st.session_state.book = book
|
189 |
my_bar1.empty()
|
190 |
if os.path.isfile(
|