nnpy committed on
Commit
decd62f
·
verified ·
1 Parent(s): 2d3d7c6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +22 -0
  2. requirements.txt +49 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: extract the first table from one page of an uploaded PDF.

The user uploads a PDF and picks a page number. The first table that tabula
detects on that page is rendered in the browser and offered as an Excel
download.
"""
import io

import tabula
import pandas as pd
import streamlit as st

st.title("TableOCR")

# Group the inputs in a form so the extraction below only runs after the
# user presses Submit, not on every widget change.
with st.form(key='my_form'):
    file = st.file_uploader("Upload a file", type="pdf", accept_multiple_files=False)
    page_no = st.number_input("Enter page number", min_value=1, value=1)
    submit_button = st.form_submit_button(label='Submit')

# `page_no` always has a value (default 1), so only the upload needs checking.
if submit_button and file is not None:
    with st.spinner("Converting PDF page to image..."):
        tables = tabula.read_pdf(file, pages=page_no, multiple_tables=True)

    if not tables:
        # Tell the user explicitly instead of silently showing an empty table.
        st.warning("No tables were detected on the selected page.")

    # Only the first detected table is used; fall back to an empty frame.
    table_df = tables[0] if tables else pd.DataFrame()
    st.write("Scroll down to download the output file.")
    st.table(table_df)

    # Build the workbook in memory rather than writing "output.xlsx" to the
    # server's working directory: that path is shared by all sessions and is
    # not reachable through a relative <a href> link in the rendered page.
    excel_buffer = io.BytesIO()
    table_df.to_excel(excel_buffer, index=False)
    st.download_button(
        label="Click here to download the output file",
        data=excel_buffer.getvalue(),
        file_name="output.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
requirements.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.2.0
2
+ attrs==23.2.0
3
+ blinker==1.7.0
4
+ cachetools==5.3.3
5
+ certifi==2024.2.2
6
+ charset-normalizer==3.3.2
7
+ click==8.1.7
8
+ distro==1.9.0
9
+ et-xmlfile==1.1.0
10
+ gitdb==4.0.11
11
+ GitPython==3.1.42
12
+ idna==3.6
13
+ importlib-metadata==7.0.1
14
+ Jinja2==3.1.3
15
+ jsonschema==4.21.1
16
+ jsonschema-specifications==2023.12.1
17
+ markdown-it-py==3.0.0
18
+ MarkupSafe==2.1.5
19
+ mdurl==0.1.2
20
+ numpy==1.26.4
21
+ openpyxl==3.1.2
22
+ packaging==23.2
23
+ pandas==2.2.1
24
+ pillow==10.2.0
25
+ protobuf==4.25.3
26
+ pyarrow==15.0.0
27
+ pydeck==0.8.1b0
28
+ Pygments==2.17.2
29
+ python-dateutil==2.9.0.post0
30
+ pytz==2024.1
31
+ referencing==0.33.0
32
+ requests==2.31.0
33
+ rich==13.7.1
34
+ rpds-py==0.18.0
35
+ six==1.16.0
36
+ smmap==5.0.1
37
+ streamlit==1.31.1
38
+ tabula-py==2.9.0
39
+ tenacity==8.2.3
40
+ toml==0.10.2
41
+ toolz==0.12.1
42
+ tornado==6.4
43
+ typing_extensions==4.10.0
44
+ tzdata==2024.1
45
+ tzlocal==5.2
46
+ urllib3==2.2.1
47
+ validators==0.22.0
48
+ watchdog==4.0.0
49
+ zipp==3.17.0