gagan3012 committed on
Commit
da55020
·
1 Parent(s): a273017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -29
app.py CHANGED
@@ -7,7 +7,8 @@ import re
7
  import pytesseract
8
  from io import BytesIO
9
  import openai
10
-
 
11
 
12
 
13
  def predict_arabic(img, model_name="UBC-NLP/Qalam"):
@@ -79,6 +80,27 @@ def predict_nougat(img, model_name="facebook/nougat-small"):
79
  # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
80
  return page_sequence
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def predict_tesseract(img):
83
  text = pytesseract.image_to_string(Image.open(img))
84
  return text
@@ -101,7 +123,8 @@ st.set_page_config(
101
  st.header("Qalam: A Multilingual OCR System")
102
  st.sidebar.header("Configuration and Image Upload")
103
  st.sidebar.subheader("Adjust Image Enhancement Options")
104
- img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg'])
 
105
  realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
106
  # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
107
  aspect_choice = st.sidebar.radio(label="Aspect Ratio", options=[
@@ -149,6 +172,7 @@ if img_file:
149
  # st.subheader("Output: Preview and Analyze")
150
  # # _ = cropped_img.thumbnail((150, 150))
151
  # st.image(cropped_img)
 
152
  button = st.sidebar.button("Run OCR")
153
 
154
  if button:
@@ -169,36 +193,48 @@ if img_file:
169
  text_file = BytesIO(ocr_text.encode())
170
  st.download_button('Download Text', text_file, file_name='ocr_text.txt')
171
 
172
- openai.api_key = ""
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- if "openai_model" not in st.session_state:
175
- st.session_state["openai_model"] = "gpt-3.5-turbo"
176
 
177
- if "messages" not in st.session_state:
178
- st.session_state.messages = []
179
 
180
- for message in st.session_state.messages:
181
- with st.chat_message(message["role"]):
182
- st.markdown(message["content"])
183
 
184
- if prompt := st.chat_input("How can I help?"):
185
- st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
186
- with st.chat_message("user"):
187
- st.markdown(prompt)
188
 
189
- with st.chat_message("assistant"):
190
- message_placeholder = st.empty()
191
- full_response = ""
192
- for response in openai.ChatCompletion.create(
193
- model=st.session_state["openai_model"],
194
- messages=[
195
- {"role": m["role"], "content": m["content"]}
196
- for m in st.session_state.messages
197
- ],
198
- stream=True,
199
- ):
200
- full_response += response.choices[0].delta.get("content", "")
201
- message_placeholder.markdown(full_response + "▌")
202
- message_placeholder.markdown(full_response)
203
- st.session_state.messages.append({"role": "assistant", "content": full_response})
204
 
 
7
  import pytesseract
8
  from io import BytesIO
9
  import openai
10
+ import requests
11
+ from nougat.dataset.rasterize import rasterize_paper
12
 
13
 
14
  def predict_arabic(img, model_name="UBC-NLP/Qalam"):
 
80
  # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
81
  return page_sequence
82
 
83
+
84
def inference_nougat(pdf_file, pdf_link):
    """OCR every page of a PDF with the Nougat model and return the text.

    Parameters
    ----------
    pdf_file : file-like or None
        Uploaded file object exposing a ``.name`` path, or ``None`` when the
        PDF should be fetched from ``pdf_link`` instead.
    pdf_link : str
        URL of a PDF to download when no file was uploaded.

    Returns
    -------
    str
        Concatenated per-page OCR output with Nougat's LaTeX delimiters
        normalized to Markdown-friendly ``$`` / ``$$``, or an error message
        when neither a file nor a link was provided.
    """
    if pdf_file is None:
        if pdf_link == '':
            print("No file is uploaded and No link is provided")
            return "No data provided. Upload a pdf file or provide a pdf link and try again!"
        # get_pdf is defined elsewhere in this file; presumably it downloads
        # the PDF and returns a local file path — confirm against its definition.
        file_name = get_pdf(pdf_link)
    else:
        file_name = pdf_file.name
        # NOTE(review): the original also derived a `pdf_name` stem here but
        # never used it; dropped as dead code.

    # Rasterize each PDF page to a PIL image, then OCR and concatenate them.
    images = rasterize_paper(file_name, return_pil=True)
    sequence = ""
    for image in images:
        sequence += predict_nougat(image)

    # Normalize LaTeX math delimiters (\( \) \[ \]) to $ / $$ for Markdown.
    content = sequence.replace(r'\(', '$').replace(r'\)', '$') \
                      .replace(r'\[', '$$').replace(r'\]', '$$')
    return content
104
def predict_tesseract(img):
    """Open *img* with PIL and return the text Tesseract extracts from it."""
    return pytesseract.image_to_string(Image.open(img))
 
123
  st.header("Qalam: A Multilingual OCR System")
124
  st.sidebar.header("Configuration and Image Upload")
125
  st.sidebar.subheader("Adjust Image Enhancement Options")
126
img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg', 'pdf'])
# BUG FIX: st.sidebar.text() only *displays* static text (returns a
# DeltaGenerator, not the user's input); text_input() is the widget that
# returns the typed URL string, which the `elif input_file` branch below needs.
input_file = st.sidebar.text_input("Enter the file URL")
128
  realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
129
  # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
130
  aspect_choice = st.sidebar.radio(label="Aspect Ratio", options=[
 
172
  # st.subheader("Output: Preview and Analyze")
173
  # # _ = cropped_img.thumbnail((150, 150))
174
  # st.image(cropped_img)
175
+
176
  button = st.sidebar.button("Run OCR")
177
 
178
  if button:
 
193
  text_file = BytesIO(ocr_text.encode())
194
  st.download_button('Download Text', text_file, file_name='ocr_text.txt')
195
 
196
+ elif input_file is not "":
197
+ button = st.sidebar.button("Run OCR")
198
+
199
+ if button:
200
+ with st.spinner('Running OCR...'):
201
+ ocr_text = inference_nougat(None, input_file)
202
+ st.subheader(f"OCR Results for the PDF file")
203
+ st.write(ocr_text)
204
+ text_file = BytesIO(ocr_text.encode())
205
+ st.download_button('Download Text', text_file, file_name='ocr_text.txt')
206
+
207
+
208
+ # openai.api_key = ""
209
 
210
+ # if "openai_model" not in st.session_state:
211
+ # st.session_state["openai_model"] = "gpt-3.5-turbo"
212
 
213
+ # if "messages" not in st.session_state:
214
+ # st.session_state.messages = []
215
 
216
+ # for message in st.session_state.messages:
217
+ # with st.chat_message(message["role"]):
218
+ # st.markdown(message["content"])
219
 
220
+ # if prompt := st.chat_input("How can I help?"):
221
+ # st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
222
+ # with st.chat_message("user"):
223
+ # st.markdown(prompt)
224
 
225
+ # with st.chat_message("assistant"):
226
+ # message_placeholder = st.empty()
227
+ # full_response = ""
228
+ # for response in openai.ChatCompletion.create(
229
+ # model=st.session_state["openai_model"],
230
+ # messages=[
231
+ # {"role": m["role"], "content": m["content"]}
232
+ # for m in st.session_state.messages
233
+ # ],
234
+ # stream=True,
235
+ # ):
236
+ # full_response += response.choices[0].delta.get("content", "")
237
+ # message_placeholder.markdown(full_response + "▌")
238
+ # message_placeholder.markdown(full_response)
239
+ # st.session_state.messages.append({"role": "assistant", "content": full_response})
240