gagan3012 committed on
Commit
da55020
·
1 Parent(s): a273017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -29
app.py CHANGED
@@ -7,7 +7,8 @@ import re
7
  import pytesseract
8
  from io import BytesIO
9
  import openai
10
-
 
11
 
12
 
13
  def predict_arabic(img, model_name="UBC-NLP/Qalam"):
@@ -79,6 +80,27 @@ def predict_nougat(img, model_name="facebook/nougat-small"):
79
  # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
80
  return page_sequence
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def predict_tesseract(img):
83
  text = pytesseract.image_to_string(Image.open(img))
84
  return text
@@ -101,7 +123,8 @@ st.set_page_config(
101
  st.header("Qalam: A Multilingual OCR System")
102
  st.sidebar.header("Configuration and Image Upload")
103
  st.sidebar.subheader("Adjust Image Enhancement Options")
104
- img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg'])
 
105
  realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
106
  # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
107
  aspect_choice = st.sidebar.radio(label="Aspect Ratio", options=[
@@ -149,6 +172,7 @@ if img_file:
149
  # st.subheader("Output: Preview and Analyze")
150
  # # _ = cropped_img.thumbnail((150, 150))
151
  # st.image(cropped_img)
 
152
  button = st.sidebar.button("Run OCR")
153
 
154
  if button:
@@ -169,36 +193,48 @@ if img_file:
169
  text_file = BytesIO(ocr_text.encode())
170
  st.download_button('Download Text', text_file, file_name='ocr_text.txt')
171
 
172
- openai.api_key = ""
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- if "openai_model" not in st.session_state:
175
- st.session_state["openai_model"] = "gpt-3.5-turbo"
176
 
177
- if "messages" not in st.session_state:
178
- st.session_state.messages = []
179
 
180
- for message in st.session_state.messages:
181
- with st.chat_message(message["role"]):
182
- st.markdown(message["content"])
183
 
184
- if prompt := st.chat_input("How can I help?"):
185
- st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
186
- with st.chat_message("user"):
187
- st.markdown(prompt)
188
 
189
- with st.chat_message("assistant"):
190
- message_placeholder = st.empty()
191
- full_response = ""
192
- for response in openai.ChatCompletion.create(
193
- model=st.session_state["openai_model"],
194
- messages=[
195
- {"role": m["role"], "content": m["content"]}
196
- for m in st.session_state.messages
197
- ],
198
- stream=True,
199
- ):
200
- full_response += response.choices[0].delta.get("content", "")
201
- message_placeholder.markdown(full_response + "▌")
202
- message_placeholder.markdown(full_response)
203
- st.session_state.messages.append({"role": "assistant", "content": full_response})
204
 
 
7
  import pytesseract
8
  from io import BytesIO
9
  import openai
10
+ import requests
11
+ from nougat.dataset.rasterize import rasterize_paper
12
 
13
 
14
  def predict_arabic(img, model_name="UBC-NLP/Qalam"):
 
80
  # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
81
  return page_sequence
82
 
83
+
84
def inference_nougat(pdf_file, pdf_link):
    """OCR every page of a PDF with the Nougat model and return the text.

    Parameters
    ----------
    pdf_file : file-like or None
        Uploaded file object exposing a ``.name`` path, or ``None`` when the
        PDF should be fetched from ``pdf_link`` instead.
    pdf_link : str
        URL of a PDF to download when no file was uploaded.

    Returns
    -------
    str
        Concatenated per-page OCR output with Nougat's LaTeX delimiters
        normalized to Markdown-friendly ``$`` / ``$$``, or an error message
        when neither a file nor a link was provided.
    """
    if pdf_file is None:
        if pdf_link == '':
            print("No file is uploaded and No link is provided")
            return "No data provided. Upload a pdf file or provide a pdf link and try again!"
        # get_pdf is defined elsewhere in this file; presumably it downloads
        # the PDF and returns a local file path — confirm against its definition.
        file_name = get_pdf(pdf_link)
    else:
        file_name = pdf_file.name
        # NOTE(review): the original also derived a `pdf_name` stem here but
        # never used it; dropped as dead code.

    # Rasterize each PDF page to a PIL image, then OCR and concatenate them.
    images = rasterize_paper(file_name, return_pil=True)
    sequence = ""
    for image in images:
        sequence += predict_nougat(image)

    # Normalize LaTeX math delimiters (\( \) \[ \]) to $ / $$ for Markdown.
    content = sequence.replace(r'\(', '$').replace(r'\)', '$') \
                      .replace(r'\[', '$$').replace(r'\]', '$$')
    return content
104
def predict_tesseract(img):
    """Open *img* with PIL and return the text Tesseract extracts from it."""
    return pytesseract.image_to_string(Image.open(img))
 
123
  st.header("Qalam: A Multilingual OCR System")
124
  st.sidebar.header("Configuration and Image Upload")
125
  st.sidebar.subheader("Adjust Image Enhancement Options")
126
img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg', 'pdf'])
# BUG FIX: st.sidebar.text() only *displays* static text (returns a
# DeltaGenerator, not the user's input); text_input() is the widget that
# returns the typed URL string, which the `elif input_file` branch below needs.
input_file = st.sidebar.text_input("Enter the file URL")
128
  realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
129
  # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
130
  aspect_choice = st.sidebar.radio(label="Aspect Ratio", options=[
 
172
  # st.subheader("Output: Preview and Analyze")
173
  # # _ = cropped_img.thumbnail((150, 150))
174
  # st.image(cropped_img)
175
+
176
  button = st.sidebar.button("Run OCR")
177
 
178
  if button:
 
193
  text_file = BytesIO(ocr_text.encode())
194
  st.download_button('Download Text', text_file, file_name='ocr_text.txt')
195
 
196
+ elif input_file is not "":
197
+ button = st.sidebar.button("Run OCR")
198
+
199
+ if button:
200
+ with st.spinner('Running OCR...'):
201
+ ocr_text = inference_nougat(None, input_file)
202
+ st.subheader(f"OCR Results for the PDF file")
203
+ st.write(ocr_text)
204
+ text_file = BytesIO(ocr_text.encode())
205
+ st.download_button('Download Text', text_file, file_name='ocr_text.txt')
206
+
207
+
208
+ # openai.api_key = ""
209
 
210
+ # if "openai_model" not in st.session_state:
211
+ # st.session_state["openai_model"] = "gpt-3.5-turbo"
212
 
213
+ # if "messages" not in st.session_state:
214
+ # st.session_state.messages = []
215
 
216
+ # for message in st.session_state.messages:
217
+ # with st.chat_message(message["role"]):
218
+ # st.markdown(message["content"])
219
 
220
+ # if prompt := st.chat_input("How can I help?"):
221
+ # st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
222
+ # with st.chat_message("user"):
223
+ # st.markdown(prompt)
224
 
225
+ # with st.chat_message("assistant"):
226
+ # message_placeholder = st.empty()
227
+ # full_response = ""
228
+ # for response in openai.ChatCompletion.create(
229
+ # model=st.session_state["openai_model"],
230
+ # messages=[
231
+ # {"role": m["role"], "content": m["content"]}
232
+ # for m in st.session_state.messages
233
+ # ],
234
+ # stream=True,
235
+ # ):
236
+ # full_response += response.choices[0].delta.get("content", "")
237
+ # message_placeholder.markdown(full_response + "▌")
238
+ # message_placeholder.markdown(full_response)
239
+ # st.session_state.messages.append({"role": "assistant", "content": full_response})
240