Update app.py
app.py CHANGED

@@ -1,5 +1,5 @@
 import gradio as gr
-import torch, os, json, requests
+import torch, os, json, requests, sys
 from PIL import Image
 from transformers import DonutProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
 from torchvision import transforms
@@ -51,6 +51,8 @@ class OCRVQAModel(torch.nn.Module):
         # try:
         self.donut.eval()
         with torch.no_grad():
+
+            print(type(image), type(prompt), file = sys.stderr)
             image_ids = self.processor(image, return_tensors="pt").pixel_values.to(device)

             question = f'<s_docvqa><s_question>{prompt}</s_question><s_answer>'
@@ -120,7 +122,6 @@ with gr.Blocks() as demo:
         image_url = gr.Textbox(lines=1, label="Image URL", placeholder="Or, paste the image URL here")
         question = gr.Textbox(lines=5, label="Question")

-        with gr.Column():
         ask = gr.Button(label="Get the answer")

     with gr.Column():
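For context, the two added lines sit inside the model's Donut inference path: the image is converted to pixel values by the DonutProcessor, and the question is wrapped in the <s_docvqa> task prompt before decoding, exactly as the unchanged lines in the second hunk show. The snippet below is a minimal, self-contained sketch of that flow, assuming the public naver-clova-ix/donut-base-finetuned-docvqa checkpoint and a standalone answer() helper; the checkpoint name, device handling, and answer post-processing are illustrative assumptions, not taken from app.py or its OCRVQAModel class.

import re
import sys

import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel

# Assumed checkpoint; the app may load a different fine-tuned Donut model.
CHECKPOINT = "naver-clova-ix/donut-base-finetuned-docvqa"

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = DonutProcessor.from_pretrained(CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(CHECKPOINT).to(device)
model.eval()

def answer(image: Image.Image, prompt: str) -> str:
    with torch.no_grad():
        # Same debug print the commit adds: confirm input types on stderr.
        print(type(image), type(prompt), file=sys.stderr)

        # Encode the document image into pixel values.
        pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)

        # Donut passes the question as a DocVQA task prompt, as in the diff above.
        question = f"<s_docvqa><s_question>{prompt}</s_question><s_answer>"
        decoder_input_ids = processor.tokenizer(
            question, add_special_tokens=False, return_tensors="pt"
        ).input_ids.to(device)

        outputs = model.generate(
            pixel_values,
            decoder_input_ids=decoder_input_ids,
            max_length=model.decoder.config.max_position_embeddings,
            pad_token_id=processor.tokenizer.pad_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
            use_cache=True,
            bad_words_ids=[[processor.tokenizer.unk_token_id]],
        )

    # Strip special tokens and the leading task token, then parse the answer field.
    sequence = processor.batch_decode(outputs)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "")
    sequence = sequence.replace(processor.tokenizer.pad_token, "")
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
    return processor.token2json(sequence).get("answer", sequence)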