Improve interface logic with image editor and result parsing
Browse files
app.py
CHANGED
|
@@ -24,11 +24,6 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
|
|
| 24 |
model = model.eval().cuda()
|
| 25 |
model.config.pad_token_id = tokenizer.eos_token_id
|
| 26 |
|
| 27 |
-
def image_to_base64(image):
|
| 28 |
-
buffered = io.BytesIO()
|
| 29 |
-
image.save(buffered, format="PNG")
|
| 30 |
-
return base64.b64encode(buffered.getvalue()).decode()
|
| 31 |
-
|
| 32 |
UPLOAD_FOLDER = "./uploads"
|
| 33 |
RESULTS_FOLDER = "./results"
|
| 34 |
|
|
@@ -36,6 +31,13 @@ for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
|
|
| 36 |
if not os.path.exists(folder):
|
| 37 |
os.makedirs(folder)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
@spaces.GPU()
|
| 40 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
| 41 |
if image is None:
|
|
@@ -45,9 +47,25 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
| 45 |
image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
|
| 46 |
result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
|
| 47 |
|
| 48 |
-
shutil.copy(image, image_path)
|
| 49 |
-
|
| 50 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
if task == "Plain Text OCR":
|
| 52 |
res = model.chat(tokenizer, image_path, ocr_type='ocr')
|
| 53 |
return res, None, unique_id
|
|
@@ -74,21 +92,34 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
| 74 |
finally:
|
| 75 |
if os.path.exists(image_path):
|
| 76 |
os.remove(image_path)
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
def update_inputs(task):
|
| 79 |
if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
|
| 80 |
-
return [gr.update(visible=False)] *
|
| 81 |
elif task == "Fine-grained OCR (Box)":
|
| 82 |
return [
|
| 83 |
gr.update(visible=True, choices=["ocr", "format"]),
|
| 84 |
gr.update(visible=True),
|
| 85 |
gr.update(visible=False),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
]
|
| 87 |
elif task == "Fine-grained OCR (Color)":
|
| 88 |
return [
|
| 89 |
gr.update(visible=True, choices=["ocr", "format"]),
|
| 90 |
gr.update(visible=False),
|
| 91 |
gr.update(visible=True, choices=["red", "green", "blue"]),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
]
|
| 93 |
|
| 94 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
@@ -96,7 +127,9 @@ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
| 96 |
|
| 97 |
if res.startswith("Error:"):
|
| 98 |
return res, None
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
res = f"$$ {res} $$"
|
| 101 |
|
| 102 |
if html_content:
|
|
@@ -118,18 +151,11 @@ with gr.Blocks() as demo:
|
|
| 118 |
with gr.Row():
|
| 119 |
gr.Markdown(title)
|
| 120 |
|
| 121 |
-
with gr.Row():
|
| 122 |
-
with gr.Column(scale=1):
|
| 123 |
-
gr.Markdown(description)
|
| 124 |
-
with gr.Column(scale=1):
|
| 125 |
-
with gr.Group():
|
| 126 |
-
gr.Markdown(modelinfor)
|
| 127 |
-
gr.Markdown(joinus)
|
| 128 |
-
|
| 129 |
with gr.Row():
|
| 130 |
with gr.Column(scale=1):
|
| 131 |
with gr.Group():
|
| 132 |
image_input = gr.Image(type="filepath", label="Input Image")
|
|
|
|
| 133 |
task_dropdown = gr.Dropdown(
|
| 134 |
choices=[
|
| 135 |
"Plain Text OCR",
|
|
@@ -158,6 +184,7 @@ with gr.Blocks() as demo:
|
|
| 158 |
visible=False
|
| 159 |
)
|
| 160 |
submit_button = gr.Button("Process")
|
|
|
|
| 161 |
|
| 162 |
with gr.Column(scale=1):
|
| 163 |
with gr.Group():
|
|
@@ -167,7 +194,13 @@ with gr.Blocks() as demo:
|
|
| 167 |
task_dropdown.change(
|
| 168 |
update_inputs,
|
| 169 |
inputs=[task_dropdown],
|
| 170 |
-
outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
)
|
| 172 |
|
| 173 |
submit_button.click(
|
|
@@ -176,6 +209,12 @@ with gr.Blocks() as demo:
|
|
| 176 |
outputs=[output_markdown, output_html]
|
| 177 |
)
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
if __name__ == "__main__":
|
| 180 |
cleanup_old_files()
|
| 181 |
demo.launch()
|
|
|
|
| 24 |
model = model.eval().cuda()
|
| 25 |
model.config.pad_token_id = tokenizer.eos_token_id
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
UPLOAD_FOLDER = "./uploads"
|
| 28 |
RESULTS_FOLDER = "./results"
|
| 29 |
|
|
|
|
| 31 |
if not os.path.exists(folder):
|
| 32 |
os.makedirs(folder)
|
| 33 |
|
| 34 |
+
def image_to_base64(image):
    """Return *image* encoded as a base64 string of its PNG bytes.

    Args:
        image: Any object exposing ``save(fp, format=...)``; it is asked to
            write itself in PNG format into an in-memory buffer.

    Returns:
        The base64 encoding of the written bytes, decoded to ``str``.
    """
    # The context manager releases the in-memory buffer as soon as the
    # encoded text has been produced.
    with io.BytesIO() as buffered:
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
@spaces.GPU()
|
| 42 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
| 43 |
if image is None:
|
|
|
|
| 47 |
image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
|
| 48 |
result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
|
| 49 |
|
|
|
|
|
|
|
| 50 |
try:
|
| 51 |
+
if isinstance(image, dict): # If image is from ImageEditor
|
| 52 |
+
composite_image = image.get("composite")
|
| 53 |
+
if composite_image is not None:
|
| 54 |
+
if isinstance(composite_image, np.ndarray):
|
| 55 |
+
Image.fromarray(composite_image).save(image_path)
|
| 56 |
+
elif isinstance(composite_image, str):
|
| 57 |
+
shutil.copy(composite_image, image_path)
|
| 58 |
+
else:
|
| 59 |
+
return "Error: Unsupported image format from ImageEditor", None, None
|
| 60 |
+
else:
|
| 61 |
+
return "Error: No composite image found in ImageEditor output", None, None
|
| 62 |
+
elif isinstance(image, np.ndarray):
|
| 63 |
+
Image.fromarray(image).save(image_path)
|
| 64 |
+
elif isinstance(image, str):
|
| 65 |
+
shutil.copy(image, image_path)
|
| 66 |
+
else:
|
| 67 |
+
return "Error: Unsupported image format", None, None
|
| 68 |
+
|
| 69 |
if task == "Plain Text OCR":
|
| 70 |
res = model.chat(tokenizer, image_path, ocr_type='ocr')
|
| 71 |
return res, None, unique_id
|
|
|
|
| 92 |
finally:
|
| 93 |
if os.path.exists(image_path):
|
| 94 |
os.remove(image_path)
|
| 95 |
+
def update_image_input(task):
    """Choose between the plain image input and the image editor for *task*.

    Args:
        task: The task name selected in the task dropdown.

    Returns:
        A 3-tuple of ``gr.update`` objects, in the order the change handler
        wires them: ``image_input``, ``image_editor``,
        ``editor_submit_button``. The editor and its submit button are shown
        only for "Fine-grained OCR (Color)"; every other task shows the
        plain image input instead.
    """
    use_editor = task == "Fine-grained OCR (Color)"
    return (
        gr.update(visible=not use_editor),
        gr.update(visible=use_editor),
        gr.update(visible=use_editor),
    )
|
| 100 |
+
|
| 101 |
def update_inputs(task):
    """Compute visibility updates for the task-dependent controls.

    Args:
        task: The task name selected in the task dropdown.

    Returns:
        A list of seven ``gr.update`` objects, in the order the change
        handler wires them: ``ocr_type_dropdown``, ``ocr_box_input``,
        ``ocr_color_dropdown``, ``image_input``, ``image_editor``,
        ``submit_button``, ``editor_submit_button``.

    Any task other than the two fine-grained ones (including an unrecognised
    value) gets the default layout, so the handler never returns ``None``
    for its seven outputs.
    """
    is_box = task == "Fine-grained OCR (Box)"
    is_color = task == "Fine-grained OCR (Color)"

    # The OCR-type selector is only meaningful for the fine-grained tasks.
    if is_box or is_color:
        ocr_type_update = gr.update(visible=True, choices=["ocr", "format"])
    else:
        ocr_type_update = gr.update(visible=False)

    if is_color:
        ocr_color_update = gr.update(visible=True, choices=["red", "green", "blue"])
    else:
        ocr_color_update = gr.update(visible=False)

    return [
        ocr_type_update,
        gr.update(visible=is_box),        # ocr_box_input
        ocr_color_update,
        # The plain image input must stay visible unless the editor replaces
        # it; this keeps the result in agreement with update_image_input,
        # which is bound to the same dropdown change event.
        gr.update(visible=not is_color),  # image_input
        gr.update(visible=is_color),      # image_editor
        gr.update(visible=not is_color),  # submit_button
        gr.update(visible=is_color),      # editor_submit_button
    ]
|
| 124 |
|
| 125 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
|
|
| 127 |
|
| 128 |
if res.startswith("Error:"):
|
| 129 |
return res, None
|
| 130 |
+
|
| 131 |
+
res = res.replace("\\title", "\\title ")
|
| 132 |
+
|
| 133 |
res = f"$$ {res} $$"
|
| 134 |
|
| 135 |
if html_content:
|
|
|
|
| 151 |
with gr.Row():
|
| 152 |
gr.Markdown(title)
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
with gr.Row():
|
| 155 |
with gr.Column(scale=1):
|
| 156 |
with gr.Group():
|
| 157 |
image_input = gr.Image(type="filepath", label="Input Image")
|
| 158 |
+
# process_image only accepts a numpy array or a file path as the editor's
# "composite" value, so request numpy arrays rather than PIL images.
image_editor = gr.ImageEditor(label="Image Editor", type="numpy", visible=False)
|
| 159 |
task_dropdown = gr.Dropdown(
|
| 160 |
choices=[
|
| 161 |
"Plain Text OCR",
|
|
|
|
| 184 |
visible=False
|
| 185 |
)
|
| 186 |
submit_button = gr.Button("Process")
|
| 187 |
+
editor_submit_button = gr.Button("Process Edited Image", visible=False)
|
| 188 |
|
| 189 |
with gr.Column(scale=1):
|
| 190 |
with gr.Group():
|
|
|
|
| 194 |
task_dropdown.change(
|
| 195 |
update_inputs,
|
| 196 |
inputs=[task_dropdown],
|
| 197 |
+
outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, image_input, image_editor, submit_button, editor_submit_button]
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
task_dropdown.change(
|
| 201 |
+
update_image_input,
|
| 202 |
+
inputs=[task_dropdown],
|
| 203 |
+
outputs=[image_input, image_editor, editor_submit_button]
|
| 204 |
)
|
| 205 |
|
| 206 |
submit_button.click(
|
|
|
|
| 209 |
outputs=[output_markdown, output_html]
|
| 210 |
)
|
| 211 |
|
| 212 |
+
editor_submit_button.click(
|
| 213 |
+
ocr_demo,
|
| 214 |
+
inputs=[image_editor, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
|
| 215 |
+
outputs=[output_markdown, output_html]
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
if __name__ == "__main__":
|
| 219 |
cleanup_old_files()
|
| 220 |
demo.launch()
|