|
import gradio as gr |
|
from PIL import Image |
|
import tempfile |
|
import os |
|
from IndicPhotoOCR.ocr import OCR |
|
from IndicPhotoOCR.theme import Seafoam |
|
from IndicPhotoOCR.utils.helper import detect_para |
|
|
|
|
|
# Scripts the identifier model can be pinned to; "auto" lets the model pick.
VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"]
|
|
|
def process_image(image, identifier_lang):
    """
    Run text detection and recognition on an uploaded image.

    - Saves the PIL image to a temporary file (the OCR API is path-based)
    - Detects bounding boxes and renders them onto a copy of the image
    - Recognizes text in each detected region and joins it into lines

    Parameters:
        image (PIL.Image): The input image to be processed.
        identifier_lang (str): The script identifier model to use
            (expected to be one of VALID_IDENTIFIER_LANGS, e.g. "auto").

    Returns:
        tuple: A PIL.Image with bounding boxes and a string of recognized text.
    """
    input_path = None
    output_path = None
    try:
        # The OCR toolkit works on file paths, so persist the upload first.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_input:
            image.save(temp_input.name)
            input_path = temp_input.name

        ocr = OCR(identifier_lang=identifier_lang, verbose=False)

        detections = ocr.detect(input_path)

        # Reserve a path for the annotated output image.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_output:
            output_path = temp_output.name

        ocr.visualize_detection(input_path, detections, save_path=output_path)

        # Load the annotated image fully into memory (and release the file
        # handle) so the temp file can be deleted safely below.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        recognized = ocr.ocr(input_path)
        recognized_text = '\n'.join(' '.join(line) for line in recognized)

        return output_image, recognized_text
    finally:
        # BUG FIX: the original created both temp files with delete=False and
        # never removed them, leaking disk space on every request.
        for path in (input_path, output_path):
            if path and os.path.exists(path):
                os.remove(path)
|
|
|
|
|
# Header banner shown above the app: IITJ logo (left, on a white card) and
# Bhashini logo (right). Rendered via gr.Markdown, which accepts raw HTML.
interface_html = """
<div style="text-align: left; padding: 10px;">
<div style="background-color: white; padding: 10px; display: inline-block;">
<img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
</div>
<img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""
|
|
|
|
|
|
|
|
|
# Footer links to the project's code and dataset repositories; concatenated
# with interface_html and rendered as one Markdown/HTML block.
links_html = """
<div style="text-align: center; padding-top: 20px;">
<a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
GitHub Repository
</a>
<a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
Dataset Repository
</a>
</div>
"""
|
|
|
|
|
# CSS injected into the Blocks app: enlarges the recognized-text textarea
# (.custom-textbox, attached via elem_classes) and centers the #title headers.
custom_css = """
.custom-textbox textarea {
font-size: 20px !important;
}

#title {
text-align: center;
font-size: 28px;
font-weight: bold;
margin-bottom: 20px;
}
"""
|
|
|
|
|
# Project-provided Gradio theme instance, passed to gr.Blocks below.
seafoam = Seafoam()
|
|
|
|
|
def clear_inputs():
    """Reset the UI widgets to their initial state.

    Returns:
        tuple: Default values for (input_image, lang_dropdown,
        output_image, output_text) — cleared images, the "auto"
        language selection, and an empty text box.
    """
    cleared_input = None
    default_language = "auto"
    cleared_output = None
    empty_text = ""
    return cleared_input, default_language, cleared_output, empty_text
|
|
|
|
|
# Assemble the Gradio UI: titles/header, input controls on the left,
# outputs on the right, sample inputs, and button wiring.
with gr.Blocks(theme=seafoam, css=custom_css) as demo:

    gr.Markdown("# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit", elem_id="title")
    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
    gr.Markdown(interface_html + links_html)

    with gr.Row():
        with gr.Column():
            # Left column: image upload, script selector, and action buttons.
            input_image = gr.Image(type="pil", image_mode="RGB", label="Upload Image")
            lang_dropdown = gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="auto")
            run_button = gr.Button("Run OCR")
            clear_button = gr.Button("Clear", variant="stop")

        with gr.Column():
            # Right column: annotated detection image and recognized text.
            output_image = gr.Image(type="pil", label="Processed Image")
            output_text = gr.Textbox(label="Recognized Text", lines=10, elem_classes="custom-textbox")

    # Clickable examples; paths are relative to the process working directory
    # — NOTE(review): they presumably exist in the repo; verify at deploy time.
    gr.Examples(
        examples=[["test_images/image_88.jpg", "auto"],
                  ["test_images/image_742.jpg", "hindi"]],
        inputs=[input_image, lang_dropdown],
        label="Try an example"
    )

    # Wire events: Run performs OCR; Clear resets all four widgets via
    # clear_inputs (which takes no inputs, hence inputs omitted).
    run_button.click(fn=process_image, inputs=[input_image, lang_dropdown], outputs=[output_image, output_text])
    clear_button.click(fn=clear_inputs, outputs=[input_image, lang_dropdown, output_image, output_text])
|
|
|
|
|
|
|
# NOTE: share=True publishes a temporary public gradio.live URL — anyone with
# the link can submit images. Disable for private/internal deployments.
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|