"""Gradio demo app for IndicPhotoOCR: scene-text detection and recognition."""

import os
import tempfile

import gradio as gr
from PIL import Image

from IndicPhotoOCR.ocr import OCR  # Ensure OCR class is saved in a file named ocr.py
from IndicPhotoOCR.theme import Seafoam
from IndicPhotoOCR.utils.helper import detect_para

# Possible values for identifier_lang
VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"]  # Add more as needed


def process_image(image, identifier_lang):
    """Run text detection and recognition on an uploaded image.

    The image is written to a scratch directory, bounding boxes are detected
    and drawn onto a copy of it, and the text is recognized. The scratch
    directory (input image and annotated image) is removed before returning,
    so repeated requests do not accumulate temporary files.

    Parameters:
        image (PIL.Image): The input image to be processed.
        identifier_lang (str): The script identifier model to use
            (one of VALID_IDENTIFIER_LANGS).

    Returns:
        tuple: A PIL.Image with bounding boxes drawn, and a string of
        recognized text with one detected line per output line.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Gradio passes None when "Run OCR" is clicked without an upload.
        raise gr.Error("Please upload an image before running OCR.")

    # The input is stored as JPEG, which cannot encode alpha or palette modes.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # A scratch directory guarantees both files are deleted on every exit
    # path, including when OCR raises.
    with tempfile.TemporaryDirectory() as workdir:
        image_path = os.path.join(workdir, "input.jpg")
        output_path = os.path.join(workdir, "annotated.png")
        image.save(image_path)

        # Initialize OCR with the selected identifier language
        ocr = OCR(identifier_lang=identifier_lang, verbose=False)

        # Detect bounding boxes, then draw them and save the result
        detections = ocr.detect(image_path)
        ocr.visualize_detection(image_path, detections, save_path=output_path)

        # Image.open is lazy: copy the pixels into memory and close the file
        # handle so the annotated image survives removal of the directory.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        # Recognize text from the detected areas
        recognized_lines = ocr.ocr(image_path)

    recognized_text = '\n'.join(' '.join(line) for line in recognized_lines)
    return output_image, recognized_text


# Custom HTML for interface header with logos and alignment
# NOTE(review): only plain text is present here; the logo markup appears to
# be missing from this copy of the file - confirm against the deployed app.
interface_html = """
IITJ Logo
Bhashini Logo
"""

# Links to GitHub and Dataset repositories with GitHub icon
# NOTE(review): link markup also appears to be missing - confirm.
links_html = """
GitHub Repository Dataset Repository
"""

# Custom CSS to style the text box and center the title
custom_css = (
    " .custom-textbox textarea { font-size: 20px !important; }"
    " #title { text-align: center; font-size: 28px; font-weight: bold;"
    " margin-bottom: 20px; } "
)

# Create an instance of the Seafoam theme for a consistent visual style
seafoam = Seafoam()


def clear_inputs():
    """Return reset values for (input image, language, output image, text)."""
    return None, "auto", None, ""


# Define the Gradio Blocks interface
with gr.Blocks(theme=seafoam, css=custom_css) as demo:
    gr.Markdown("# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit", elem_id="title")
    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
    gr.Markdown(interface_html + links_html)

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", image_mode="RGB", label="Upload Image")
            lang_dropdown = gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="auto")
            run_button = gr.Button("Run OCR")
            clear_button = gr.Button("Clear", variant="stop")  # Added Clear Button

        with gr.Column():
            output_image = gr.Image(type="pil", label="Processed Image")
            output_text = gr.Textbox(label="Recognized Text", lines=10, elem_classes="custom-textbox")

    # Examples shown separately (to avoid schema error)
    gr.Examples(
        examples=[["test_images/image_88.jpg", "auto"], ["test_images/image_742.jpg", "hindi"]],
        inputs=[input_image, lang_dropdown],
        label="Try an example"
    )

    # Connect logic
    run_button.click(fn=process_image, inputs=[input_image, lang_dropdown], outputs=[output_image, output_text])
    clear_button.click(fn=clear_inputs, outputs=[input_image, lang_dropdown, output_image, output_text])  # Clear logic

# Launch
demo.launch(share=True)

# # 👇 Local server launch config
# if __name__ == "__main__":
#     demo.launch(
#         server_name="0.0.0.0",
#         server_port=7866,
#         share=False
#     )