|
import gradio as gr |
|
from PIL import Image |
|
import tempfile |
|
import os |
|
from IndicPhotoOCR.ocr import OCR |
|
from IndicPhotoOCR.theme import Seafoam |
|
from IndicPhotoOCR.utils.helper import detect_para |
|
|
|
|
|
# Scripts the identifier model can be pinned to; "auto" lets the model pick.
VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"]
|
|
|
def process_image(image, identifier_lang):
    """
    Run text detection and recognition on an uploaded image.

    - Saves the PIL image to a temporary file (the OCR API is path-based)
    - Detects bounding boxes and renders them onto a copy of the image
    - Recognizes text in each detected region and joins it into lines

    Parameters:
        image (PIL.Image): The input image to be processed.
        identifier_lang (str): The script identifier model to use
            (expected to be one of VALID_IDENTIFIER_LANGS, e.g. "auto").

    Returns:
        tuple: A PIL.Image with bounding boxes and a string of recognized text.
    """
    input_path = None
    output_path = None
    try:
        # The OCR toolkit works on file paths, so persist the upload first.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_input:
            image.save(temp_input.name)
            input_path = temp_input.name

        ocr = OCR(identifier_lang=identifier_lang, verbose=False)

        detections = ocr.detect(input_path)

        # Reserve a path for the annotated output image.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_output:
            output_path = temp_output.name

        ocr.visualize_detection(input_path, detections, save_path=output_path)

        # Load the annotated image fully into memory (and release the file
        # handle) so the temp file can be deleted safely below.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        recognized = ocr.ocr(input_path)
        recognized_text = '\n'.join(' '.join(line) for line in recognized)

        return output_image, recognized_text
    finally:
        # BUG FIX: the original created both temp files with delete=False and
        # never removed them, leaking disk space on every request.
        for path in (input_path, output_path):
            if path and os.path.exists(path):
                os.remove(path)
|
|
|
|
|
# Header banner shown above the app: IITJ logo (left, on a white card) and
# Bhashini logo (right). Rendered via gr.Markdown, which accepts raw HTML.
interface_html = """
<div style="text-align: left; padding: 10px;">
<div style="background-color: white; padding: 10px; display: inline-block;">
<img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
</div>
<img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""
|
|
|
|
|
|
|
|
|
# Footer links to the project's code and dataset repositories; concatenated
# with interface_html and rendered as one Markdown/HTML block.
links_html = """
<div style="text-align: center; padding-top: 20px;">
<a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
GitHub Repository
</a>
<a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
Dataset Repository
</a>
</div>
"""
|
|
|
|
|
# CSS injected into the Blocks app: enlarges the recognized-text textarea
# (.custom-textbox, attached via elem_classes) and centers the #title headers.
custom_css = """
.custom-textbox textarea {
font-size: 20px !important;
}

#title {
text-align: center;
font-size: 28px;
font-weight: bold;
margin-bottom: 20px;
}
"""
|
|
|
|
|
# Project-provided Gradio theme instance, passed to gr.Blocks below.
seafoam = Seafoam()
|
|
|
|
|
def clear_inputs():
    """Reset the UI widgets to their initial state.

    Returns:
        tuple: Default values for (input_image, lang_dropdown,
        output_image, output_text) — cleared images, the "auto"
        language selection, and an empty text box.
    """
    cleared_input = None
    default_language = "auto"
    cleared_output = None
    empty_text = ""
    return cleared_input, default_language, cleared_output, empty_text
|
|
|
|
|
# Assemble the Gradio UI: titles/header, input controls on the left,
# outputs on the right, sample inputs, and button wiring.
with gr.Blocks(theme=seafoam, css=custom_css) as demo:

    gr.Markdown("# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit", elem_id="title")
    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
    gr.Markdown(interface_html + links_html)

    with gr.Row():
        with gr.Column():
            # Left column: image upload, script selector, and action buttons.
            input_image = gr.Image(type="pil", image_mode="RGB", label="Upload Image")
            lang_dropdown = gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="auto")
            run_button = gr.Button("Run OCR")
            clear_button = gr.Button("Clear", variant="stop")

        with gr.Column():
            # Right column: annotated detection image and recognized text.
            output_image = gr.Image(type="pil", label="Processed Image")
            output_text = gr.Textbox(label="Recognized Text", lines=10, elem_classes="custom-textbox")

    # Clickable examples; paths are relative to the process working directory
    # — NOTE(review): they presumably exist in the repo; verify at deploy time.
    gr.Examples(
        examples=[["test_images/image_88.jpg", "auto"],
                  ["test_images/image_742.jpg", "hindi"]],
        inputs=[input_image, lang_dropdown],
        label="Try an example"
    )

    # Wire events: Run performs OCR; Clear resets all four widgets via
    # clear_inputs (which takes no inputs, hence inputs omitted).
    run_button.click(fn=process_image, inputs=[input_image, lang_dropdown], outputs=[output_image, output_text])
    clear_button.click(fn=clear_inputs, outputs=[input_image, lang_dropdown, output_image, output_text])
|
|
|
|
|
|
|
# NOTE: share=True publishes a temporary public gradio.live URL — anyone with
# the link can submit images. Disable for private/internal deployments.
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|