import torch
import gradio as gr
from numpy import random
from PIL import ImageDraw
from ultralytics import YOLO

from read import text_recognizer
from model import Model
from utils import CTCLabelConverter
| """ vocab / character number configuration """ | |
| file = open("UrduGlyphs.txt","r",encoding="utf-8") | |
| content = file.readlines() | |
| content = ''.join([str(elem).strip('\n') for elem in content]) | |
| content = content+" " | |
| """ model configuration """ | |
| device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
| converter = CTCLabelConverter(content) | |
| recognition_model = Model(num_class=len(converter.character), device=device) | |
| recognition_model = recognition_model.to(device) | |
| recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device)) | |
| recognition_model.eval() | |
| detection_model = YOLO("yolov8m_UrduDoc.pt") | |

examples = ["1.jpg", "2.jpg", "3.jpg"]
input = gr.Image(type="pil", image_mode="RGB", label="Input Image")

def predict(input):
    # Line detection with the YOLO model
    detection_results = detection_model.predict(source=input, conf=0.2, imgsz=1280, save=False, nms=True, device=device)
    bounding_boxes = detection_results[0].boxes.xyxy.cpu().numpy().tolist()
    bounding_boxes.sort(key=lambda x: x[1])  # order the lines top-to-bottom by their top y-coordinate

    # Draw the bounding boxes on the input image
    draw = ImageDraw.Draw(input)
    for box in bounding_boxes:
        # rectangle outline in a random colour, width=5
        draw.rectangle(box, fill=None, outline=tuple(random.randint(0, 255, 3)), width=5)

    # Crop the detected lines
    cropped_images = [input.crop(box) for box in bounding_boxes]

    # Recognize the text in each cropped line
    texts = [text_recognizer(img, recognition_model, converter, device) for img in cropped_images]

    # Join the per-line results and return the annotated image with the text
    text = "\n".join(texts)
    return input, text
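
# A minimal sketch for trying predict() without the web UI (assumption:
# "page.jpg" is a local test image and is not shipped with the app):
# from PIL import Image
# annotated, recognized = predict(Image.open("page.jpg").convert("RGB"))
# annotated.save("page_with_lines.jpg")
# print(recognized)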

output_image = gr.Image(type="pil", image_mode="RGB", label="Detected Lines")
output_text = gr.Textbox(label="Recognized Text", interactive=True, show_copy_button=True)

iface = gr.Interface(predict,
                     inputs=input,
                     outputs=[output_image, output_text],
                     title="End-to-End Urdu OCR",
                     description="Demo Web App For UTRNet\n(https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition)",
                     examples=examples,
                     allow_flagging="never")
iface.launch()
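
# launch() also accepts the usual Gradio serving options; a hedged example
# (not part of the original app) for exposing the demo on a local network:
# iface.launch(server_name="0.0.0.0", server_port=7860)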