MahmoudAbdelmaged committed (verified)
Commit eed7393 · 1 Parent(s): 16342e7

Update app.py

Files changed (1): app.py (+47 -24)
app.py CHANGED
@@ -1,8 +1,8 @@
 import torch
-import torchvision.transforms as transforms
+import cv2
+import easyocr
 import numpy as np
 from PIL import Image, ImageDraw
-import easyocr
 import gradio as gr
 
 # Load YOLOv9 model
@@ -12,50 +12,73 @@ model.eval()  # Set to evaluation mode
 # Initialize EasyOCR reader
 reader = easyocr.Reader(['en', 'ar'], gpu=False)
 
-# Define a transformation pipeline
-transform = transforms.Compose([
-    transforms.Resize((640, 640)),  # Resize to model's expected input size
-    transforms.ToTensor(),  # Convert PIL Image to Tensor
-])
+# Define a transformation pipeline for YOLOv9
+def transform_image(image):
+    transform = transforms.Compose([
+        transforms.Resize((640, 640)),  # Resize to model's expected input size
+        transforms.ToTensor(),  # Convert PIL Image to Tensor
+    ])
+    return transform(image).unsqueeze(0)
+
+# Function to process the uploaded image and extract text
+def extract_text_from_image(image):
+    # Convert numpy array to PIL Image
+    img_pil = Image.fromarray(image)
 
-def detect_objects(image):
-    img = Image.fromarray(image)  # Convert numpy array to PIL Image
-    img_tensor = transform(img).unsqueeze(0)  # Convert to Tensor and add batch dim
+    # Transform image for YOLOv9 input
+    img_tensor = transform_image(img_pil)
 
+    # Run YOLOv9 model to detect objects
     with torch.no_grad():
        results = model(img_tensor)  # Run inference
 
-    # Convert tensor output to numpy and extract bounding boxes
+    # Draw bounding boxes and extract text
+    img = np.array(img_pil)
    draw = ImageDraw.Draw(img)
 
-    extracted_texts = []  # To store the detected texts
+    extracted_text = []  # To store extracted text
 
    if isinstance(results, tuple):  # Ensure we are handling correct output
        boxes = results[0]  # Adjust based on actual YOLO output format
-
+
        # Iterate through the boxes and draw rectangles
        for box in boxes:
            if isinstance(box, torch.Tensor):  # Ensure that 'box' is a tensor
                box = box.cpu().numpy()  # Convert tensor to numpy array
            x1, y1, x2, y2 = box[:4]  # Extract coordinates
-
+
            # Crop the image inside the bounding box
-            cropped_img = img.crop((x1, y1, x2, y2))
+            cropped_img = img_pil.crop((x1, y1, x2, y2))
 
            # Run OCR on the cropped image
            ocr_result = reader.readtext(np.array(cropped_img))
-
+
            # Extract the text from the OCR result
            for detection in ocr_result:
-                extracted_texts.append(detection[1])  # Append the detected text
+                extracted_text.append(detection[1])  # Append the detected text
 
-            # Draw bounding box
+            # Draw bounding box on the image
            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
 
-    # Optionally, print or return the extracted texts
-    print("Extracted Texts:", extracted_texts)
-
-    return img, extracted_texts  # Return image with bounding boxes and extracted text
+    # Convert the image to RGB (Gradio requires RGB format)
+    img = cv2.cvtColor(np.array(img), cv2.COLOR_BGR2RGB)
+
+    # Join the extracted text into a single string with line breaks
+    extracted_text_str = "\n".join(extracted_text)
+
+    return img, extracted_text_str
+
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=extract_text_from_image,
+    inputs=gr.Image(type="numpy", label="Upload Image"),
+    outputs=[
+        gr.Image(type="numpy", label="Processed Image"),
+        gr.Text(label="Extracted Text (Line by Line)")
+    ],
+    title="Object and Text Extractor",
+    description="Upload an image to detect objects using YOLOv9 and extract text using EasyOCR.",
+)
 
-iface = gr.Interface(fn=detect_objects, inputs="image", outputs=["image", "text"])
-iface.launch()
+# Launch the Gradio app
+interface.launch()
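
Review note: as committed, this revision has a regression. transform_image still calls transforms.Compose, but the import torchvision.transforms as transforms line was removed, so the app raises NameError at inference time. Two smaller issues: ImageDraw.Draw expects a PIL image, not the NumPy array produced by np.array(img_pil), and the image never passes through OpenCV, so it is already RGB; the cv2.cvtColor(..., cv2.COLOR_BGR2RGB) call swaps channels rather than correcting them. Below is a minimal sketch of the changed section with those three fixes; it assumes the same module-level model and reader globals as the diff, and the YOLO output handling remains the same placeholder.

import torch
import torchvision.transforms as transforms  # still needed by transform_image
import numpy as np
from PIL import Image, ImageDraw

def transform_image(image):
    transform = transforms.Compose([
        transforms.Resize((640, 640)),  # Resize to model's expected input size
        transforms.ToTensor(),          # Convert PIL Image to Tensor
    ])
    return transform(image).unsqueeze(0)

def extract_text_from_image(image):
    img_pil = Image.fromarray(image)       # numpy array -> PIL Image
    img_tensor = transform_image(img_pil)

    with torch.no_grad():
        results = model(img_tensor)        # assumes a module-level `model`

    draw = ImageDraw.Draw(img_pil)         # draw on the PIL image, not a numpy array
    extracted_text = []

    if isinstance(results, tuple):         # placeholder: adjust to the real YOLO output
        boxes = results[0]
        for box in boxes:
            if isinstance(box, torch.Tensor):
                box = box.cpu().numpy()
            x1, y1, x2, y2 = map(int, box[:4])  # integer pixel coordinates

            cropped_img = img_pil.crop((x1, y1, x2, y2))
            for detection in reader.readtext(np.array(cropped_img)):  # assumes a module-level `reader`
                extracted_text.append(detection[1])

            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)

    # The PIL image is already RGB; no cv2.cvtColor swap is needed
    return np.array(img_pil), "\n".join(extracted_text)

A further caveat the sketch does not address: the boxes come back in the 640x640 coordinate space of the resized tensor, so they likely need rescaling to the original image size before cropping and drawing.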