import cv2
import numpy as np
import onnxruntime as rt
import gradio as gr

def image_preprocess(image):
  """Letterbox the image to input_size x input_size and convert it to an
  NCHW float32 tensor in [0, 1], keeping the original image for drawing."""
  img_height, img_width = image.shape[0:2]
  # Gradio supplies RGB arrays, which is what the YOLOv8 export expects,
  # so no BGR-to-RGB conversion is needed here.
  ih, iw = input_size, input_size  # e.g. [640, 640]
  h, w = img_height, img_width     # e.g. [1944, 2592]

  # Resize with the aspect ratio preserved...
  scale = min(iw / w, ih / h)  # e.g. min(0.2469, 0.3292) = 0.2469
  nw, nh = int(scale * w), int(scale * h)  # e.g. [640, 480]
  image_resized = cv2.resize(image, (nw, nh))

  # ...then pad with gray (128) to a square input, centering the resized image.
  image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
  dw, dh = (iw - nw) // 2, (ih - nh) // 2  # e.g. [0, 80]
  image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized  # e.g. image_padded[80:560, 0:640]
  image_padded = image_padded / 255.
  image_padded = image_padded[np.newaxis, ...].astype(np.float32)  # add batch dim
  image_padded = np.moveaxis(image_padded, -1, 1)  # HWC -> CHW

  return image_padded, img_width, img_height, image
  
  
def inference(model_name, image_data):
  sess = rt.InferenceSession(model_name)
  outputs = sess.get_outputs()
  output_names = list(map(lambda output: output.name, outputs))
  input_name = sess.get_inputs()[0].name
  detections = sess.run(output_names, {input_name: image_data})
  return detections
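
# Note: inference() builds a fresh InferenceSession on every request, which
# reloads the ONNX file from disk each time. A minimal per-model caching
# sketch (_session_cache and inference_cached() are illustrative additions,
# not part of the original app):
_session_cache = {}

def inference_cached(model_name, image_data):
  if model_name not in _session_cache:
    _session_cache[model_name] = rt.InferenceSession(model_name)
  sess = _session_cache[model_name]
  output_names = [output.name for output in sess.get_outputs()]
  input_name = sess.get_inputs()[0].name
  return sess.run(output_names, {input_name: image_data})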
  
def draw_detections(img, box, score, class_id):

    # Extract the coordinates of the bounding box
    x1, y1, x2, y2 = box

    # Retrieve the color for the class ID
    color = color_palette_pred[class_id]
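    # (cv2 drawing calls are channel-order agnostic, so drawing on the RGB
    # array Gradio provides is fine as long as it is displayed as RGB.)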

    # Draw the bounding box on the image
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)

    # Create the label text with class name and score
    label = f'{classes[class_id]}: {score:.2f}'

    # Calculate the dimensions of the label text
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

    # Calculate the position of the label text, placing it above the box
    # unless that would run off the top of the image
    label_x = round(x1)
    label_y = round(y1 - 10) if y1 - 10 > label_height else round(y1 + 10)

    # Draw a filled rectangle as the background for the label text
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED)

    # Draw the label text on the image
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    
    
def postprocess(detections, img_width, img_height, input_size, original_image):
  """Map the letterboxed detections back to original-image coordinates and
  draw them on the original image."""
  # The exported model returns [num_dets, boxes, scores, class_ids];
  # index 0 (num_dets) is unused here.
  boxes = detections[1][0]
  scores = detections[2][0]
  class_ids = detections[3][0]

  # Recompute the same scale and padding used in image_preprocess, then
  # invert them: subtract the padding offset and divide by the scale.
  ih, iw = input_size, input_size  # e.g. [640, 640]
  h, w = img_height, img_width     # e.g. [1944, 2592]
  scale = min(iw / w, ih / h)  # e.g. min(0.2469, 0.3292) = 0.2469
  nw, nh = int(scale * w), int(scale * h)  # e.g. [640, 480]
  dw, dh = (iw - nw) // 2, (ih - nh) // 2  # e.g. [0, 80]

  new_boxes = []
  for box, score, class_id in zip(boxes, scores, class_ids):
    x1, y1, x2, y2 = box
    x1 = (x1 - dw) / scale
    y1 = (y1 - dh) / scale
    x2 = (x2 - dw) / scale
    y2 = (y2 - dh) / scale
    box = [x1, y1, x2, y2]
    draw_detections(original_image, box, score, class_id)
    new_boxes.append(box)
  return [class_ids, scores, new_boxes]
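
# The per-box loop above could also be done in one vectorized step; a minimal
# sketch (rescale_boxes() is an illustrative addition, not used by the app):
def rescale_boxes(boxes, dw, dh, scale):
  boxes = np.asarray(boxes, dtype=np.float32).copy()
  boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / scale  # x1, x2
  boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / scale  # y1, y2
  return boxes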
  
  
# Load the class names (one per line) and assign each class a random color.
with open("coco_names.txt", "r") as f:
    classes = [line.strip() for line in f if line.strip()]

color_palette_pred = np.random.uniform(0, 255, size=(len(classes), 3))
  
model_names = {"YOLOv8n":"yolov8n 640 mask_300000_3000_0.3_0.7.onnx",
               "YOLOv8s":"yolov8s 640 mask_300000_3000_0.3_0.7.onnx",
               "YOLOv8m":"yolov8m 640 mask_300000_3000_0.3_0.7.onnx",
               "YOLOv8l":"yolov8l 640 mask_300000_3000_0.3_0.7.onnx",
               "YOLOv8x":"yolov8x 640 mask_300000_3000_0.3_0.7.onnx"}
input_size = 640
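# input_size must match the resolution the ONNX models were exported at
# (the "640" in the filenames above).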


def run(image, model_key: str):
    image_data, img_width, img_height, original_image = image_preprocess(image)
    model_name = model_names[model_key]  # dropdown value, e.g. "YOLOv8n"
    detections = inference(model_name, image_data)
    postprocess(detections, img_width, img_height, input_size, original_image)
    return original_image
  
demo = gr.Interface(
    fn=run,
    inputs=["image", gr.Dropdown(["YOLOv8n", "YOLOv8s", "YOLOv8m", "YOLOv8l", "YOLOv8x"], label="Model", value="YOLOv8n", info="The larger the model, the slower and more performant it is.")],
    outputs=["image"],
    examples=[["crowd.jpeg", "YOLOv8n"],["crowd.jpeg", "YOLOv8s"],["crowd.jpeg", "YOLOv8m"],["crowd.jpeg", "YOLOv8l"],["crowd.jpeg", "YOLOv8x"],]
)

demo.launch()
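
# demo.launch() serves the app locally; passing share=True would additionally
# create a temporary public link (a standard Gradio option, not used above).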