import cv2
import numpy as np
import onnxruntime as rt
import gradio as gr

def image_preprocess(image):
    """Letterbox an RGB image to the model's square input and return an NCHW tensor."""
    img_height, img_width = image.shape[:2]

    # Gradio's Image component already supplies an RGB numpy array, so no
    # BGR -> RGB conversion is needed here.
    ih, iw = input_size, input_size          # e.g. (640, 640)
    h, w, _ = image.shape                    # e.g. (1944, 2592)

    # Resize so the image fits inside the square input while keeping its
    # aspect ratio.
    scale = min(iw / w, ih / h)              # min(0.2469, 0.3292) = 0.2469
    nw, nh = int(scale * w), int(scale * h)  # e.g. (640, 480)
    image_resized = cv2.resize(image, (nw, nh))

    # Center the resized image on a mid-gray canvas.
    image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2  # e.g. (0, 80)
    image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized

    # Normalize to [0, 1], add a batch axis, and move channels first (NCHW).
    image_padded = image_padded / 255.
    image_padded = image_padded[np.newaxis, ...].astype(np.float32)
    image_padded = np.moveaxis(image_padded, -1, 1)

    return image_padded, img_width, img_height, image
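
# Worked example of the letterbox math above (a hedged sketch; the numbers
# assume a hypothetical 2592x1944 photo and input_size = 640):
#
#   scale    = min(640/2592, 640/1944) ~= 0.2469
#   (nw, nh) = (int(scale * 2592), int(scale * 1944)) ~= (640, 480)
#   (dw, dh) = ((640 - 640) // 2, (640 - 480) // 2)    = (0, 80)
#
# so the resized image occupies rows 80..559 of the gray canvas and the
# returned tensor has shape (1, 3, 640, 640).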
  
  
def inference(image_data):
    """Run the ONNX session on a preprocessed (1, 3, H, W) float32 tensor."""
    detections = sess.run(output_names, {input_name: image_data})
    return detections
  
def draw_detections(img, box, score, class_id):

    # Extract the coordinates of the bounding box
    x1, y1, x2, y2 = box

    # Cast the class ID to int in case the ONNX graph emits a float dtype
    class_id = int(class_id)

    # Retrieve the color for the class ID
    color = color_palette_pred[class_id]

    # Draw the bounding box on the image
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)

    # Create the label text with class name and score
    label = f'{classes[class_id]}: {score:.2f}'

    # Calculate the dimensions of the label text
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 2, 2)

    # Calculate the position of the label text
    label_x = x1
    label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

    # Draw a filled rectangle as the background for the label text
    label_x = round(label_x)
    label_y = round(label_y)
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED)

    # Draw the label text on the image
    # Draw the label text with the same thickness used to measure it above
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 2, cv2.LINE_AA)
    
    
def postprocess(detections, img_width, img_height, input_size, original_image):
    # The .nms.onnx graph is assumed to return four outputs in the order
    # [num_dets, boxes, scores, class_ids], with the detection arrays padded
    # to a fixed maximum; only the first num_dets entries are valid.
    num_dets = int(detections[0][0])
    boxes = detections[1][0][:num_dets]
    scores = detections[2][0][:num_dets]
    class_ids = detections[3][0][:num_dets]

    # Recompute the letterbox parameters used in image_preprocess so the
    # boxes can be mapped back into the original image's coordinate system.
    ih, iw = input_size, input_size          # e.g. (640, 640)
    h, w = img_height, img_width             # e.g. (1944, 2592)
    scale = min(iw / w, ih / h)              # min(0.2469, 0.3292) = 0.2469
    nw, nh = int(scale * w), int(scale * h)  # e.g. (640, 480)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2  # e.g. (0, 80)

    new_boxes = []
    for box, score, class_id in zip(boxes, scores, class_ids):
        # Undo the padding offset, then the resize scale.
        x1, y1, x2, y2 = box
        x1 = (x1 - dw) / scale
        y1 = (y1 - dh) / scale
        x2 = (x2 - dw) / scale
        y2 = (y2 - dh) / scale
        box = [x1, y1, x2, y2]
        draw_detections(original_image, box, score, class_id)
        new_boxes.append(box)
    return [class_ids, scores, new_boxes]
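
# Hedged sketch of the assumed raw output layout for a batch of one image
# (max_det is whatever cap the exporter baked into the NMS node):
#
#   detections[0]  num_dets    shape (1, 1)            valid detection count
#   detections[1]  boxes       shape (1, max_det, 4)   xyxy, letterboxed coords
#   detections[2]  scores      shape (1, max_det)
#   detections[3]  class_ids   shape (1, max_det)
#
# If the exported graph's built-in confidence cutoff proves too permissive,
# a score threshold could be layered on in postprocess, e.g.:
#
#   keep = scores > 0.25
#   boxes, scores, class_ids = boxes[keep], scores[keep], class_ids[keep]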
  
  
# Module-level setup: class names, a color palette, and the ONNX session.
# coco_names.txt is expected to contain one class name per line.
with open("coco_names.txt", "r") as f:
    classes = f.read().strip().split("\n")

color_palette_pred = np.random.uniform(0, 255, size=(len(classes), 3))
  
sess = rt.InferenceSession("yolov8n_coco_640.nms.onnx")

outputs = sess.get_outputs()
output_names = [output.name for output in outputs]
input_name = sess.get_inputs()[0].name
# The model input is static NCHW, so shape[2] is the (square) spatial size.
input_size = sess.get_inputs()[0].shape[2]
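
# Optional sanity check (left commented out to keep startup quiet): print
# each output's name and shape to confirm the four-output NMS layout that
# postprocess assumes.
#
#   for out in outputs:
#       print(out.name, out.shape)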


def run(image):
    # Gradio passes the uploaded image as an RGB numpy array.
    image_data, img_width, img_height, original_image = image_preprocess(image)
    detections = inference(image_data)
    postprocess(detections, img_width, img_height, input_size, original_image)
    return original_image
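
# Hedged offline usage example (no Gradio UI), assuming a local test image
# named "test.jpg"; cv2.imread returns BGR, so convert to RGB first to match
# what the Gradio Image component supplies:
#
#   bgr = cv2.imread("test.jpg")
#   annotated = run(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
#   cv2.imwrite("out.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))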
  
demo = gr.Interface(
    fn=run,
    inputs=["image"],
    outputs=["image"],
)
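
# Deployment note: Gradio serves on http://127.0.0.1:7860 by default;
# demo.launch(share=True) would create a temporary public link.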

if __name__ == "__main__":
    demo.launch()