import numpy as np |
import cv2 |
import torch |
import onnxruntime |
import sys |
import pathlib |
CURRENT_DIR = pathlib.Path(__file__).parent |
sys.path.append(str(CURRENT_DIR)) |
import argparse |
from utils import ( |
letterbox, |
non_max_suppression, |
scale_coords, |
Annotator, |
Colors, |
) |
def pre_process(img):
    """Convert an image array into the batched float32 array used for inference.

    The image is letterboxed with a ``[640, 640]`` target shape, its channel
    axis is moved to the front and reversed, the values are divided by 255,
    and a leading batch axis is added.

    Args:
        img: Image array with channels on the last axis (presumably the
            HWC/BGR array returned by ``cv2.imread`` -- confirm with caller).

    Returns:
        A float32 array with one extra leading axis of size 1.
    """
    resized = letterbox(img, [640, 640], stride=32, auto=False)[0]
    # Channels-last -> channels-first, then flip the channel order.
    channels_first = resized.transpose((2, 0, 1))[::-1]
    scaled = np.ascontiguousarray(channels_first).astype("float32") / 255.0
    return scaled[np.newaxis, :]
def post_process(x, grids=None, anchor_grids=None):
    """Decode the three raw detection-head outputs into box predictions.

    Each head output of shape ``(bs, 3 * no, ny, nx)`` is rearranged to
    ``(bs, 3, ny, nx, no)``, passed through a sigmoid, and its first four
    values are decoded into xy (offset by the grid, scaled by the head
    stride) and wh (scaled by the anchor grid). The heads use strides
    8, 16 and 32, in that order.

    Args:
        x: Sequence whose first three items are the head outputs (tensors or
            arrays) laid out as ``(bs, 3 * no, ny, nx)``. The sequence itself
            is not modified.
        grids: Optional per-head grid offsets, indexed like ``x``. When
            omitted, the module-level ``grid`` is used, as before.
        anchor_grids: Optional per-head anchor sizes, indexed like ``x``.
            When omitted, the module-level ``anchor_grid`` is used, as before.

    Returns:
        A tuple ``(pred, heads)``. ``pred`` concatenates the decoded outputs
        of all heads along dim 1, giving shape ``(bs, N, no)``. ``heads`` is
        a list holding the rearranged, pre-sigmoid head tensors.

    Raises:
        NameError: If ``grids`` / ``anchor_grids`` are omitted and the
            module-level ``grid`` / ``anchor_grid`` have not been defined.
    """
    # Fall back to the globals the script entry point loads, so existing
    # single-argument calls keep working.
    if grids is None:
        grids = grid
    if anchor_grids is None:
        anchor_grids = anchor_grid

    num_anchors = 3
    strides = (8, 16, 32)
    heads = list(x)  # shallow copy: the caller's sequence stays untouched
    decoded = []
    for i, stride in enumerate(strides):
        bs, channels, ny, nx = heads[i].shape
        num_outputs = channels // num_anchors
        # as_tensor avoids the copy-construct UserWarning that torch.tensor()
        # emits when the input is already a tensor.
        heads[i] = (
            torch.as_tensor(heads[i])
            .view(bs, num_anchors, num_outputs, ny, nx)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )
        y = heads[i].sigmoid()
        xy = (y[..., 0:2] * 2.0 - 0.5 + grids[i]) * stride
        wh = (y[..., 2:4] * 2) ** 2 * anchor_grids[i]
        y = torch.cat((xy, wh, y[..., 4:]), -1)
        decoded.append(y.view(bs, -1, num_outputs))
    return (torch.cat(decoded, 1), heads)
def make_parser():
    """Build the command-line parser for the inference sample.

    Options:
        -m / --onnx_model: path to the ONNX model (default ``./yolov5s.onnx``).
        -i / --image_path: path to the input image (default ``./demo.jpg``).
        -o / --output_path: path of the image file to write
            (default ``./demo_infer.jpg``).
        --ipu: boolean flag, false unless given.
        --provider_config: provider config string (default ``''``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # The first positional argument of ArgumentParser is ``prog``; pass the
    # text as ``description`` so the usage line shows the real program name.
    parser = argparse.ArgumentParser(description="onnxruntime inference sample")
    parser.add_argument(
        "-m",
        "--onnx_model",
        type=str,
        default="./yolov5s.onnx",
        help="input your onnx model.",
    )
    parser.add_argument(
        "-i",
        "--image_path",
        type=str,
        default='./demo.jpg',
        help="path to your input image.",
    )
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default='./demo_infer.jpg',
        # The value is used as a file path, not a directory.
        help="path to save the annotated output image.",
    )
    parser.add_argument(
        '--ipu',
        action='store_true',
        help='flag for ryzen ai',
    )
    parser.add_argument(
        '--provider_config',
        default='',
        type=str,
        help='provider config for ryzen ai',
    )
    return parser
# Class-index -> label table used when drawing detections; the position of
# each entry is the class id it labels. (Looks like the 80-class COCO label
# set -- confirm against the exported model.)
names = [
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]
if __name__ == '__main__':
    # Script entry point: run one image through the ONNX model, draw the
    # detections that survive NMS, and write the annotated image to disk.
    args = make_parser().parse_args()

    # Create the inference session, optionally on the Vitis AI provider.
    onnx_path = args.onnx_model
    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
        onnx_weight = onnxruntime.InferenceSession(onnx_path, providers=providers, provider_options=provider_options)
    else:
        onnx_weight = onnxruntime.InferenceSession(onnx_path)

    # post_process() reads `grid` and `anchor_grid` as module-level globals,
    # so these names must stay as they are.
    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
    # objects, which can execute code; only load .npy files you trust.
    grid = np.load("./grid.npy", allow_pickle=True)
    anchor_grid = np.load("./anchor_grid.npy", allow_pickle=True)

    path = args.image_path
    new_path = args.output_path
    conf_thres, iou_thres, classes, agnostic_nms, max_det = 0.25, 0.45, None, False, 1000

    img0 = cv2.imread(path)
    if img0 is None:
        # cv2.imread returns None instead of raising for a missing or
        # unreadable file; fail here with a clear message.
        raise FileNotFoundError(f"could not read image: {path}")
    img = pre_process(img0)

    # The batch is moved to channels-last for the model input, and every
    # output is moved back to channels-first before decoding.
    onnx_input = {onnx_weight.get_inputs()[0].name: img.transpose(0, 2, 3, 1)}
    onnx_output = onnx_weight.run(None, onnx_input)
    onnx_output = [torch.tensor(item).permute(0, 3, 1, 2) for item in onnx_output]
    onnx_output = post_process(onnx_output)
    pred = non_max_suppression(
        onnx_output[0], conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det
    )

    colors = Colors()
    det = pred[0]  # detections for the single image in the batch
    im0 = img0.copy()
    annotator = Annotator(im0, line_width=2, example=str(names))
    if len(det):
        # Map boxes from the pre-processed image size back to the original.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)
            label = f"{names[c]} {conf:.2f}"
            annotator.box_label(xyxy, label, color=colors(c, True))
    im0 = annotator.result()

    # cv2.imwrite reports failure through its return value; do not let a
    # failed write pass silently.
    if not cv2.imwrite(new_path, im0):
        raise OSError(f"could not write annotated image to: {new_path}")