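"""Gradio demo: trash detection with orientation estimation.

A YOLO instance-segmentation model (loaded from best.pt) detects objects,
and PCA on each object's mask contour estimates its orientation, e.g. for
robotic grasping.
"""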
import gradio as gr
import cv2
import requests
import os
import numpy as np
from math import atan2, cos, sin, sqrt, pi
from ultralytics import YOLO
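
# drawAxis and getOrientation are adapted from the OpenCV
# "Introduction to Principal Component Analysis (PCA)" tutorial.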
def drawAxis(img, p_, q_, color, scale):
    p = list(p_)
    q = list(q_)

    ## [visualization1]
    angle = atan2(p[1] - q[1], p[0] - q[0])  # angle in radians
    hypotenuse = sqrt((p[1] - q[1]) ** 2 + (p[0] - q[0]) ** 2)

    # Here we lengthen the arrow by a factor of scale
    q[0] = p[0] - scale * hypotenuse / 2 * cos(angle)
    q[1] = p[1] - scale * hypotenuse / 2 * sin(angle)
    cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), color, 3, cv2.LINE_AA)

    # Create the arrow hooks
    p[0] = q[0] + 9 * cos(angle + pi / 4)
    p[1] = q[1] + 9 * sin(angle + pi / 4)
    cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), color, 3, cv2.LINE_AA)

    p[0] = q[0] + 9 * cos(angle - pi / 4)
    p[1] = q[1] + 9 * sin(angle - pi / 4)
    cv2.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), color, 3, cv2.LINE_AA)
    ## [visualization1]

def getOrientation(pts, img):
    ## [pca]
    # Construct a buffer used by the PCA analysis
    sz = len(pts)
    data_pts = np.empty((sz, 2), dtype=np.float64)
    for i in range(data_pts.shape[0]):
        data_pts[i, 0] = pts[i, 0, 0]
        data_pts[i, 1] = pts[i, 0, 1]

    # Perform PCA analysis
    mean = np.empty((0))
    mean, eigenvectors, eigenvalues = cv2.PCACompute2(data_pts, mean)

    # Store the center of the object
    cntr = (int(mean[0, 0]), int(mean[0, 1]))
    ## [pca]

    ## [visualization]
    # Draw the principal components
    cv2.circle(img, cntr, 3, (255, 0, 255), 10)
    p1 = (cntr[0] + 0.02 * eigenvectors[0, 0] * eigenvalues[0, 0],
          cntr[1] + 0.02 * eigenvectors[0, 1] * eigenvalues[0, 0])
    p2 = (cntr[0] - 0.02 * eigenvectors[1, 0] * eigenvalues[1, 0],
          cntr[1] - 0.02 * eigenvectors[1, 1] * eigenvalues[1, 0])
    drawAxis(img, cntr, p1, (255, 255, 0), 1)
    drawAxis(img, cntr, p2, (255, 255, 0), 4)
    angle = atan2(eigenvectors[0, 1], eigenvectors[0, 0])  # orientation in radians
    ## [visualization]

    # Map the angle of the first principal axis to degrees in [0, 180)
    angle_deg = -(int(np.rad2deg(angle)) - 180) % 180

    # # Label with the rotation angle
    # label = str(int(np.rad2deg(angle))) + " deg"
    # textbox = cv2.rectangle(img, (cntr[0]+60, cntr[1]-25), (cntr[0] + 150, cntr[1] + 10), (255,255,255), -1)
    # cv2.putText(img, label, (cntr[0]+60, cntr[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv2.LINE_AA)

    return angle_deg
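
# Usage sketch: given a binary mask `bw`, the orientation of one blob is
#   contours, _ = cv2.findContours(bw, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
#   angle = getOrientation(contours[0], img)  # draws axes on img, returns degrees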

file_urls = [
    'https://github.com/lucarei/orientation-detection-robotic-grasping/assets/22428774/cefd9731-c57c-428b-b401-fd54a8bd0a95',
    'https://github.com/lucarei/orientation-detection-robotic-grasping/assets/22428774/acbad76a-33f9-4028-b012-4ece5998c272',
    'https://github.com/lucarei/orientation-detection-robotic-grasping/assets/22428774/ce8a0fb9-99ea-4952-bcc4-3afa023066d9',
    'https://dl.dropboxusercontent.com/scl/fi/flbf7vvoxgzoe9adovadm/video-540p.mp4?dl=0&rlkey=jbecmpu727q7yirvquma9m7w2'
]

def download_file(url, save_name):
    if not os.path.exists(save_name):
        file = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(file.content)

# Download the example images and the example video once at startup
for i, url in enumerate(file_urls):
    if 'mp4' in url:
        download_file(url, "video.mp4")
    else:
        download_file(url, f"image_{i}.jpg")
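
# 'best.pt' is presumably the author's custom-trained segmentation
# checkpoint; it is expected to sit next to this script.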
model = YOLO('best.pt')

path = [['image_0.jpg'], ['image_1.jpg'], ['image_2.jpg']]
video_path = [['video.mp4']]

def show_preds_image(image_path):
    image = cv2.imread(image_path)

    # Resize the image (optional, halves each dimension)
    img_res_toshow = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
    height = img_res_toshow.shape[0]
    width = img_res_toshow.shape[1]
    dim = (width, height)

    outputs = model.predict(source=img_res_toshow, conf=0.4)

    # Map each detected box to its class name
    boxes = outputs[0].boxes.cls
    class_list = []
    for class_n in boxes.cpu().numpy():
        class_list.append(outputs[0].names[int(class_n)])

    angle_list = []
    for obj_idx in range(len(outputs[0].masks.masks)):  # .masks.data in newer ultralytics releases
        # Obtain the binary (black/white) mask of the object
        bw = (outputs[0].masks.masks[obj_idx].cpu().numpy() * 255).astype("uint8")
        # Resize the mask to the same dimensions as the input image
        bw = cv2.resize(bw, dim, interpolation=cv2.INTER_AREA)
        img = img_res_toshow
        contours, _ = cv2.findContours(bw, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        for i, c in enumerate(contours):
            # Calculate the area of each contour
            area = cv2.contourArea(c)
            # Ignore contours that are too small or too large
            if area < 2500 or 500000 < area:
                continue
            # Draw each contour, for visualisation purposes only
            cv2.drawContours(img, contours, i, (0, 0, 255), 2)
            # Find the orientation of each shape
            angle_deg = getOrientation(c, img)
            angle_list.append(angle_deg)

    # Build a text summary with the orientation and class of each object
    text = ""
    for i in range(len(angle_list)):
        text = text + "Object " + str(i + 1) + ": " + str(angle_list[i]) + " deg, " + class_list[i] + "\n"

    # Draw the detection bounding boxes
    results = outputs[0].cpu().numpy()
    for i, det in enumerate(results.boxes.xyxy):
        cv2.rectangle(
            img,
            (int(det[0]), int(det[1])),
            (int(det[2]), int(det[3])),
            color=(255, 0, 0),
            thickness=2,
            lineType=cv2.LINE_AA
        )

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB), text

inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
outputs_image = [
    gr.components.Image(type="numpy", label="Output Image"),
    gr.components.Textbox(label="Orientation Angle"),
]
interface_image = gr.Interface(
    fn=show_preds_image,
    inputs=inputs_image,
    outputs=outputs_image,
    title="Trash Detection with Orientation",
    examples=path,
    cache_examples=False,
)

def show_preds_video(video_path):
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_copy = frame.copy()

        # Resize the frame (optional, halves each dimension)
        img_res_toshow = cv2.resize(frame_copy, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
        height = img_res_toshow.shape[0]
        width = img_res_toshow.shape[1]
        dim = (width, height)

        outputs = model.predict(source=img_res_toshow, conf=0.4)
        for obj_idx in range(len(outputs[0].masks.masks)):  # .masks.data in newer ultralytics releases
            # Obtain the binary (black/white) mask of the object
            bw = (outputs[0].masks.masks[obj_idx].cpu().numpy() * 255).astype("uint8")
            # Resize the mask to the same dimensions as the frame
            bw = cv2.resize(bw, dim, interpolation=cv2.INTER_AREA)
            img = img_res_toshow
            contours, _ = cv2.findContours(bw, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
            for i, c in enumerate(contours):
                # Calculate the area of each contour
                area = cv2.contourArea(c)
                # Ignore contours that are too small or too large
                if area < 2500 or 500000 < area:
                    continue
                # Draw each contour, for visualisation purposes only
                cv2.drawContours(img, contours, i, (0, 0, 255), 2)
                # Find the orientation of each shape
                angle_deg = getOrientation(c, img)

        # Draw the detection bounding boxes
        results = outputs[0].cpu().numpy()
        for i, det in enumerate(results.boxes.xyxy):
            cv2.rectangle(
                img,
                (int(det[0]), int(det[1])),
                (int(det[2]), int(det[3])),
                color=(255, 0, 0),
                thickness=2,
                lineType=cv2.LINE_AA
            )

        # Yield the annotated frame; Gradio streams generator output frame by frame
        yield cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

inputs_video = [
    gr.components.Video(type="filepath", label="Input Video"),
]
outputs_video = [
    gr.components.Image(type="numpy", label="Output Image"),
]
interface_video = gr.Interface(
    fn=show_preds_video,
    inputs=inputs_video,
    outputs=outputs_video,
    title="Trash Detection with Orientation",
    examples=video_path,
    cache_examples=False,
)

gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=['Image inference', 'Video inference']
).queue().launch()