Spaces:
Runtime error
Runtime error
| from collections import defaultdict | |
| from loguru import logger | |
| from tqdm import tqdm | |
| import torch | |
| from yolox.utils import ( | |
| gather, | |
| is_main_process, | |
| postprocess, | |
| synchronize, | |
| time_synchronized, | |
| xyxy2xywh | |
| ) | |
| from yolox.tracker.byte_tracker import BYTETracker | |
| from yolox.sort_tracker.sort import Sort | |
| from yolox.deepsort_tracker.deepsort import DeepSort | |
| from yolox.motdt_tracker.motdt_tracker import OnlineTracker | |
| import contextlib | |
| import io | |
| import os | |
| import itertools | |
| import json | |
| import tempfile | |
| import time | |
def write_results(filename, results):
    """Write scored tracking results to ``filename`` as comma-separated text.

    Args:
        filename: path of the text file to (over)write.
        results: iterable of ``(frame_id, tlwhs, track_ids, scores)`` tuples,
            where the last three items are parallel sequences and every
            ``tlwh`` unpacks into ``(x1, y1, w, h)``.

    Each emitted row is ``frame,id,x1,y1,w,h,score,-1,-1,-1``. Box values are
    rounded to one decimal place and the score to two. Entries whose track id
    is negative are skipped.
    """
    row_template = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n'

    def iter_rows():
        for frame_id, tlwhs, track_ids, scores in results:
            for box, tid, conf in zip(tlwhs, track_ids, scores):
                if tid < 0:
                    continue
                # Unpack only after the id check, so skipped entries are
                # never required to hold a well-formed box.
                left, top, width, height = box
                yield row_template.format(
                    frame=frame_id,
                    id=tid,
                    x1=round(left, 1),
                    y1=round(top, 1),
                    w=round(width, 1),
                    h=round(height, 1),
                    s=round(conf, 2),
                )

    with open(filename, 'w') as out:
        out.writelines(iter_rows())
    logger.info('save results to {}'.format(filename))
def write_results_no_score(filename, results):
    """Write tracking results that carry no confidence score to ``filename``.

    Args:
        filename: path of the text file to (over)write.
        results: iterable of ``(frame_id, tlwhs, track_ids)`` tuples, where
            the last two items are parallel sequences and every ``tlwh``
            unpacks into ``(x1, y1, w, h)``.

    Each emitted row is ``frame,id,x1,y1,w,h,-1,-1,-1,-1``. Box values are
    rounded to one decimal place. Entries whose track id is negative are
    skipped.
    """
    row_template = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'

    def iter_rows():
        for frame_id, tlwhs, track_ids in results:
            for box, tid in zip(tlwhs, track_ids):
                if tid < 0:
                    continue
                # Unpack only after the id check, so skipped entries are
                # never required to hold a well-formed box.
                left, top, width, height = box
                yield row_template.format(
                    frame=frame_id,
                    id=tid,
                    x1=round(left, 1),
                    y1=round(top, 1),
                    w=round(width, 1),
                    h=round(height, 1),
                )

    with open(filename, 'w') as out:
        out.writelines(iter_rows())
    logger.info('save results to {}'.format(filename))
class MOTEvaluator:
    """
    Detector + tracker evaluator.

    Every ``evaluate*`` method runs the detector over ``dataloader``, feeds the
    detections of ``outputs[0]`` to a tracker, writes one ``<video_name>.txt``
    result file per video into ``result_folder`` and finally scores the raw
    detections through ``evaluate_prediction`` (COCO bbox evaluation).

    NOTE(review): the per-frame bookkeeping calls ``.item()`` on
    ``info_imgs[2]`` / ``info_imgs[3]`` and only tracks ``outputs[0]``, so the
    dataloader presumably yields one frame per batch, in video order -- confirm.
    """

    # Per-sequence BYTETracker overrides applied by `evaluate`. Sequences that
    # are not listed use the default buffer and the caller's `track_thresh`.
    _BYTE_TRACK_BUFFER = {
        'MOT17-05-FRCNN': 14,
        'MOT17-06-FRCNN': 14,
        'MOT17-13-FRCNN': 25,
        'MOT17-14-FRCNN': 25,
    }
    _BYTE_DEFAULT_TRACK_BUFFER = 30
    _BYTE_TRACK_THRESH = {
        'MOT17-01-FRCNN': 0.65,
        'MOT17-06-FRCNN': 0.65,
        'MOT17-12-FRCNN': 0.7,
        'MOT17-14-FRCNN': 0.67,
        'MOT20-06': 0.3,
        'MOT20-08': 0.3,
    }
    # Tracked boxes whose width / height ratio exceeds this value are dropped.
    _MAX_ASPECT_RATIO = 1.6

    def __init__(
        self, args, dataloader, img_size, confthre, nmsthre, num_classes):
        """
        Args:
            args: namespace of tracker options; this class reads
                ``track_thresh`` and ``min_box_area`` and (in ``evaluate``)
                writes ``track_buffer`` and ``track_thresh``.
            dataloader (Dataloader): evaluate dataloader.
            img_size: network input size, indexed as ``img_size[0]`` and
                ``img_size[1]`` when rescaling boxes.
            confthre (float): confidence threshold passed to ``postprocess``.
            nmsthre (float): NMS IoU threshold passed to ``postprocess``.
            num_classes (int): number of classes passed to ``postprocess``.
        """
        self.dataloader = dataloader
        self.img_size = img_size
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.num_classes = num_classes
        self.args = args

    def evaluate(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None
    ):
        """
        Run detection + BYTETracker and evaluate the detections.

        Per-sequence ``track_buffer`` / ``track_thresh`` overrides are written
        to ``self.args`` before the tracker for that sequence is built.
        ``self.args.track_thresh`` is restored to its incoming value on exit;
        ``self.args.track_buffer`` is left at the last value that was set.
        Frames with no detections (``outputs[0] is None``) are not passed to
        the tracker and produce no result row.

        NOTE: This function will change training mode to False, please save states if needed.

        Args:
            model: model to evaluate.
            distributed (bool): gather detections / timings across processes.
            half (bool): run the model and inputs in half precision.
            trt_file: optional path of a torch2trt state dict to run instead.
            decoder: optional callable applied to the raw model outputs.
            test_size: ``(height, width)`` used for the TensorRT warm-up input.
            result_folder: directory that receives the per-video result files.

        Returns:
            The tuple returned by ``evaluate_prediction``:
            ``(ap50_95, ap50, summary)``.
        """
        ori_thresh = self.args.track_thresh

        def configure_video(video_name):
            # A single lookup per setting. Previously a second, separate
            # if/else for the MOT20 sequences reset the threshold and made
            # the MOT17 overrides ineffective.
            self.args.track_buffer = self._BYTE_TRACK_BUFFER.get(
                video_name, self._BYTE_DEFAULT_TRACK_BUFFER
            )
            self.args.track_thresh = self._BYTE_TRACK_THRESH.get(
                video_name, ori_thresh
            )

        try:
            return self._evaluate_with_tracker(
                model, distributed, half, trt_file, decoder, test_size, result_folder,
                make_tracker=lambda: BYTETracker(self.args),
                update_tracker=self._update_tracker,
                unpack_target=self._unpack_track,
                with_scores=True,
                configure_video=configure_video,
                skip_empty_detections=True,
            )
        finally:
            # Do not leak a per-sequence threshold into later calls, which
            # would otherwise pick it up as their "original" threshold.
            self.args.track_thresh = ori_thresh

    def evaluate_sort(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None
    ):
        """
        Run detection + ``Sort`` tracking and evaluate the detections.

        The tracker is built as ``Sort(self.args.track_thresh)``; its outputs
        are indexed as ``(x1, y1, x2, y2, track_id)`` and written without a
        score column.

        NOTE: This function will change training mode to False, please save states if needed.

        Args:
            model: model to evaluate.
            distributed (bool): gather detections / timings across processes.
            half (bool): run the model and inputs in half precision.
            trt_file: optional path of a torch2trt state dict to run instead.
            decoder: optional callable applied to the raw model outputs.
            test_size: ``(height, width)`` used for the TensorRT warm-up input.
            result_folder: directory that receives the per-video result files.

        Returns:
            The tuple returned by ``evaluate_prediction``:
            ``(ap50_95, ap50, summary)``.
        """
        return self._evaluate_with_tracker(
            model, distributed, half, trt_file, decoder, test_size, result_folder,
            make_tracker=lambda: Sort(self.args.track_thresh),
            update_tracker=self._update_tracker,
            unpack_target=self._unpack_box,
            with_scores=False,
        )

    def evaluate_deepsort(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None,
        model_folder=None
    ):
        """
        Run detection + ``DeepSort`` tracking and evaluate the detections.

        The tracker is built as
        ``DeepSort(model_folder, min_confidence=self.args.track_thresh)`` and
        additionally receives the image file name on every update. Its outputs
        are indexed as ``(x1, y1, x2, y2, track_id)`` and written without a
        score column.

        NOTE: This function will change training mode to False, please save states if needed.

        Args:
            model: model to evaluate.
            distributed (bool): gather detections / timings across processes.
            half (bool): run the model and inputs in half precision.
            trt_file: optional path of a torch2trt state dict to run instead.
            decoder: optional callable applied to the raw model outputs.
            test_size: ``(height, width)`` used for the TensorRT warm-up input.
            result_folder: directory that receives the per-video result files.
            model_folder: first positional argument of the tracker constructor.

        Returns:
            The tuple returned by ``evaluate_prediction``:
            ``(ap50_95, ap50, summary)``.
        """
        return self._evaluate_with_tracker(
            model, distributed, half, trt_file, decoder, test_size, result_folder,
            make_tracker=lambda: DeepSort(model_folder, min_confidence=self.args.track_thresh),
            update_tracker=self._update_tracker_with_image,
            unpack_target=self._unpack_box,
            with_scores=False,
        )

    def evaluate_motdt(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None,
        model_folder=None
    ):
        """
        Run detection + MOTDT (``OnlineTracker``) and evaluate the detections.

        The tracker is built as
        ``OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)``
        and additionally receives the image file name on every update. Its
        outputs expose ``tlwh``, ``track_id`` and ``score`` and are written
        with a score column.

        NOTE: This function will change training mode to False, please save states if needed.

        Args:
            model: model to evaluate.
            distributed (bool): gather detections / timings across processes.
            half (bool): run the model and inputs in half precision.
            trt_file: optional path of a torch2trt state dict to run instead.
            decoder: optional callable applied to the raw model outputs.
            test_size: ``(height, width)`` used for the TensorRT warm-up input.
            result_folder: directory that receives the per-video result files.
            model_folder: first positional argument of the tracker constructor.

        Returns:
            The tuple returned by ``evaluate_prediction``:
            ``(ap50_95, ap50, summary)``.
        """
        return self._evaluate_with_tracker(
            model, distributed, half, trt_file, decoder, test_size, result_folder,
            make_tracker=lambda: OnlineTracker(model_folder, min_cls_score=self.args.track_thresh),
            update_tracker=self._update_tracker_with_image,
            unpack_target=self._unpack_track,
            with_scores=True,
        )

    def _update_tracker(self, tracker, detections, info_imgs, img_name):
        """Update a tracker that does not need the image file name."""
        return tracker.update(detections, info_imgs, self.img_size)

    def _update_tracker_with_image(self, tracker, detections, info_imgs, img_name):
        """Update a tracker that also takes the image file name."""
        return tracker.update(detections, info_imgs, self.img_size, img_name)

    @staticmethod
    def _unpack_track(target):
        """Return ``(tlwh, track_id, score)`` from a track object."""
        return target.tlwh, target.track_id, target.score

    @staticmethod
    def _unpack_box(target):
        """Return ``(tlwh, track_id, None)`` from an ``(x1, y1, x2, y2, id)`` row."""
        tlwh = [target[0], target[1], target[2] - target[0], target[3] - target[1]]
        return tlwh, target[4], None

    def _evaluate_with_tracker(
        self,
        model,
        distributed,
        half,
        trt_file,
        decoder,
        test_size,
        result_folder,
        make_tracker,
        update_tracker,
        unpack_target,
        with_scores,
        configure_video=None,
        skip_empty_detections=False,
    ):
        """
        Shared detection / tracking loop behind every public ``evaluate*``.

        Args:
            model, distributed, half, trt_file, decoder, test_size,
            result_folder: forwarded unchanged from the public method.
            make_tracker: zero-argument callable returning a fresh tracker;
                called once up front and again whenever a frame with
                ``frame_id == 1`` is seen.
            update_tracker: ``(tracker, detections, info_imgs, img_name)``
                callable returning the iterable of online targets.
            unpack_target: maps one online target to ``(tlwh, id, score)``.
            with_scores (bool): keep scores and use ``write_results``;
                otherwise use ``write_results_no_score``.
            configure_video: optional ``(video_name)`` callable invoked for
                every frame before the tracker may be rebuilt.
            skip_empty_detections (bool): when true, frames whose
                ``outputs[0]`` is ``None`` are neither tracked nor recorded.

        Returns:
            Whatever ``evaluate_prediction`` returns.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        data_list = []
        results = []
        video_names = {}
        prev_video_id = None
        progress_bar = tqdm if is_main_process() else iter
        writer = write_results if with_scores else write_results_no_score

        inference_time = 0
        track_time = 0
        last_iter = len(self.dataloader) - 1
        # The last batch is excluded from timing (see below).
        n_samples = last_iter

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = make_tracker()

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
            progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                frame_id = info_imgs[2].item()
                video_id = info_imgs[3].item()
                img_file_name = info_imgs[4]
                # The video name is the first path component of the file name.
                video_name = img_file_name[0].split('/')[0]

                if configure_video is not None:
                    configure_video(video_name)

                if video_id not in video_names:
                    video_names[video_id] = video_name

                if frame_id == 1:
                    # A new video starts: reset the tracker and flush what was
                    # accumulated for the video processed just before.
                    tracker = make_tracker()
                    if len(results) != 0:
                        result_filename = os.path.join(
                            result_folder, '{}.txt'.format(video_names[prev_video_id])
                        )
                        writer(result_filename, results)
                        results = []

                imgs = imgs.type(tensor_type)

                # skip the last iters since batchsize might be not enough for batch inference
                is_time_record = cur_iter < last_iter
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())

                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

            output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
            data_list.extend(output_results)

            # run tracking
            detections = outputs[0]
            if not (skip_empty_detections and detections is None):
                online_targets = update_tracker(
                    tracker, detections, info_imgs, img_file_name[0]
                )
                online_tlwhs = []
                online_ids = []
                online_scores = []
                for target in online_targets:
                    tlwh, tid, score = unpack_target(target)
                    vertical = tlwh[2] / tlwh[3] > self._MAX_ASPECT_RATIO
                    if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                        online_tlwhs.append(tlwh)
                        online_ids.append(tid)
                        online_scores.append(score)
                # save results
                if with_scores:
                    results.append((frame_id, online_tlwhs, online_ids, online_scores))
                else:
                    results.append((frame_id, online_tlwhs, online_ids))

            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == last_iter:
                # Last batch: flush the video that is still open.
                result_filename = os.path.join(
                    result_folder, '{}.txt'.format(video_names[video_id])
                )
                writer(result_filename, results)

            prev_video_id = video_id

        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = list(itertools.chain(*data_list))
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()
        return eval_results

    def convert_to_coco_format(self, outputs, info_imgs, ids):
        """
        Convert post-processed detections into COCO result dictionaries.

        Args:
            outputs: per-image detection tensors (or ``None`` for no detection).
            info_imgs: batch info; ``info_imgs[0]`` / ``info_imgs[1]`` hold the
                original image heights / widths.
            ids: per-image image ids.

        Returns:
            list of dicts with the keys ``image_id``, ``category_id``, ``bbox``,
            ``score`` and ``segmentation``.
        """
        data_list = []
        for (output, img_h, img_w, img_id) in zip(
            outputs, info_imgs[0], info_imgs[1], ids
        ):
            if output is None:
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]

            # preprocessing: resize -- undo the resize ratio that was applied
            # to fit the image into `img_size`.
            scale = min(
                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
            )
            bboxes /= scale
            bboxes = xyxy2xywh(bboxes)

            # Column 6 is used as the class index; the score is the product of
            # columns 4 and 5 (presumably objectness and class confidence).
            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            for ind in range(bboxes.shape[0]):
                label = self.dataloader.dataset.class_ids[int(cls[ind])]
                pred_data = {
                    "image_id": int(img_id),
                    "category_id": label,
                    "bbox": bboxes[ind].numpy().tolist(),
                    "score": scores[ind].numpy().item(),
                    "segmentation": [],
                }  # COCO json format
                data_list.append(pred_data)
        return data_list

    def evaluate_prediction(self, data_dict, statistics):
        """
        Summarise timings and run COCO bbox evaluation on the main process.

        Args:
            data_dict: list of COCO-format detection dicts.
            statistics: tensor holding ``[inference_time, track_time, n_samples]``.

        Returns:
            ``(0, 0, None)`` on non-main processes; otherwise
            ``(stats[0], stats[1], info)`` from the COCO evaluator, or
            ``(0, 0, info)`` when there are no detections. ``info`` starts with
            the average forward / track / inference times in milliseconds.
        """
        if not is_main_process():
            return 0, 0, None

        logger.info("Evaluate in main process...")

        inference_time = statistics[0].item()
        track_time = statistics[1].item()
        n_samples = statistics[2].item()

        # n_samples is 0 when the dataloader has a single batch; report zero
        # averages instead of raising ZeroDivisionError.
        n_images = n_samples * self.dataloader.batch_size
        if n_images > 0:
            a_infer_time = 1000 * inference_time / n_images
            a_track_time = 1000 * track_time / n_images
        else:
            a_infer_time = 0.0
            a_track_time = 0.0

        time_info = ", ".join(
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "track", "inference"],
                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
                )
            ]
        )

        info = time_info + "\n"

        if len(data_dict) == 0:
            return 0, 0, info

        # Evaluate the Dt (detection) json comparing with the ground truth
        cocoGt = self.dataloader.dataset.coco
        # TODO: since pycocotools can't process dict in py36, write data to json file.
        fd, tmp = tempfile.mkstemp()
        try:
            # Own the descriptor returned by mkstemp so it is closed (and the
            # data flushed) before the file is read back.
            with os.fdopen(fd, "w") as f:
                json.dump(data_dict, f)
            # NOTE(review): assumes loadRes reads the file eagerly (as
            # pycocotools does), so the temp file can be removed afterwards.
            cocoDt = cocoGt.loadRes(tmp)
        finally:
            os.remove(tmp)

        from yolox.layers import COCOeval_opt as COCOeval

        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.evaluate()
        cocoEval.accumulate()
        redirect_string = io.StringIO()
        with contextlib.redirect_stdout(redirect_string):
            cocoEval.summarize()
        info += redirect_string.getvalue()
        return cocoEval.stats[0], cocoEval.stats[1], info