Spaces:

ECCV2022
/

bytetrack

Runtime error

bytetrack / yolox /evaluators /mot_evaluator.py

AK391

all files

7734d5b about 4 years ago

25.3 kB

	from collections import defaultdict
	from loguru import logger
	from tqdm import tqdm

	import torch

	from yolox.utils import (
	gather,
	is_main_process,
	postprocess,
	synchronize,
	time_synchronized,
	xyxy2xywh
	)
	from yolox.tracker.byte_tracker import BYTETracker
	from yolox.sort_tracker.sort import Sort
	from yolox.deepsort_tracker.deepsort import DeepSort
	from yolox.motdt_tracker.motdt_tracker import OnlineTracker

	import contextlib
	import io
	import os
	import itertools
	import json
	import tempfile
	import time


	def write_results(filename, results):
	    """Write scored tracking results to a comma-separated text file.

	    Args:
	        filename: path of the file to create (overwritten if it exists).
	        results: iterable of ``(frame_id, tlwhs, track_ids, scores)``
	            tuples; the three sequences are walked in lockstep and each
	            ``tlwh`` unpacks to ``(x1, y1, w, h)``.

	    Each kept track becomes one row
	    ``frame,id,x1,y1,w,h,score,-1,-1,-1``. Entries with a negative track id
	    are skipped. Coordinates are rounded to one decimal, the score to two.
	    """
	    row_template = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n'

	    def _rows():
	        # Lazily yield one formatted row per valid track.
	        for frame_id, tlwhs, track_ids, scores in results:
	            for tlwh, track_id, score in zip(tlwhs, track_ids, scores):
	                if track_id < 0:
	                    continue
	                left, top, width, height = tlwh
	                yield row_template.format(
	                    frame=frame_id,
	                    id=track_id,
	                    x1=round(left, 1),
	                    y1=round(top, 1),
	                    w=round(width, 1),
	                    h=round(height, 1),
	                    s=round(score, 2),
	                )

	    with open(filename, 'w') as out_file:
	        out_file.writelines(_rows())
	    logger.info('save results to {}'.format(filename))


	def write_results_no_score(filename, results):
	    """Write tracking results without confidence scores to a text file.

	    Args:
	        filename: path of the file to create (overwritten if it exists).
	        results: iterable of ``(frame_id, tlwhs, track_ids)`` tuples; the two
	            sequences are walked in lockstep and each ``tlwh`` unpacks to
	            ``(x1, y1, w, h)``.

	    Each kept track becomes one row ``frame,id,x1,y1,w,h,-1,-1,-1,-1``.
	    Entries with a negative track id are skipped. Coordinates are rounded to
	    one decimal.
	    """
	    row_template = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'

	    def _rows():
	        # Lazily yield one formatted row per valid track.
	        for frame_id, tlwhs, track_ids in results:
	            for tlwh, track_id in zip(tlwhs, track_ids):
	                if track_id < 0:
	                    continue
	                left, top, width, height = tlwh
	                yield row_template.format(
	                    frame=frame_id,
	                    id=track_id,
	                    x1=round(left, 1),
	                    y1=round(top, 1),
	                    w=round(width, 1),
	                    h=round(height, 1),
	                )

	    with open(filename, 'w') as out_file:
	        out_file.writelines(_rows())
	    logger.info('save results to {}'.format(filename))


	class MOTEvaluator:
	    """
	    Multi-object-tracking evaluator.

	    Runs a detector over ``dataloader``, feeds the per-frame detections to a
	    tracker (BYTE, SORT, DeepSORT or MOTDT, depending on which ``evaluate*``
	    method is called), writes one result text file per video into
	    ``result_folder`` and scores the raw detections with the COCO API.
	    """

	    # Per-video settings applied by `evaluate` (BYTE tracker only).
	    # Videos that are not listed use the default buffer / the caller's threshold.
	    _DEFAULT_TRACK_BUFFER = 30
	    _TRACK_BUFFER_BY_VIDEO = {
	        'MOT17-05-FRCNN': 14,
	        'MOT17-06-FRCNN': 14,
	        'MOT17-13-FRCNN': 25,
	        'MOT17-14-FRCNN': 25,
	    }
	    _TRACK_THRESH_BY_VIDEO = {
	        'MOT17-01-FRCNN': 0.65,
	        'MOT17-06-FRCNN': 0.65,
	        'MOT17-12-FRCNN': 0.7,
	        'MOT17-14-FRCNN': 0.67,
	        'MOT20-06': 0.3,
	        'MOT20-08': 0.3,
	    }

	    def __init__(self, args, dataloader, img_size, confthre, nmsthre, num_classes):
	        """
	        Args:
	            args: options object; this class reads ``track_thresh`` and
	                ``min_box_area`` from it, and `evaluate` additionally writes
	                ``track_buffer`` and ``track_thresh`` on it.
	            dataloader (Dataloader): evaluate dataloader.
	            img_size: network input size, indexed as ``img_size[0]``
	                (paired with image height) and ``img_size[1]`` (paired with
	                image width).
	            confthre (float): confidence threshold passed to ``postprocess``.
	            nmsthre (float): IoU threshold of non-max suppression passed to
	                ``postprocess``.
	            num_classes (int): number of classes passed to ``postprocess``.
	        """
	        self.dataloader = dataloader
	        self.img_size = img_size
	        self.confthre = confthre
	        self.nmsthre = nmsthre
	        self.num_classes = num_classes
	        self.args = args

	    # ------------------------------------------------------------------
	    # Public entry points (one per tracker)
	    # ------------------------------------------------------------------

	    def evaluate(
	        self,
	        model,
	        distributed=False,
	        half=False,
	        trt_file=None,
	        decoder=None,
	        test_size=None,
	        result_folder=None
	    ):
	        """
	        Run detection + BYTE tracking over the dataloader and score the
	        detections with the COCO API.

	        For every frame ``self.args.track_buffer`` and
	        ``self.args.track_thresh`` are overwritten with the per-video values
	        from `_sequence_overrides`; ``self.args`` is left holding the values
	        of the last video processed. Frames for which ``postprocess`` returns
	        ``None`` are not passed to the tracker and produce no result row.

	        NOTE: This function will change training mode to False, please save
	        states if needed.

	        Args:
	            model: detector to evaluate.
	            distributed (bool): gather detections / timing on rank 0.
	            half (bool): run the model and inputs in half precision.
	            trt_file: optional path to a torch2trt state dict; when given the
	                TensorRT module replaces ``model`` for inference.
	            decoder: optional callable applied to the raw model outputs.
	            test_size: ``(h, w)`` of the warm-up input; only used with
	                ``trt_file``.
	            result_folder: directory receiving ``<video_name>.txt`` files.

	        Returns:
	            ap50_95 (float) : COCO AP of IoU=50:95
	            ap50 (float) : COCO AP of IoU=50
	            summary (str): summary info of evaluation.
	        """
	        default_thresh = self.args.track_thresh

	        def configure(video_name):
	            # The overrides are written to self.args, which is the object
	            # handed to BYTETracker when a new tracker is built at the first
	            # frame of a video.
	            buffer_size, thresh = self._sequence_overrides(video_name, default_thresh)
	            self.args.track_buffer = buffer_size
	            self.args.track_thresh = thresh

	        def track(tracker, outputs, info_imgs, img_file_name):
	            if outputs[0] is None:
	                return None
	            targets = tracker.update(outputs[0], info_imgs, self.img_size)
	            return self._collect_track_objects(targets)

	        return self._run_tracking_eval(
	            model, distributed, half, trt_file, decoder, test_size, result_folder,
	            make_tracker=lambda: BYTETracker(self.args),
	            track=track,
	            writer=write_results,
	            configure=configure,
	        )

	    def evaluate_sort(
	        self,
	        model,
	        distributed=False,
	        half=False,
	        trt_file=None,
	        decoder=None,
	        test_size=None,
	        result_folder=None
	    ):
	        """
	        Run detection + SORT tracking over the dataloader and score the
	        detections with the COCO API. Result files carry no score column.

	        NOTE: This function will change training mode to False, please save
	        states if needed.

	        Args:
	            model: detector to evaluate.
	            distributed (bool): gather detections / timing on rank 0.
	            half (bool): run the model and inputs in half precision.
	            trt_file: optional path to a torch2trt state dict.
	            decoder: optional callable applied to the raw model outputs.
	            test_size: ``(h, w)`` of the warm-up input; only used with
	                ``trt_file``.
	            result_folder: directory receiving ``<video_name>.txt`` files.

	        Returns:
	            ap50_95 (float) : COCO AP of IoU=50:95
	            ap50 (float) : COCO AP of IoU=50
	            summary (str): summary info of evaluation.
	        """
	        def track(tracker, outputs, info_imgs, img_file_name):
	            targets = tracker.update(outputs[0], info_imgs, self.img_size)
	            return self._collect_xyxy_rows(targets)

	        return self._run_tracking_eval(
	            model, distributed, half, trt_file, decoder, test_size, result_folder,
	            make_tracker=lambda: Sort(self.args.track_thresh),
	            track=track,
	            writer=write_results_no_score,
	        )

	    def evaluate_deepsort(
	        self,
	        model,
	        distributed=False,
	        half=False,
	        trt_file=None,
	        decoder=None,
	        test_size=None,
	        result_folder=None,
	        model_folder=None
	    ):
	        """
	        Run detection + DeepSORT tracking over the dataloader and score the
	        detections with the COCO API. Result files carry no score column.

	        NOTE: This function will change training mode to False, please save
	        states if needed.

	        Args:
	            model: detector to evaluate.
	            distributed (bool): gather detections / timing on rank 0.
	            half (bool): run the model and inputs in half precision.
	            trt_file: optional path to a torch2trt state dict.
	            decoder: optional callable applied to the raw model outputs.
	            test_size: ``(h, w)`` of the warm-up input; only used with
	                ``trt_file``.
	            result_folder: directory receiving ``<video_name>.txt`` files.
	            model_folder: first positional argument of ``DeepSort``.

	        Returns:
	            ap50_95 (float) : COCO AP of IoU=50:95
	            ap50 (float) : COCO AP of IoU=50
	            summary (str): summary info of evaluation.
	        """
	        def track(tracker, outputs, info_imgs, img_file_name):
	            targets = tracker.update(
	                outputs[0], info_imgs, self.img_size, img_file_name[0]
	            )
	            return self._collect_xyxy_rows(targets)

	        return self._run_tracking_eval(
	            model, distributed, half, trt_file, decoder, test_size, result_folder,
	            make_tracker=lambda: DeepSort(
	                model_folder, min_confidence=self.args.track_thresh
	            ),
	            track=track,
	            writer=write_results_no_score,
	        )

	    def evaluate_motdt(
	        self,
	        model,
	        distributed=False,
	        half=False,
	        trt_file=None,
	        decoder=None,
	        test_size=None,
	        result_folder=None,
	        model_folder=None
	    ):
	        """
	        Run detection + MOTDT tracking over the dataloader and score the
	        detections with the COCO API.

	        NOTE: This function will change training mode to False, please save
	        states if needed.

	        Args:
	            model: detector to evaluate.
	            distributed (bool): gather detections / timing on rank 0.
	            half (bool): run the model and inputs in half precision.
	            trt_file: optional path to a torch2trt state dict.
	            decoder: optional callable applied to the raw model outputs.
	            test_size: ``(h, w)`` of the warm-up input; only used with
	                ``trt_file``.
	            result_folder: directory receiving ``<video_name>.txt`` files.
	            model_folder: first positional argument of ``OnlineTracker``.

	        Returns:
	            ap50_95 (float) : COCO AP of IoU=50:95
	            ap50 (float) : COCO AP of IoU=50
	            summary (str): summary info of evaluation.
	        """
	        def track(tracker, outputs, info_imgs, img_file_name):
	            targets = tracker.update(
	                outputs[0], info_imgs, self.img_size, img_file_name[0]
	            )
	            return self._collect_track_objects(targets)

	        return self._run_tracking_eval(
	            model, distributed, half, trt_file, decoder, test_size, result_folder,
	            make_tracker=lambda: OnlineTracker(
	                model_folder, min_cls_score=self.args.track_thresh
	            ),
	            track=track,
	            writer=write_results,
	        )

	    # ------------------------------------------------------------------
	    # Private helpers
	    # ------------------------------------------------------------------

	    @classmethod
	    def _sequence_overrides(cls, video_name, default_thresh):
	        """Return ``(track_buffer, track_thresh)`` to use for ``video_name``.

	        Videos without an entry get the default buffer and ``default_thresh``.
	        Both MOT17 and MOT20 thresholds come from one table, so a MOT17
	        override is no longer reset by a later MOT20 check.
	        """
	        track_buffer = cls._TRACK_BUFFER_BY_VIDEO.get(
	            video_name, cls._DEFAULT_TRACK_BUFFER
	        )
	        track_thresh = cls._TRACK_THRESH_BY_VIDEO.get(video_name, default_thresh)
	        return track_buffer, track_thresh

	    @staticmethod
	    def _keep_box(tlwh, min_box_area):
	        """Tell whether a ``(x, y, w, h)`` box passes the output filter.

	        A box is dropped when its width/height ratio exceeds 1.6 or when its
	        area does not exceed ``min_box_area``.
	        """
	        too_wide = tlwh[2] / tlwh[3] > 1.6
	        return tlwh[2] * tlwh[3] > min_box_area and not too_wide

	    def _collect_track_objects(self, targets):
	        """Filter track objects exposing ``tlwh``, ``track_id`` and ``score``.

	        Returns ``(tlwhs, ids, scores)`` lists for the boxes that pass
	        `_keep_box`.
	        """
	        tlwhs, track_ids, scores = [], [], []
	        for target in targets:
	            tlwh = target.tlwh
	            if self._keep_box(tlwh, self.args.min_box_area):
	                tlwhs.append(tlwh)
	                track_ids.append(target.track_id)
	                scores.append(target.score)
	        return tlwhs, track_ids, scores

	    def _collect_xyxy_rows(self, targets):
	        """Filter tracker rows indexed as ``[x1, y1, x2, y2, track_id]``.

	        Each row is converted to ``[x, y, w, h]``. Returns ``(tlwhs, ids)``
	        lists for the boxes that pass `_keep_box`.
	        """
	        tlwhs, track_ids = [], []
	        for row in targets:
	            tlwh = [row[0], row[1], row[2] - row[0], row[3] - row[1]]
	            if self._keep_box(tlwh, self.args.min_box_area):
	                tlwhs.append(tlwh)
	                track_ids.append(row[4])
	        return tlwhs, track_ids

	    def _run_tracking_eval(
	        self,
	        model,
	        distributed,
	        half,
	        trt_file,
	        decoder,
	        test_size,
	        result_folder,
	        make_tracker,
	        track,
	        writer,
	        configure=None,
	    ):
	        """Shared detection / tracking / scoring loop behind every ``evaluate*``.

	        Args:
	            model, distributed, half, trt_file, decoder, test_size,
	            result_folder: forwarded unchanged from the public method.
	            make_tracker: zero-argument callable returning a fresh tracker;
	                called once up front and again at frame 1 of every video.
	            track: ``track(tracker, outputs, info_imgs, img_file_name)``
	                returning the per-frame result tuple (without the frame id),
	                or ``None`` to record nothing for that frame.
	            writer: ``writer(filename, results)`` used to dump one video.
	            configure: optional ``configure(video_name)`` hook run for every
	                frame before the tracker may be rebuilt.

	        Returns:
	            Whatever `evaluate_prediction` returns.
	        """
	        # TODO half to amp_test
	        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
	        model = model.eval()
	        if half:
	            model = model.half()
	        data_list = []
	        results = []
	        video_names = {}  # video_id -> video_name
	        progress_bar = tqdm if is_main_process() else iter

	        inference_time = 0
	        track_time = 0
	        num_iters = len(self.dataloader)
	        # The last iteration is excluded from timing (see below).
	        n_samples = num_iters - 1

	        if trt_file is not None:
	            from torch2trt import TRTModule

	            model_trt = TRTModule()
	            model_trt.load_state_dict(torch.load(trt_file))

	            # One warm-up pass through the original model before swapping.
	            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
	            model(x)
	            model = model_trt

	        tracker = make_tracker()
	        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
	            progress_bar(self.dataloader)
	        ):
	            frame_id = info_imgs[2].item()
	            video_id = info_imgs[3].item()
	            img_file_name = info_imgs[4]
	            # The video name is the first path component of the image file.
	            video_name = img_file_name[0].split('/')[0]

	            if configure is not None:
	                configure(video_name)

	            if video_id not in video_names:
	                video_names[video_id] = video_name

	            if frame_id == 1:
	                # A new video starts: reset the tracker and flush whatever
	                # was accumulated for the previous video.
	                tracker = make_tracker()
	                if results:
	                    result_filename = os.path.join(
	                        result_folder, '{}.txt'.format(video_names[video_id - 1])
	                    )
	                    writer(result_filename, results)
	                    results = []

	            # Skip the last iteration when timing, since its batch might be
	            # too small for a representative batch inference.
	            is_time_record = cur_iter < num_iters - 1

	            with torch.no_grad():
	                imgs = imgs.type(tensor_type)

	                if is_time_record:
	                    start = time.time()

	                outputs = model(imgs)
	                if decoder is not None:
	                    outputs = decoder(outputs, dtype=outputs.type())

	                outputs = postprocess(
	                    outputs, self.num_classes, self.confthre, self.nmsthre
	                )

	                if is_time_record:
	                    infer_end = time_synchronized()
	                    inference_time += infer_end - start

	            data_list.extend(self.convert_to_coco_format(outputs, info_imgs, ids))

	            # run tracking
	            frame_result = track(tracker, outputs, info_imgs, img_file_name)
	            if frame_result is not None:
	                results.append((frame_id,) + tuple(frame_result))

	            if is_time_record:
	                track_end = time_synchronized()
	                track_time += track_end - infer_end

	            if cur_iter == num_iters - 1:
	                # End of data: flush the video that is still open.
	                result_filename = os.path.join(
	                    result_folder, '{}.txt'.format(video_names[video_id])
	                )
	                writer(result_filename, results)

	        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
	        if distributed:
	            data_list = gather(data_list, dst=0)
	            data_list = list(itertools.chain(*data_list))
	            torch.distributed.reduce(statistics, dst=0)

	        eval_results = self.evaluate_prediction(data_list, statistics)
	        synchronize()
	        return eval_results

	    # ------------------------------------------------------------------
	    # Detection scoring
	    # ------------------------------------------------------------------

	    def convert_to_coco_format(self, outputs, info_imgs, ids):
	        """Convert post-processed detections into COCO result dicts.

	        Args:
	            outputs: per-image detections (or ``None`` for an image without
	                detections). Columns 0-3 are read as an xyxy box, columns 4
	                and 5 are multiplied into the score, column 6 is the class
	                index.
	            info_imgs: ``info_imgs[0]`` / ``info_imgs[1]`` give per-image
	                height / width.
	            ids: per-image ids.

	        Returns:
	            list[dict]: one dict per detection with the keys ``image_id``,
	            ``category_id``, ``bbox`` (xywh, rescaled to the original image),
	            ``score`` and ``segmentation``.
	        """
	        data_list = []
	        for (output, img_h, img_w, img_id) in zip(
	            outputs, info_imgs[0], info_imgs[1], ids
	        ):
	            if output is None:
	                continue
	            output = output.cpu()

	            # preprocessing: resize
	            scale = min(
	                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
	            )
	            # Divide out of place: `.cpu()` returns the very same tensor when
	            # it already lives on the CPU, and the caller hands `outputs` to
	            # the tracker afterwards, so it must not be modified here.
	            bboxes = xyxy2xywh(output[:, 0:4] / scale)

	            cls = output[:, 6]
	            scores = output[:, 4] * output[:, 5]
	            class_ids = self.dataloader.dataset.class_ids
	            for bbox, score, cls_index in zip(
	                bboxes.tolist(), scores.tolist(), cls.tolist()
	            ):
	                data_list.append(
	                    {
	                        "image_id": int(img_id),
	                        "category_id": class_ids[int(cls_index)],
	                        "bbox": bbox,
	                        "score": score,
	                        "segmentation": [],
	                    }  # COCO json format
	                )
	        return data_list

	    def evaluate_prediction(self, data_dict, statistics):
	        """Score detections with the COCO API and build the timing summary.

	        Args:
	            data_dict: list of COCO result dicts (see
	                `convert_to_coco_format`).
	            statistics: tensor holding ``[inference_time, track_time,
	                n_samples]``.

	        Returns:
	            ``(0, 0, None)`` on non-main processes; otherwise
	            ``(stats[0], stats[1], info)`` from the COCO evaluation, or
	            ``(0, 0, info)`` when there are no detections. ``info`` starts
	            with the average per-image timings in milliseconds.
	        """
	        if not is_main_process():
	            return 0, 0, None

	        logger.info("Evaluate in main process...")

	        inference_time = statistics[0].item()
	        track_time = statistics[1].item()
	        n_samples = statistics[2].item()

	        n_images = n_samples * self.dataloader.batch_size
	        if n_images > 0:
	            a_infer_time = 1000 * inference_time / n_images
	            a_track_time = 1000 * track_time / n_images
	        else:
	            # Nothing was timed (e.g. a single-batch dataloader): report
	            # zeros instead of dividing by zero.
	            a_infer_time = a_track_time = 0.0

	        time_info = ", ".join(
	            [
	                "Average {} time: {:.2f} ms".format(k, v)
	                for k, v in zip(
	                    ["forward", "track", "inference"],
	                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
	                )
	            ]
	        )

	        info = time_info + "\n"

	        if len(data_dict) == 0:
	            return 0, 0, info

	        # Evaluate the Dt (detection) json comparing with the ground truth
	        cocoGt = self.dataloader.dataset.coco
	        # TODO: since pycocotools can't process dict in py36, write data to json file.
	        fd, tmp = tempfile.mkstemp()
	        try:
	            with os.fdopen(fd, "w") as tmp_file:
	                json.dump(data_dict, tmp_file)
	            cocoDt = cocoGt.loadRes(tmp)
	        finally:
	            os.remove(tmp)

	        from yolox.layers import COCOeval_opt as COCOeval

	        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
	        cocoEval.evaluate()
	        cocoEval.accumulate()
	        # summarize() prints its table; capture it for the returned summary.
	        redirect_string = io.StringIO()
	        with contextlib.redirect_stdout(redirect_string):
	            cocoEval.summarize()
	        info += redirect_string.getvalue()
	        return cocoEval.stats[0], cocoEval.stats[1], info