import os
import time
import datetime
import json

import numpy as np

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms

from dataloaders.eval_datasets import YOUTUBEVOS_Test, YOUTUBEVOS_DenseTest, DAVIS_Test, EVAL_TEST
import dataloaders.video_transforms as tr

from utils.image import flip_tensor, save_mask
from utils.checkpoint import load_network
from utils.eval import zip_folder

from networks.models import build_vos_model
from networks.engines import build_engine
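

# One Evaluator is built per GPU/process. With several GPUs, rank 0 feeds
# sequence indices into `seq_queue` and every worker pops from it; per-worker
# timing statistics flow back to rank 0 through `info_queue`.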
class Evaluator(object):
    def __init__(self, cfg, rank=0, seq_queue=None, info_queue=None):
        self.gpu = cfg.TEST_GPU_ID + rank
        self.gpu_num = cfg.TEST_GPU_NUM
        self.rank = rank
        self.cfg = cfg
        self.seq_queue = seq_queue
        self.info_queue = info_queue

        self.print_log("Exp {}:".format(cfg.EXP_NAME))
        self.print_log(json.dumps(cfg.__dict__, indent=4, sort_keys=True))

        print("Use GPU {} for evaluating.".format(self.gpu))
        torch.cuda.set_device(self.gpu)

        self.print_log('Build VOS model.')
        self.model = build_vos_model(cfg.MODEL_VOS, cfg).cuda(self.gpu)

        self.process_pretrained_model()
        self.prepare_dataset()
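
    # Checkpoint resolution: an explicit TEST_CKPT_PATH wins; otherwise use
    # TEST_CKPT_STEP, or fall back to the newest 'save_step_*.pth' found in
    # DIR_CKPT (EMA weights are preferred when TEST_EMA is set).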
    def process_pretrained_model(self):
        cfg = self.cfg

        if cfg.TEST_CKPT_PATH == 'test':
            self.ckpt = 'test'
            self.print_log('Test evaluation.')
            return

        if cfg.TEST_CKPT_PATH is None:
            if cfg.TEST_CKPT_STEP is not None:
                ckpt = str(cfg.TEST_CKPT_STEP)
            else:
                ckpts = os.listdir(cfg.DIR_CKPT)
                if len(ckpts) > 0:
                    ckpts = list(
                        map(lambda x: int(x.split('_')[-1].split('.')[0]),
                            ckpts))
                    ckpt = np.sort(ckpts)[-1]
                else:
                    self.print_log('No checkpoint in {}.'.format(cfg.DIR_CKPT))
                    exit()
            self.ckpt = ckpt
            if cfg.TEST_EMA:
                cfg.DIR_CKPT = os.path.join(cfg.DIR_RESULT, 'ema_ckpt')
            cfg.TEST_CKPT_PATH = os.path.join(cfg.DIR_CKPT,
                                              'save_step_%s.pth' % ckpt)
            try:
                self.model, removed_dict = load_network(
                    self.model, cfg.TEST_CKPT_PATH, self.gpu)
            except Exception as inst:
                self.print_log(inst)
                self.print_log('Try to use backup checkpoint.')
                DIR_RESULT = './backup/{}/{}'.format(cfg.EXP_NAME,
                                                     cfg.STAGE_NAME)
                DIR_CKPT = os.path.join(DIR_RESULT, 'ema_ckpt')
                TEST_CKPT_PATH = os.path.join(DIR_CKPT,
                                              'save_step_%s.pth' % ckpt)
                self.model, removed_dict = load_network(
                    self.model, TEST_CKPT_PATH, self.gpu)

            if len(removed_dict) > 0:
                self.print_log(
                    'Remove {} from pretrained model.'.format(removed_dict))
            self.print_log('Load latest checkpoint from {}'.format(
                cfg.TEST_CKPT_PATH))
        else:
            self.ckpt = 'unknown'
            self.model, removed_dict = load_network(self.model,
                                                    cfg.TEST_CKPT_PATH,
                                                    self.gpu)
            if len(removed_dict) > 0:
                self.print_log(
                    'Remove {} from pretrained model.'.format(removed_dict))
            self.print_log('Load checkpoint from {}'.format(
                cfg.TEST_CKPT_PATH))
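
    # The evaluation folder name encodes dataset, split, experiment, stage,
    # checkpoint step, and any EMA / flip / multi-scale test-time settings.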
    def prepare_dataset(self):
        cfg = self.cfg
        self.print_log('Process dataset...')
        eval_transforms = transforms.Compose([
            tr.MultiRestrictSize(cfg.TEST_MAX_SHORT_EDGE,
                                 cfg.TEST_MAX_LONG_EDGE, cfg.TEST_FLIP,
                                 cfg.TEST_MULTISCALE, cfg.MODEL_ALIGN_CORNERS),
            tr.MultiToTensor()
        ])

        exp_name = cfg.EXP_NAME
        if 'aost' in cfg.MODEL_VOS:
            exp_name += '_L{}'.format(int(cfg.MODEL_LSTT_NUM))

        eval_name = '{}_{}_{}_{}_ckpt_{}'.format(cfg.TEST_DATASET,
                                                 cfg.TEST_DATASET_SPLIT,
                                                 exp_name, cfg.STAGE_NAME,
                                                 self.ckpt)

        if cfg.TEST_EMA:
            eval_name += '_ema'
        if cfg.TEST_FLIP:
            eval_name += '_flip'
        if len(cfg.TEST_MULTISCALE) > 1:
            eval_name += '_ms_' + str(cfg.TEST_MULTISCALE).replace(
                '.', 'dot').replace('[', '').replace(']', '').replace(
                    ', ', '_')

        if 'youtubevos' in cfg.TEST_DATASET:
            year = int(cfg.TEST_DATASET[-4:])
            self.result_root = os.path.join(cfg.DIR_EVALUATION,
                                            cfg.TEST_DATASET, eval_name,
                                            'Annotations')

            if '_all_frames' in cfg.TEST_DATASET_SPLIT:
                split = cfg.TEST_DATASET_SPLIT.split('_')[0]
                youtubevos_test = YOUTUBEVOS_DenseTest

                self.result_root_sparse = os.path.join(cfg.DIR_EVALUATION,
                                                       cfg.TEST_DATASET,
                                                       eval_name + '_sparse',
                                                       'Annotations')
                self.zip_dir_sparse = os.path.join(
                    cfg.DIR_EVALUATION, cfg.TEST_DATASET,
                    '{}_sparse.zip'.format(eval_name))
            else:
                split = cfg.TEST_DATASET_SPLIT
                youtubevos_test = YOUTUBEVOS_Test

            self.dataset = youtubevos_test(root=cfg.DIR_YTB,
                                           year=year,
                                           split=split,
                                           transform=eval_transforms,
                                           result_root=self.result_root)

        elif cfg.TEST_DATASET == 'davis2017':
            resolution = 'Full-Resolution' if cfg.TEST_DATASET_FULL_RESOLUTION else '480p'
            self.result_root = os.path.join(cfg.DIR_EVALUATION,
                                            cfg.TEST_DATASET, eval_name,
                                            'Annotations', resolution)
            self.dataset = DAVIS_Test(
                split=[cfg.TEST_DATASET_SPLIT],
                root=cfg.DIR_DAVIS,
                year=2017,
                transform=eval_transforms,
                full_resolution=cfg.TEST_DATASET_FULL_RESOLUTION,
                result_root=self.result_root)

        elif cfg.TEST_DATASET == 'davis2016':
            resolution = 'Full-Resolution' if cfg.TEST_DATASET_FULL_RESOLUTION else '480p'
            self.result_root = os.path.join(cfg.DIR_EVALUATION,
                                            cfg.TEST_DATASET, eval_name,
                                            'Annotations', resolution)
            self.dataset = DAVIS_Test(
                split=[cfg.TEST_DATASET_SPLIT],
                root=cfg.DIR_DAVIS,
                year=2016,
                transform=eval_transforms,
                full_resolution=cfg.TEST_DATASET_FULL_RESOLUTION,
                result_root=self.result_root)

        elif cfg.TEST_DATASET == 'test':
            self.result_root = os.path.join(cfg.DIR_EVALUATION,
                                            cfg.TEST_DATASET, eval_name,
                                            'Annotations')
            self.dataset = EVAL_TEST(eval_transforms, self.result_root)

        else:
            self.print_log('Unknown dataset!')
            exit()

        self.print_log('Eval {} on {} {}:'.format(cfg.EXP_NAME,
                                                  cfg.TEST_DATASET,
                                                  cfg.TEST_DATASET_SPLIT))
        self.source_folder = os.path.join(cfg.DIR_EVALUATION, cfg.TEST_DATASET,
                                          eval_name, 'Annotations')
        self.zip_dir = os.path.join(cfg.DIR_EVALUATION, cfg.TEST_DATASET,
                                    '{}.zip'.format(eval_name))
        if not os.path.exists(self.result_root):
            try:
                os.makedirs(self.result_root)
            except Exception as inst:
                self.print_log(inst)
                self.print_log('Failed to make dir: {}.'.format(
                    self.result_root))
        self.print_log('Done!')
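
    # Evaluation loop. When `seq_queue` is set (multi-GPU), rank 0 enqueues
    # every sequence index plus one 'END' sentinel per worker; each worker
    # only processes the indices it pops and skips the rest.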
    def evaluating(self):
        cfg = self.cfg
        self.model.eval()
        video_num = 0
        processed_video_num = 0
        total_time = 0
        total_frame = 0
        total_sfps = 0
        total_video_num = len(self.dataset)
        start_eval_time = time.time()

        if self.seq_queue is not None:
            if self.rank == 0:
                for seq_idx in range(total_video_num):
                    self.seq_queue.put(seq_idx)
                for _ in range(self.gpu_num):
                    self.seq_queue.put('END')
            coming_seq_idx = self.seq_queue.get()

        all_engines = []
        with torch.no_grad():
            for seq_idx, seq_dataset in enumerate(self.dataset):
                video_num += 1

                if self.seq_queue is not None:
                    if coming_seq_idx == 'END':
                        break
                    elif coming_seq_idx != seq_idx:
                        continue
                    else:
                        coming_seq_idx = self.seq_queue.get()

                processed_video_num += 1

                for engine in all_engines:
                    engine.restart_engine()

                seq_name = seq_dataset.seq_name
                print('GPU {} - Processing Seq {} [{}/{}]:'.format(
                    self.gpu, seq_name, video_num, total_video_num))
                torch.cuda.empty_cache()

                seq_dataloader = DataLoader(seq_dataset,
                                            batch_size=1,
                                            shuffle=False,
                                            num_workers=cfg.TEST_WORKERS,
                                            pin_memory=True)

                if 'all_frames' in cfg.TEST_DATASET_SPLIT:
                    images_sparse = seq_dataset.images_sparse
                    seq_dir_sparse = os.path.join(self.result_root_sparse,
                                                  seq_name)
                    if not os.path.exists(seq_dir_sparse):
                        os.makedirs(seq_dir_sparse)

                seq_total_time = 0
                seq_total_frame = 0
                seq_pred_masks = {'dense': [], 'sparse': []}
                seq_timers = []
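
                # Each test-time augmentation (flip / scale) gets its own
                # propagation engine; their per-frame predictions are fused
                # below by averaging the softmax probabilities.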
                for frame_idx, samples in enumerate(seq_dataloader):
                    all_preds = []
                    new_obj_label = None
                    aug_num = len(samples)
                    for aug_idx in range(aug_num):
                        if len(all_engines) <= aug_idx:
                            all_engines.append(
                                build_engine(
                                    cfg.MODEL_ENGINE,
                                    phase='eval',
                                    aot_model=self.model,
                                    gpu_id=self.gpu,
                                    long_term_mem_gap=cfg.TEST_LONG_TERM_MEM_GAP,
                                    short_term_mem_skip=cfg.TEST_SHORT_TERM_MEM_SKIP))
                            all_engines[-1].eval()

                        if aug_num > 1:  # if use test-time augmentation
                            torch.cuda.empty_cache()  # release GPU memory

                        engine = all_engines[aug_idx]
                        sample = samples[aug_idx]

                        is_flipped = sample['meta']['flip']
                        obj_nums = sample['meta']['obj_num']
                        imgname = sample['meta']['current_name']
                        ori_height = sample['meta']['height']
                        ori_width = sample['meta']['width']
                        obj_idx = sample['meta']['obj_idx']

                        obj_nums = [int(obj_num) for obj_num in obj_nums]
                        obj_idx = [int(_obj_idx) for _obj_idx in obj_idx]

                        current_img = sample['current_img']
                        current_img = current_img.cuda(self.gpu,
                                                       non_blocking=True)
                        sample['current_img'] = current_img

                        if 'current_label' in sample.keys():
                            current_label = sample['current_label'].cuda(
                                self.gpu, non_blocking=True).float()
                        else:
                            current_label = None

                        #############################################################
                        if frame_idx == 0:
                            _current_label = F.interpolate(
                                current_label,
                                size=current_img.size()[2:],
                                mode="nearest")
                            engine.add_reference_frame(current_img,
                                                       _current_label,
                                                       frame_step=0,
                                                       obj_nums=obj_nums)
                        else:
                            if aug_idx == 0:
                                seq_timers.append([])
                                now_timer = torch.cuda.Event(
                                    enable_timing=True)
                                now_timer.record()
                                seq_timers[-1].append(now_timer)

                            engine.match_propogate_one_frame(current_img)
                            pred_logit = engine.decode_current_logits(
                                (ori_height, ori_width))

                            if is_flipped:
                                pred_logit = flip_tensor(pred_logit, 3)

                            pred_prob = torch.softmax(pred_logit, dim=1)
                            all_preds.append(pred_prob)

                            if not is_flipped and current_label is not None \
                                    and new_obj_label is None:
                                new_obj_label = current_label
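
                    # Fuse augmented views: average the softmax probabilities
                    # across augmentations, then take the argmax as the label
                    # that is fed back into each engine's memory.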
                    if frame_idx > 0:
                        all_pred_probs = [
                            torch.mean(pred, dim=0, keepdim=True)
                            for pred in all_preds
                        ]
                        all_pred_labels = [
                            torch.argmax(prob, dim=1, keepdim=True).float()
                            for prob in all_pred_probs
                        ]

                        cat_all_preds = torch.cat(all_preds, dim=0)
                        pred_prob = torch.mean(cat_all_preds,
                                               dim=0,
                                               keepdim=True)
                        pred_label = torch.argmax(pred_prob,
                                                  dim=1,
                                                  keepdim=True).float()

                        if new_obj_label is not None:
                            keep = (new_obj_label == 0).float()
                            all_pred_labels = [
                                label * keep + new_obj_label * (1 - keep)
                                for label in all_pred_labels
                            ]
                            pred_label = pred_label * keep + \
                                new_obj_label * (1 - keep)
                            new_obj_nums = [int(pred_label.max().item())]

                            if cfg.TEST_FLIP:
                                all_flip_pred_labels = [
                                    flip_tensor(label, 3)
                                    for label in all_pred_labels
                                ]
                                flip_pred_label = flip_tensor(pred_label, 3)

                            for aug_idx in range(len(samples)):
                                engine = all_engines[aug_idx]
                                current_img = samples[aug_idx]['current_img']
                                current_label = all_flip_pred_labels[
                                    aug_idx] if samples[aug_idx]['meta'][
                                        'flip'] else all_pred_labels[aug_idx]
                                current_label = F.interpolate(
                                    current_label,
                                    size=engine.input_size_2d,
                                    mode="nearest")
                                engine.add_reference_frame(
                                    current_img,
                                    current_label,
                                    obj_nums=new_obj_nums,
                                    frame_step=frame_idx)
                                engine.decode_current_logits(
                                    (ori_height, ori_width))
                                engine.update_memory(current_label)
                        else:
                            if not cfg.MODEL_USE_PREV_PROB:
                                if cfg.TEST_FLIP:
                                    all_flip_pred_labels = [
                                        flip_tensor(label, 3)
                                        for label in all_pred_labels
                                    ]
                                    flip_pred_label = flip_tensor(
                                        pred_label, 3)

                                for aug_idx in range(len(samples)):
                                    engine = all_engines[aug_idx]
                                    current_label = all_flip_pred_labels[
                                        aug_idx] if samples[aug_idx]['meta'][
                                            'flip'] else all_pred_labels[
                                                aug_idx]
                                    current_label = F.interpolate(
                                        current_label,
                                        size=engine.input_size_2d,
                                        mode="nearest")
                                    engine.update_memory(current_label)
                            else:
                                if cfg.TEST_FLIP:
                                    all_flip_pred_probs = [
                                        flip_tensor(prob, 3)
                                        for prob in all_pred_probs
                                    ]
                                    flip_pred_prob = flip_tensor(pred_prob, 3)

                                for aug_idx in range(len(samples)):
                                    engine = all_engines[aug_idx]
                                    current_prob = all_flip_pred_probs[
                                        aug_idx] if samples[aug_idx]['meta'][
                                            'flip'] else all_pred_probs[
                                                aug_idx]
                                    current_prob = F.interpolate(
                                        current_prob,
                                        size=engine.input_size_2d,
                                        mode="nearest")
                                    engine.update_memory(current_prob)
                        now_timer = torch.cuda.Event(enable_timing=True)
                        now_timer.record()
                        seq_timers[-1].append(now_timer)

                        if cfg.TEST_FRAME_LOG:
                            torch.cuda.synchronize()
                            one_frametime = seq_timers[-1][0].elapsed_time(
                                seq_timers[-1][1]) / 1e3
                            obj_num = obj_nums[0]
                            print(
                                'GPU {} - Frame: {} - Obj Num: {}, Time: {}ms'.
                                format(self.gpu, imgname[0].split('.')[0],
                                       obj_num, int(one_frametime * 1e3)))

                        # Save result (masks are queued here and written out
                        # after the whole sequence, keeping the GPU loop tight)
                        seq_pred_masks['dense'].append({
                            'path': os.path.join(
                                self.result_root, seq_name,
                                imgname[0].split('.')[0] + '.png'),
                            'mask': pred_label,
                            'obj_idx': obj_idx
                        })
                        if 'all_frames' in cfg.TEST_DATASET_SPLIT \
                                and imgname[0] in images_sparse:
                            seq_pred_masks['sparse'].append({
                                'path': os.path.join(
                                    self.result_root_sparse, seq_name,
                                    imgname[0].split('.')[0] + '.png'),
                                'mask': pred_label,
                                'obj_idx': obj_idx
                            })
                # Save result
                for mask_result in seq_pred_masks['dense'] + \
                        seq_pred_masks['sparse']:
                    save_mask(mask_result['mask'].squeeze(0).squeeze(0),
                              mask_result['path'], mask_result['obj_idx'])
                del seq_pred_masks

                for timer in seq_timers:
                    torch.cuda.synchronize()
                    one_frametime = timer[0].elapsed_time(timer[1]) / 1e3
                    seq_total_time += one_frametime
                    seq_total_frame += 1
                del seq_timers

                seq_avg_time_per_frame = seq_total_time / seq_total_frame
                total_time += seq_total_time
                total_frame += seq_total_frame
                total_avg_time_per_frame = total_time / total_frame
                total_sfps += seq_avg_time_per_frame
                avg_sfps = total_sfps / processed_video_num
                max_mem = torch.cuda.max_memory_allocated(
                    device=self.gpu) / (1024.**3)

                print(
                    "GPU {} - Seq {} - FPS: {:.2f}. All-Frame FPS: {:.2f}, "
                    "All-Seq FPS: {:.2f}, Max Mem: {:.2f}G".format(
                        self.gpu, seq_name, 1. / seq_avg_time_per_frame,
                        1. / total_avg_time_per_frame, 1. / avg_sfps,
                        max_mem))
        if self.seq_queue is not None:
            if self.rank != 0:
                self.info_queue.put({
                    'total_time': total_time,
                    'total_frame': total_frame,
                    'total_sfps': total_sfps,
                    'processed_video_num': processed_video_num,
                    'max_mem': max_mem
                })
            print('Finished the evaluation on GPU {}.'.format(self.gpu))
            if self.rank == 0:
                for _ in range(self.gpu_num - 1):
                    info_dict = self.info_queue.get()
                    total_time += info_dict['total_time']
                    total_frame += info_dict['total_frame']
                    total_sfps += info_dict['total_sfps']
                    processed_video_num += info_dict['processed_video_num']
                    max_mem = max(max_mem, info_dict['max_mem'])
                all_reduced_total_avg_time_per_frame = total_time / total_frame
                all_reduced_avg_sfps = total_sfps / processed_video_num
                print(
                    "GPU {} - All-Frame FPS: {:.2f}, All-Seq FPS: {:.2f}, "
                    "Max Mem: {:.2f}G".format(
                        list(range(self.gpu_num)),
                        1. / all_reduced_total_avg_time_per_frame,
                        1. / all_reduced_avg_sfps, max_mem))
        else:
            print(
                "GPU {} - All-Frame FPS: {:.2f}, All-Seq FPS: {:.2f}, "
                "Max Mem: {:.2f}G".format(
                    self.gpu, 1. / total_avg_time_per_frame, 1. / avg_sfps,
                    max_mem))

        if self.rank == 0:
            zip_folder(self.source_folder, self.zip_dir)
            self.print_log('Saving result to {}.'.format(self.zip_dir))
            if 'all_frames' in cfg.TEST_DATASET_SPLIT:
                zip_folder(self.result_root_sparse, self.zip_dir_sparse)
            end_eval_time = time.time()
            total_eval_time = str(
                datetime.timedelta(seconds=int(end_eval_time -
                                               start_eval_time)))
            self.print_log("Total evaluation time: {}".format(total_eval_time))
    def print_log(self, string):
        if self.rank == 0:
            print(string)
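

# ---------------------------------------------------------------------------
# Minimal usage sketch (an addition, not part of the evaluator itself). It
# shows one plausible way to drive Evaluator across several GPUs, mirroring
# the seq_queue/info_queue protocol above. `build_cfg` is a hypothetical
# stand-in for whatever config machinery the surrounding project provides.
# ---------------------------------------------------------------------------
def _eval_worker(rank, cfg, seq_queue, info_queue):
    # One Evaluator per process; `rank` offsets the GPU id.
    evaluator = Evaluator(cfg, rank=rank, seq_queue=seq_queue,
                          info_queue=info_queue)
    evaluator.evaluating()


if __name__ == '__main__':
    import torch.multiprocessing as mp

    cfg = build_cfg()  # hypothetical: an object with the TEST_*/DIR_*/MODEL_* fields used above
    if cfg.TEST_GPU_NUM > 1:
        mp.set_start_method('spawn', force=True)
        seq_queue = mp.Queue()
        info_queue = mp.Queue()
        # mp.spawn calls _eval_worker(rank, cfg, seq_queue, info_queue)
        # once per process, with rank in [0, TEST_GPU_NUM).
        mp.spawn(_eval_worker,
                 args=(cfg, seq_queue, info_queue),
                 nprocs=cfg.TEST_GPU_NUM)
    else:
        Evaluator(cfg).evaluating()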