|
import sys |
|
import os |
|
from os import path as osp |
|
sys.path.append(osp.abspath(osp.join(osp.dirname(__file__), '..'))) |
|
|
|
from datasets import build_dataset |
|
import argparse |
|
import opts |
|
|
|
|
|
from pathlib import Path |
|
import io |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import regex as re |
|
import json |
|
|
|
import cv2 |
|
from PIL import Image, ImageDraw |
|
import torch |
|
from torchvision.transforms import functional as F |
|
|
|
from skimage import measure |
|
from shapely.geometry import Polygon, MultiPolygon |
|
|
|
import matplotlib.pyplot as plt |
|
import matplotlib.patches as patches |
|
from matplotlib.collections import PatchCollection |
|
from matplotlib.patches import Rectangle |
|
|
|
|
|
import ipywidgets as widgets |
|
from IPython.display import display, clear_output |
|
|
|
|
|
def createJson(train_dataset, metas):
    """Collect per-video annotation records into a JSON-serializable dict.

    Args:
        train_dataset: Indexable, sized dataset. ``train_dataset[i]`` must
            return a ``(frames, info)`` pair, where ``info['boxes']`` and
            ``info['valid']`` support integer indexing and expose
            ``.tolist()`` / ``.item()`` respectively. The dataset must also
            provide an ``img_folder`` attribute.
        metas: Sequence of per-video metadata dicts, indexed in step with
            ``train_dataset``. Each dict is read for the keys ``'video'``,
            ``'bins'``, ``'obj_id_cat'``, ``'sample_indx'`` and ``'frames'``.

    Returns:
        dict: Maps each video id to a dict with the keys ``'bins'``,
        ``'annotations'`` (one dict per bin, keyed by object id ``"1"`` ..
        ``"N"``), ``'frame_names'`` and ``'video_path'``. An object that has
        no category entry, or whose row is missing from ``info``, is stored
        as an empty dict.
    """
    entire_json = {}

    for vid_idx in range(len(train_dataset)):
        # Only the annotation info is used here; the frames are discarded.
        _, video_train_info = train_dataset[vid_idx]
        video_meta = metas[vid_idx]

        video_id = video_meta['video']
        bins = video_meta['bins']
        obj_id_cat = video_meta['obj_id_cat']

        # Object ids are numeric strings; the largest one fixes how many
        # slots each bin has. default=0 keeps a video without any object
        # from raising ValueError.
        obj_nums = max((int(k) for k in obj_id_cat), default=0)

        annotation_data = []
        for bin_idx in range(len(bins)):
            bin_data = {}
            for obj_offset in range(obj_nums):
                obj_id = str(obj_offset + 1)
                # NOTE(review): assumes boxes/valid are laid out bin-major,
                # i.e. row = bin * obj_nums + object -- confirm against the
                # dataset implementation.
                row = bin_idx * obj_nums + obj_offset
                try:
                    obj_data = {
                        "category_name": obj_id_cat[obj_id],
                        "bbox": video_train_info['boxes'][row, :].tolist(),
                        "valid": video_train_info['valid'][row].item(),
                    }
                except (KeyError, IndexError):
                    # Gap in the object ids, or no row for this slot:
                    # keep the slot but leave it empty.
                    obj_data = {}
                bin_data[obj_id] = obj_data
            annotation_data.append(bin_data)

        frames = video_meta['frames']
        frame_names = [frames[i] for i in video_meta['sample_indx']]

        entire_json[video_id] = {
            'bins': bins,
            'annotations': annotation_data,
            'frame_names': frame_names,
            'video_path': os.path.join(
                str(train_dataset.img_folder), 'JPEGImages', video_id
            ),
        }

    return entire_json
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the 'ytvos_ref' training split, gather its
    # per-video annotations with createJson, and dump them to a JSON file.
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()

    output_path = 'mbench/sampled_frame3.json'
    # Create the output directory before the dataset is built, so a missing
    # 'mbench/' folder cannot make the run fail at the final write.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    train_dataset = build_dataset('ytvos_ref', image_set = 'train', args = args)
    metas = train_dataset.metas

    entire_json_dict = createJson(train_dataset, metas)
    print(type(entire_json_dict))

    # Serialize before opening the file so a serialization error does not
    # leave a truncated JSON file behind.
    entire_json = json.dumps(entire_json_dict, indent=4)

    with open(output_path, mode='w') as file:
        file.write(entire_json)
|
|