import json
import os

subsets = ["train", "val", "test"]
save_path = 'msvd_dataset/new_annotations'

# Mapping between the original YouTube clip names and their vidXXXX indices.
videoindex = open("msvd_dataset/txt_labels/youtube_mapping.txt", 'r').readlines()
name2idx = dict()
idx2name = dict()
for v in videoindex:
    name2idx[v.split()[0]] = v.split()[1]
    idx2name[v.split()[1]] = v.split()[0]

# Annotation ids stay unique across the three subsets.
sentence_count = 1
for subset in subsets:
    images_field = []
    annotations_field = []
    visited_images = set()

    txtfile = "msvd_dataset/txt_labels/sents_{}_lc_nopunc.txt".format(subset)
    capinfos = open(txtfile, 'r').readlines()
    for caption in capinfos:
        vidindex = caption.split('\t')[0]
        # Register each video only once in the "images" field.
        if vidindex not in visited_images:
            visited_images.add(vidindex)
            images_field.append(
                {
                    "id": int(vidindex.replace('vid', '')),
                    "file_name": idx2name[vidindex]
                }
            )
        # Every caption line becomes one annotation entry.
        annotations_field.append(
            {
                "image_id": int(caption.split()[0].replace('vid', '')),
                "id": sentence_count,
                "caption": caption.split('\t')[1].strip()
            }
        )
        sentence_count += 1

    data = {
        "images": images_field,
        "annotations": annotations_field
    }
    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, "caption_msvd_{}_cocostyle.json".format(subset)), "w") as f:
        json.dump(data, f)
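
# Optional sanity check (a sketch, not part of the conversion itself): the
# generated files follow the COCO captions layout ("images" + "annotations"),
# so they can be read back with pycocotools if it is installed. The val
# filename below is just one example of the files produced above.
#
# from pycocotools.coco import COCO
# coco = COCO(os.path.join(save_path, "caption_msvd_val_cocostyle.json"))
# print(len(coco.getImgIds()), "videos,", len(coco.getAnnIds()), "captions")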