|
import json
import os
from collections import defaultdict

import jsonlines
|
|
|
# Dataset splits to convert; each name is a key into `set2jsonline` and `savename`.
subsets = ["train", "val", "test"]

# Directory that this script makes sure exists before converting.
savepath = "flickr30k/annotations"
|
|
|
# Input jsonlines file for each split. The conversion loop reads the
# "sentences", "img_path" and "id" fields of every record in these files.
set2jsonline = {
    "train": "flickr30k/all_data_final_train_2014.jsonline",
    "val": "flickr30k/all_data_final_val_set0_2014.jsonline",
    "test": "flickr30k/all_data_final_test_set0_2014.jsonline",
}
|
|
|
# Create the output directory (and any missing parents). `exist_ok=True`
# replaces the separate existence check, so there is no window between
# "check" and "create" in which another process could create the directory
# and make `makedirs` fail.
os.makedirs(savepath, exist_ok=True)
|
|
|
|
|
# Output JSON file written for each split.
# NOTE(review): these paths sit directly under flickr30k/, not under
# `savepath` (flickr30k/annotations) -- confirm which location is intended.
savename = {
    "train": "flickr30k/captions_train.json",
    "val": "flickr30k/captions_val.json",
    "test": "flickr30k/captions_test.json",
}
|
|
|
|
|
|
|
|
|
# Convert each split's jsonlines file into one JSON document of the form
# {"images": [...], "annotations": [...]}:
#   * one "images" entry per input record (its "img_path" and "id"),
#   * one "annotations" entry per sentence of that record, linked back to the
#     record through "image_id".
for subset in subsets:
    images = []
    captions = []

    with jsonlines.open(set2jsonline[subset]) as reader:
        for record in reader:
            sentences = record["sentences"]
            images.append({
                "filename": record["img_path"],
                "id": record["id"],
            })
            for sentence in sentences:
                captions.append({
                    "image_id": record["id"],
                    # Running caption index within this split: 0, 1, 2, ...
                    # (the list is empty at the start of every split, so
                    # numbering restarts at 0 for each output file).
                    "id": len(captions),
                    "caption": sentence,
                })

    # Use a context manager so the output file is flushed and closed as soon
    # as the dump finishes, instead of relying on garbage collection.
    with open(savename[subset], "w") as out_file:
        json.dump({"images": images, "annotations": captions}, out_file)
|
|