# VRIS_vip / make_ref-ytvos / folder2lmdb.py
# dianecy's picture
# Add files using upload-large-folder tool
# 9b855a7 verified
# raw
# history blame
# 3.53 kB
import argparse
import os
import os.path as osp
import lmdb
from PIL import Image
import pyarrow as pa
import json
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
def loads_pyarrow(buf):
    """Rebuild a Python object from a pyarrow-serialized buffer.

    Args:
        buf: the serialized payload, i.e. the output of `dumps`.

    Returns:
        The object reconstructed by ``pa.deserialize``.
    """
    # NOTE(review): pa.deserialize is reported as deprecated in newer pyarrow
    # releases -- confirm the pyarrow version this script is pinned to.
    restored = pa.deserialize(buf)
    return restored
def raw_reader(path):
    """Return the complete contents of the file at ``path`` as raw bytes.

    Args:
        path: location of the file to read.

    Returns:
        A ``bytes`` object holding the undecoded file contents.
    """
    # Binary mode: the payload is stored as-is, without any decoding.
    with open(path, 'rb') as stream:
        return stream.read()
def dumps_pyarrow(obj):
    """Serialize ``obj`` with pyarrow.

    Args:
        obj: the object to serialize.

    Returns:
        Implementation-dependent bytes-like object (the buffer produced by
        ``pa.serialize(obj).to_buffer()``).
    """
    # NOTE(review): pa.serialize is reported as deprecated in newer pyarrow
    # releases -- confirm the pyarrow version this script is pinned to.
    serialized = pa.serialize(obj)
    return serialized.to_buffer()
def folder2lmdb(json_data, img_dir, mask_dir, output_dir, split, write_frequency=1000):
    """Pack image/mask files and their annotations into one LMDB database.

    Each entry of ``json_data`` becomes one record. The record key is the
    entry's position in ``json_data`` written as ASCII decimal digits, and the
    value is a dict serialized with ``dumps_pyarrow`` holding the keys
    ``img``, ``mask``, ``cat``, ``seg_id``, ``file_name``, ``num_sents`` and
    ``sents``. After all entries are stored, two bookkeeping records are
    added: ``__keys__`` (list of every record key) and ``__len__`` (number of
    records).

    Args:
        json_data: iterable of annotation dicts. Each one must provide
            ``file_name``, ``category_name`` and ``sentences['sent']``.
        img_dir: directory containing the files named by ``file_name``.
        mask_dir: directory containing one mask per entry, named
            ``<index>.png`` where ``<index>`` is the entry's position in
            ``json_data``.
        output_dir: directory in which ``<split>.lmdb`` is created.
        split: base name of the database (``<split>.lmdb``).
        write_frequency: the write transaction is committed and reopened
            whenever the entry index is a multiple of this value.

    Raises:
        OSError: if an image or mask file cannot be read.
        KeyError: if an annotation lacks one of the required fields.
    """
    lmdb_path = osp.join(output_dir, "%s.lmdb" % split)
    # Use directory layout only when a directory already exists at the
    # target path; otherwise the database is created as a single file.
    isdir = os.path.isdir(lmdb_path)

    print("Generate LMDB to %s" % lmdb_path)
    # map_size is 2 * 2**40 bytes (2 TiB): the upper bound on database size.
    db = lmdb.open(lmdb_path, subdir=isdir,
                   map_size=1099511627776 * 2, readonly=False,
                   meminit=False, map_async=True)
    try:
        # Tracked explicitly so that an empty ``json_data`` yields an empty
        # database instead of failing on an unbound loop variable.
        num_records = 0
        txn = db.begin(write=True)
        for idx, item in enumerate(tqdm(json_data)):
            img = raw_reader(osp.join(img_dir, item['file_name']))
            # Masks are looked up by entry index, not by image file name.
            mask = raw_reader(osp.join(mask_dir, f"{idx}.png"))
            data = {'img': img, 'mask': mask, 'cat': item['category_name'],
                    'seg_id': idx, 'file_name': item['file_name'],
                    'num_sents': 1, 'sents': item['sentences']['sent']}
            txn.put(str(idx).encode('ascii'), dumps_pyarrow(data))
            num_records = idx + 1
            if idx % write_frequency == 0:
                # Commit periodically so a single transaction does not
                # accumulate the whole dataset.
                txn.commit()
                txn = db.begin(write=True)

        # Finished iterating through the dataset: flush the last batch.
        txn.commit()

        keys = [str(k).encode('ascii') for k in range(num_records)]
        with db.begin(write=True) as txn:
            txn.put(b'__keys__', dumps_pyarrow(keys))
            txn.put(b'__len__', dumps_pyarrow(len(keys)))

        print("Flushing database ...")
        db.sync()
    finally:
        # Always release the environment, even when reading or serializing
        # an entry fails part-way through.
        db.close()
def parse_args():
    """Parse the command-line options of the folder-to-LMDB converter.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``json_dir``,
        ``img_dir``, ``mask_dir``, ``output_dir`` and ``split``.
    """
    parser = argparse.ArgumentParser(description='COCO Folder to LMDB.')

    # (short flag, long flag, default value, help text) of each string option.
    option_specs = (
        ('-j', '--json-dir', '', 'the name of json file.'),
        ('-i', '--img-dir', 'refcoco+', 'the folder of images.'),
        ('-m', '--mask-dir', 'refcoco+', 'the folder of masks.'),
        ('-o', '--output-dir', 'refcoco+', 'the folder of output lmdb file.'),
        ('-s', '--split', 'train', 'the split type.'),
    )
    for short_flag, long_flag, default_value, help_text in option_specs:
        parser.add_argument(short_flag, long_flag, type=str,
                            default=default_value, help=help_text)

    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: read a JSON-lines annotation file and convert the
    # referenced images and masks into an LMDB database.
    args = parse_args()
    # The split name is always taken from the JSON file name (the text before
    # its first '.'), which overrides whatever was passed via --split.
    args.split = osp.basename(args.json_dir).split(".")[0]
    os.makedirs(args.output_dir, exist_ok=True)
    # One JSON document per line; blank lines are skipped so that stray empty
    # lines in the file do not abort the conversion with a decode error.
    with open(args.json_dir, 'rb') as f:
        json_data = [json.loads(line) for line in f if line.strip()]
    folder2lmdb(json_data, args.img_dir, args.mask_dir, args.output_dir, args.split)