|
import glob |
|
import os |
|
|
|
import lmdb |
|
import numpy as np |
|
import pickle |
|
import sys |
|
import tqdm |
|
import shutil |
|
|
|
# --- Dataset configuration -------------------------------------------------
# Root directory containing one sub-directory per scene class,
# e.g. .../Images/agricultural/agricultural00.tif.
pre_path = r'H:\DataSet\SceneCls\UCMerced_LandUse\UCMerced_LandUse\Images'

# One entry per image file: <class_dir>/<image_file>.
file_list = glob.glob(pre_path + '/*/*')

dataset_name = 'UCMerced'

# Per-image fields to write into the LMDB. The writer loop below supports
# 'filename', 'img' and 'gt_label'; the original list omitted 'img', so the
# raw image bytes were never stored (even though map_size is budgeted for
# them) — 'img' is included here so the LMDB actually holds the image data.
cache_keys = ['filename', 'img', 'gt_label']
|
|
|
|
|
# --- LMDB environment setup ------------------------------------------------
# Output directory sits next to the image root: .../<dataset_name>_lmdb.
lmdb_path = os.path.abspath(os.path.join(pre_path, '..', f'{dataset_name}_lmdb'))
os.makedirs(lmdb_path, exist_ok=True)

# Estimate the size of one image so the LMDB map can be budgeted.
# NOTE: sys.getsizeof measures the bytes object (payload + small header),
# which is a good-enough proxy for the raw file size here.
# `with` closes the file handle (the original leaked it).
with open(file_list[0], 'rb') as first_file:
    data_size_per_item = sys.getsizeof(first_file.read())
print(f'data size:{data_size_per_item}')

# Budget room for ~1e5 items of this size. map_size must be an int:
# the original passed a float (data_size_per_item * 1e5), which py-lmdb
# rejects with a TypeError. os.path.join replaces the hard-coded '\\'
# separator so the script is not Windows-only.
env = lmdb.open(
    os.path.join(lmdb_path, f'{os.path.basename(lmdb_path)}.lmdb'),
    map_size=int(data_size_per_item * 1e5),
)
|
txn = env.begin(write=True)

# Commit in small batches so a crash loses at most `commit_interval` items
# and the write transaction never grows unboundedly.
commit_interval = 5

keys_list = []
for idx, file in enumerate(file_list):
    # Per-image key, e.g. 'UCMerced_agricultural00'. UCMerced file names
    # embed the class name, so the basename alone should be collision-free
    # here — TODO confirm for other datasets reusing this script.
    key = f'{dataset_name}_{os.path.basename(file).split(".")[0]}'
    keys_list.append(key)

    for cache_key in cache_keys:
        if cache_key == 'filename':
            # Relative path '<class_dir>/<file>' inside the image root.
            value = os.path.basename(os.path.dirname(file)) + '/' + os.path.basename(file)
        elif cache_key == 'img':
            # Raw encoded image bytes, stored as-is (no decoding).
            with open(file, 'rb') as f:
                value = f.read()
        elif cache_key == 'gt_label':
            # Class label is the name of the directory holding the image.
            value = os.path.basename(os.path.dirname(file))
        else:
            # Fail loudly instead of silently re-using a stale `value`
            # from the previous iteration (the original left `value`
            # unbound/stale for unrecognized keys).
            raise ValueError(f'unsupported cache key: {cache_key}')

        # Distinct name: the original reassigned the loop variable
        # `cache_key` in place, which obscured intent.
        full_key = (key + f'_{cache_key}').encode()
        txn.put(full_key, value if isinstance(value, bytes) else value.encode())

    # Flush every `commit_interval` items. The original tested
    # `idx % commit_interval == 1`, which committed at idx 1, 6, 11, ... —
    # functional but off-by-one; this commits after every full batch.
    if (idx + 1) % commit_interval == 0:
        txn.commit()
        txn = env.begin(write=True)

txn.commit()  # flush any remaining items from the final partial batch
env.close()

# Persist the key index next to the image root so LMDB readers can
# enumerate entries. `with` closes the handle (the original leaked it).
keys_list = np.array(keys_list)
with open(pre_path + '/../keys_list.txt', 'w') as key_file:
    np.savetxt(key_file, keys_list, fmt='%s')
print('Finish writing!')
|
|
|
|