3D-MOOD / vis4d /data /io /to_hdf5.py
RoyYang0714's picture
feat: Try to build everything locally.
9b33fca
"""Script to convert a dataset to hdf5 format."""
from __future__ import annotations
import argparse
import os
import numpy as np
from tqdm import tqdm
from vis4d.common.imports import H5PY_AVAILABLE
if H5PY_AVAILABLE:
import h5py
else:
raise ImportError("Please install h5py to enable HDF5Backend.")
def convert_dataset(source_dir: str) -> None:
    """Convert a dataset to HDF5 format.

    This function converts an arbitrary directory to an HDF5 file. The keys
    inside the HDF5 file preserve the directory structure of the original.

    As an example, if you convert "/path/to/dataset" to HDF5, the resulting
    file will be: "/path/to/dataset.hdf5". The file "relative/path/to/file"
    will be stored at "relative/path/to/file" inside /path/to/dataset.hdf5.

    Every regular file is stored as a one-dimensional uint8 dataset holding
    its raw bytes. If the target HDF5 file already exists, nothing is
    converted. If the conversion fails midway, the partially written HDF5
    file is removed again before the error is re-raised.

    Args:
        source_dir (str): The path to the dataset to convert.

    Raises:
        FileNotFoundError: If ``source_dir`` does not exist.
    """
    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"No such file or directory: {source_dir}")

    source_dir = os.path.join(source_dir, "")  # must end with trailing slash
    # Strip whichever separator(s) the platform uses, not only "/", so the
    # archive is created next to the directory instead of inside it.
    separators = os.sep + (os.altsep or "")
    hdf5_path = source_dir.rstrip(separators) + ".hdf5"
    if os.path.exists(hdf5_path):
        print(f"File {hdf5_path} already exists! Skipping {source_dir}")
        return

    print(f"Converting dataset at: {source_dir}")
    # The walk is materialized once so the files can be counted up front for
    # the progress bar and then iterated without touching the disk twice.
    sub_dirs = list(os.walk(source_dir))
    file_count = sum(len(files) for (_, _, files) in sub_dirs)
    prefix_len = len(source_dir)

    try:
        # The context manager guarantees the HDF5 handle is closed even when
        # reading a file or writing a dataset raises.
        with h5py.File(hdf5_path, mode="w") as hdf5_file:
            with tqdm(total=file_count) as pbar:
                for root, _, files in sub_dirs:
                    # Every root yielded by os.walk starts with source_dir,
                    # so cut off exactly that prefix. str.replace would also
                    # delete later occurrences, e.g. "data/metadata/x" with
                    # source "data/" would become "metax".
                    g_name = root[prefix_len:]
                    if os.sep != "/":
                        # HDF5 group paths are always "/"-separated.
                        g_name = g_name.replace(os.sep, "/")
                    g = hdf5_file.create_group(g_name) if g_name else hdf5_file
                    for f in files:
                        filepath = os.path.join(root, f)
                        if os.path.isfile(filepath):
                            with open(filepath, "rb") as fp:
                                file_content = fp.read()
                            g.create_dataset(
                                f,
                                data=np.frombuffer(
                                    file_content, dtype="uint8"
                                ),
                            )
                        # Skipped entries still count towards file_count.
                        pbar.update()
    except BaseException:
        # A half-written archive would make the next run report "already
        # exists" and skip the dataset, so do not leave it behind.
        if os.path.exists(hdf5_path):
            os.remove(hdf5_path)
        raise
    print("done.")
if __name__ == "__main__":  # pragma: no cover
    # Command-line entry point: take the dataset root via -p/--path and
    # hand it to convert_dataset.
    cli_description = (
        "Converts a dataset at the specified path to hdf5. The "
        "local directory structure is preserved in the hdf5 file."
    )
    cli = argparse.ArgumentParser(description=cli_description)
    cli.add_argument(
        "-p",
        "--path",
        help="path to the root folder of a specific dataset to convert",
        required=True,
    )
    dataset_root = cli.parse_args().path
    convert_dataset(dataset_root)