Spaces:
Running
on
Zero
Running
on
Zero
| """Script to convert a dataset to hdf5 format.""" | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import numpy as np | |
| from tqdm import tqdm | |
| from vis4d.common.imports import H5PY_AVAILABLE | |
| if H5PY_AVAILABLE: | |
| import h5py | |
| else: | |
| raise ImportError("Please install h5py to enable HDF5Backend.") | |
| def convert_dataset(source_dir: str) -> None: | |
| """Convert a dataset to HDF5 format. | |
| This function converts an arbitary dictionary to an HDF5 file. The keys | |
| inside the HDF5 file preserve the directory structure of the original. | |
| As an example, if you convert "/path/to/dataset" to HDF5, the resulting | |
| file will be: "/path/to/dataset.hdf5". The file "relative/path/to/file" | |
| will be stored at "relative/path/to/file" inside /path/to/dataset.hdf5. | |
| Args: | |
| source_dir (str): The path to the dataset to convert. | |
| """ | |
| if not os.path.exists(source_dir): | |
| raise FileNotFoundError(f"No such file or directory: {source_dir}") | |
| source_dir = os.path.join(source_dir, "") # must end with trailing slash | |
| hdf5_path = source_dir.rstrip("/") + ".hdf5" | |
| if os.path.exists(hdf5_path): | |
| print(f"File {hdf5_path} already exists! Skipping {source_dir}") | |
| return | |
| print(f"Converting dataset at: {source_dir}") | |
| hdf5_file = h5py.File(hdf5_path, mode="w") | |
| sub_dirs = list(os.walk(source_dir)) | |
| file_count = sum(len(files) for (_, _, files) in sub_dirs) | |
| with tqdm(total=file_count) as pbar: | |
| for root, _, files in sub_dirs: | |
| g_name = root.replace(source_dir, "") | |
| g = hdf5_file.create_group(g_name) if g_name else hdf5_file | |
| for f in files: | |
| filepath = os.path.join(root, f) | |
| if os.path.isfile(filepath): | |
| with open(filepath, "rb") as fp: | |
| file_content = fp.read() | |
| g.create_dataset( | |
| f, data=np.frombuffer(file_content, dtype="uint8") | |
| ) | |
| pbar.update() | |
| hdf5_file.close() | |
| print("done.") | |
| if __name__ == "__main__": # pragma: no cover | |
| parser = argparse.ArgumentParser( | |
| description="Converts a dataset at the specified path to hdf5. The " | |
| "local directory structure is preserved in the hdf5 file." | |
| ) | |
| parser.add_argument( | |
| "-p", | |
| "--path", | |
| required=True, | |
| help="path to the root folder of a specific dataset to convert", | |
| ) | |
| args = parser.parse_args() | |
| convert_dataset(args.path) | |