import os |
import json |
import torch |
import torchvision.transforms as transforms |
import os.path |
import numpy as np |
import cv2 |
from PIL import Image |
from torch.utils.data import Dataset |
import random |
from .__base_dataset__ import BaseDataset |
import h5py |
def creat_uv_mesh(H, W):
    """Build homogeneous pixel coordinates for an ``H`` x ``W`` image.

    Args:
        H: image height (number of rows).
        W: image width (number of columns).

    Returns:
        A float64 array of shape ``(3, H * W)``. Row 0 holds the column index
        (x), row 1 the row index (y) and row 2 is all ones. Pixels are
        flattened in row-major order, so the result can be reshaped to
        ``(3, H, W)``.
    """
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the dtype it
    # used to resolve to.
    y, x = np.meshgrid(
        np.arange(0, H, dtype=np.float64),
        np.arange(0, W, dtype=np.float64),
        indexing='ij',
    )
    xy = np.stack((x, y)).reshape(2, -1)
    ones = np.ones((1, H * W), dtype=np.float64)
    return np.concatenate([xy, ones], axis=0)
class HypersimDataset(BaseDataset):
    """Dataset that loads RGB, depth, semantic and surface-normal data.

    Depth and normal maps are read from HDF5 files under the ``"dataset"``
    key. Normals are re-oriented in :meth:`align_normal` because their
    orientation in Hypersim is not always consistent.
    """

    def __init__(self, cfg, phase, **kwargs):
        """Initialise the base dataset and cache per-dataset constants.

        Args:
            cfg: configuration object; ``cfg.metric_scale`` is read here.
            phase: forwarded unchanged to ``BaseDataset.__init__``.
            **kwargs: forwarded unchanged to ``BaseDataset.__init__``.
        """
        super(HypersimDataset, self).__init__(
            cfg=cfg,
            phase=phase,
            **kwargs)
        self.metric_scale = cfg.metric_scale
        # Pre-compute the pixel grid for the 768x1024 case that align_normal
        # special-cases, so it is not rebuilt for every sample.
        self.xy = creat_uv_mesh(768, 1024)

    def load_batch(self, meta_data, data_path):
        """Load every modality of one sample and bundle it into a dict.

        Args:
            meta_data: per-sample annotation dict; ``meta_data['cam_in']`` is
                used as the camera intrinsics.
            data_path: dict of file paths as returned by
                :meth:`load_data_path`.

        Returns:
            dict with keys ``curr_rgb``, ``curr_depth``, ``curr_sem``,
            ``curr_normal`` and ``curr_cam_model``.
        """
        curr_intrinsic = meta_data['cam_in']
        curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path'])
        height, width = curr_rgb.shape[0], curr_rgb.shape[1]

        curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth)
        curr_cam_model = self.create_cam_model(height, width, curr_intrinsic)
        # Normals are computed before the depth mask is applied, so the
        # orientation test sees the unmasked depth values.
        curr_normal = self.load_norm_label(
            data_path['normal_path'], H=height, W=width, depth=curr_depth, K=curr_intrinsic)

        # NOTE(review): depth_mask is presumably a boolean array with the same
        # shape as the depth map -- confirm against load_depth_valid_mask.
        depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path'])
        curr_depth[~depth_mask] = -1

        data_batch = dict(
            curr_rgb=curr_rgb,
            curr_depth=curr_depth,
            curr_sem=curr_sem,
            curr_normal=curr_normal,
            curr_cam_model=curr_cam_model,
        )
        return data_batch

    def load_data_path(self, meta_data):
        """Resolve the on-disk paths of all modalities for one sample.

        Side effect: ``meta_data['rgb']`` is set to
        ``meta_data['rgbs']['rgb_color']``.

        Args:
            meta_data: per-sample annotation dict. ``'rgbs'`` and ``'depth'``
                are required; ``'sem'``, ``'normal'`` and ``'depth_mask'`` are
                optional.

        Returns:
            dict with keys ``rgb_path``, ``depth_path``, ``sem_path``,
            ``normal_path`` and ``depth_mask_path``. The last three are
            ``None`` when the matching root directory is ``None`` or the
            annotation entry is missing or ``None``.
        """
        def optional_path(root, key):
            # A path exists only if both the root and the annotation do.
            if root is None or meta_data.get(key) is None:
                return None
            return os.path.join(root, meta_data[key])

        meta_data['rgb'] = meta_data['rgbs']['rgb_color']
        data_path = dict(
            rgb_path=os.path.join(self.data_root, meta_data['rgb']),
            depth_path=os.path.join(self.depth_root, meta_data['depth']),
            sem_path=optional_path(self.sem_root, 'sem'),
            normal_path=optional_path(self.norm_root, 'normal'),
            depth_mask_path=optional_path(self.depth_mask_root, 'depth_mask'),
        )
        return data_path

    def load_rgb_depth(self, rgb_path: str, depth_path: str):
        """
        Load the rgb and depth map with the paths.

        The depth map is read from the ``"dataset"`` entry of the HDF5 file at
        ``depth_path``, NaNs are replaced by 0, and the result is passed
        through :meth:`process_depth`.

        Returns:
            ``(rgb, depth)``. ``rgb`` may be ``None`` if loading failed; this
            is only logged here, not raised.
        """
        rgb = self.load_data(rgb_path, is_rgb_img=True)
        if rgb is None:
            self.logger.info(f'>>>>{rgb_path} has errors.')

        # Reading the dataset either yields an array or raises, so no None
        # check is needed afterwards.
        with h5py.File(depth_path, "r") as f:
            depth = f["dataset"][:]
        np.nan_to_num(depth, copy=False, nan=0)

        # ``np.float`` was removed in NumPy 1.24; float64 is what it meant.
        depth = depth.astype(np.float64)
        depth = self.process_depth(depth, rgb)
        return rgb, depth

    def load_norm_label(self, norm_path, H, W, depth, K):
        """Load a surface-normal map and make its orientation consistent.

        Args:
            norm_path: path of the HDF5 file holding the normals under the
                ``"dataset"`` key.
            H: image height.
            W: image width.
            depth: depth map used for the orientation test.
            K: camera intrinsics, forwarded to :meth:`align_normal`.

        Returns:
            The normal map as float64, after NaN removal, negation of every
            channel except the first, and :meth:`align_normal`.
        """
        with h5py.File(norm_path, "r") as f:
            normal = f["dataset"][:]
        np.nan_to_num(normal, copy=False, nan=0)
        # Negate all channels but the first.
        # NOTE(review): presumably a camera-axis convention change -- confirm.
        normal[:, :, 1:] *= -1
        # ``np.float`` was removed in NumPy 1.24; float64 is what it meant.
        normal = normal.astype(np.float64)
        return self.align_normal(normal, depth, K, H, W)

    def process_depth(self, depth: np.ndarray, rgb: np.ndarray) -> np.ndarray:
        """Zero out depth values above 60000 and divide by ``metric_scale``.

        ``depth`` is modified in place by the thresholding step; the scaled
        result is returned as a new array. ``rgb`` is unused here.
        """
        depth[depth > 60000] = 0
        depth = depth / self.metric_scale
        return depth

    def align_normal(self, normal, depth, K, H, W):
        '''
        Orientation of surface normals in hypersim is not always consistent
        see https://github.com/apple/ml-hypersim/issues/26

        Each pixel is back-projected with the inverse intrinsics and scaled by
        its depth. Wherever the dot product of the normal and that point is
        positive, the normal is negated in place.

        Args:
            normal: normal map, indexed as (row, column, channel).
            depth: depth map that broadcasts against a ``(3, H, W)`` array.
            K: intrinsics indexable as ``K[0]..K[3]``; ``K[0]``, ``K[1]`` go on
                the diagonal and ``K[2]``, ``K[3]`` in the last column
                (presumably fx, fy, cx, cy -- confirm against the caller).
            H: image height.
            W: image width.

        Returns:
            ``normal`` (the same array, modified in place).
        '''
        K = np.array([[K[0], 0, K[2]],
                      [0, K[1], K[3]],
                      [0, 0, 1]])
        inv_K = np.linalg.inv(K)

        if H == 768 and W == 1024:
            # Reuse the grid cached in __init__.
            xy = self.xy
        else:
            print('img size no-equal 768x1024')
            xy = creat_uv_mesh(H, W)

        points = np.matmul(inv_K[:3, :3], xy).reshape(3, H, W)
        points = depth * points
        points = points.transpose((1, 2, 0))

        orient_mask = np.sum(normal * points, axis=2) > 0
        normal[orient_mask] *= -1
        return normal