import os
import json
import torch
import torchvision.transforms as transforms
import os.path
import numpy as np
import cv2
from PIL import Image
from torch.utils.data import Dataset
import random
from .__base_dataset__ import BaseDataset
import h5py


def creat_uv_mesh(H, W):
    """Build a (3, H*W) array of homogeneous pixel coordinates (x, y, 1)."""
    y, x = np.meshgrid(np.arange(0, H, dtype=float), np.arange(0, W, dtype=float), indexing='ij')
    meshgrid = np.stack((x, y))
    ones = np.ones((1, H * W), dtype=float)
    xy = meshgrid.reshape(2, -1)
    return np.concatenate([xy, ones], axis=0)


class HypersimDataset(BaseDataset):
    def __init__(self, cfg, phase, **kwargs):
        super(HypersimDataset, self).__init__(
            cfg=cfg,
            phase=phase,
            **kwargs)
        self.metric_scale = cfg.metric_scale
        # Pre-compute the homogeneous pixel grid for Hypersim's native 768x1024 resolution.
        self.xy = creat_uv_mesh(768, 1024)

    def load_batch(self, meta_data, data_path):
        curr_intrinsic = meta_data['cam_in']

        # Load the RGB image and the metric depth map.
        curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path'])

        # Load semantic labels and build the camera model for the current view.
        curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth)
        curr_cam_model = self.create_cam_model(curr_rgb.shape[0], curr_rgb.shape[1], curr_intrinsic)

        # Load surface normals and re-orient them to be consistent with the depth.
        curr_normal = self.load_norm_label(data_path['normal_path'], H=curr_rgb.shape[0], W=curr_rgb.shape[1], depth=curr_depth, K=curr_intrinsic)

        # Invalidate depth values that fall outside the valid-depth mask.
        depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path'])
        curr_depth[~depth_mask] = -1

        data_batch = dict(
            curr_rgb=curr_rgb,
            curr_depth=curr_depth,
            curr_sem=curr_sem,
            curr_normal=curr_normal,
            curr_cam_model=curr_cam_model,
        )
        return data_batch

    def load_data_path(self, meta_data):
        meta_data['rgb'] = meta_data['rgbs']['rgb_color']
        curr_rgb_path = os.path.join(self.data_root, meta_data['rgb'])
        curr_depth_path = os.path.join(self.depth_root, meta_data['depth'])
        # Semantic, normal, and depth-mask annotations are optional; fall back to None
        # when either the root directory or the per-sample entry is missing.
        curr_sem_path = os.path.join(self.sem_root, meta_data['sem']) \
            if self.sem_root is not None and ('sem' in meta_data) and (meta_data['sem'] is not None) \
            else None
        curr_norm_path = os.path.join(self.norm_root, meta_data['normal']) \
            if self.norm_root is not None and ('normal' in meta_data) and (meta_data['normal'] is not None) \
            else None
        curr_depth_mask_path = os.path.join(self.depth_mask_root, meta_data['depth_mask']) \
            if self.depth_mask_root is not None and ('depth_mask' in meta_data) and (meta_data['depth_mask'] is not None) \
            else None

        data_path = dict(
            rgb_path=curr_rgb_path,
            depth_path=curr_depth_path,
            sem_path=curr_sem_path,
            normal_path=curr_norm_path,
            depth_mask_path=curr_depth_mask_path,
        )
        return data_path

    def load_rgb_depth(self, rgb_path: str, depth_path: str):
        """
        Load the RGB image and the depth map from the given paths.
        """
        rgb = self.load_data(rgb_path, is_rgb_img=True)
        if rgb is None:
            self.logger.info(f'>>>>{rgb_path} has errors.')
            raise RuntimeError(f'{rgb_path} has errors.')

        # Hypersim stores depth as a float HDF5 dataset; replace NaNs with 0.
        with h5py.File(depth_path, "r") as f:
            depth = f["dataset"][:]
        if depth is None:
            self.logger.info(f'{depth_path} has errors.')
            raise RuntimeError(f'{depth_path} has errors.')
        np.nan_to_num(depth, copy=False, nan=0)

        depth = depth.astype(float)
        depth = self.process_depth(depth, rgb)
        return rgb, depth

    def load_norm_label(self, norm_path, H, W, depth, K):
        with h5py.File(norm_path, "r") as f:
            normal = f["dataset"][:]
        np.nan_to_num(normal, copy=False, nan=0)
        # Flip the y and z components so the normals follow the camera convention used here.
        normal[:, :, 1:] *= -1
        normal = normal.astype(float)

        return self.align_normal(normal, depth, K, H, W)

    def process_depth(self, depth: np.ndarray, rgb: np.ndarray) -> np.ndarray:
        # Values above 60000 are treated as invalid and zeroed out.
        depth[depth > 60000] = 0
        # Convert the stored depth to metric units.
        depth = depth / self.metric_scale
        return depth

    def align_normal(self, normal, depth, K, H, W):
        '''
        The orientation of surface normals in Hypersim is not always consistent,
        see https://github.com/apple/ml-hypersim/issues/26
        '''
        # Build the 3x3 intrinsic matrix from [fx, fy, cx, cy].
        K = np.array([[K[0], 0,    K[2]],
                      [0,    K[1], K[3]],
                      [0,    0,    1]])
        inv_K = np.linalg.inv(K)

        if H == 768 and W == 1024:
            xy = self.xy
        else:
            print('image size is not 768x1024; rebuilding the uv mesh')
            xy = creat_uv_mesh(H, W)

        # Back-project pixels into camera space: p = depth * K^{-1} [u, v, 1]^T.
        points = np.matmul(inv_K[:3, :3], xy).reshape(3, H, W)
        points = depth * points
        points = points.transpose((1, 2, 0))

        # A normal facing the camera satisfies n . p <= 0; flip the ones that do not.
        orient_mask = np.sum(normal * points, axis=2) > 0
        normal[orient_mask] *= -1

        return normal
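

# Hedged usage sketch (illustrative, not part of the original module): how the
# dataset might be instantiated, assuming `cfg` carries `metric_scale` plus
# whatever roots and options BaseDataset expects (e.g. `data_root`, `depth_root`,
# `sem_root`, `norm_root`, `depth_mask_root`). The exact config schema is defined
# by BaseDataset and the training configs, so these field names are assumptions.
#
#   cfg = Config(...)                                  # hypothetical config object
#   dataset = HypersimDataset(cfg=cfg, phase='train')  # phase as consumed by BaseDataset
#   data_path = dataset.load_data_path(meta_data)      # meta_data comes from the annotation file
#   batch = dataset.load_batch(meta_data, data_path)
#   rgb, depth, normal = batch['curr_rgb'], batch['curr_depth'], batch['curr_normal']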