Metric3D / training /mono /datasets /ibims_dataset.py

zach

initial commit based on github repo

3ef1661 12 months ago

3.68 kB

	import os
	import json
	import torch
	import torchvision.transforms as transforms
	import os.path
	import numpy as np
	import cv2
	from torch.utils.data import Dataset
	import random
	from .__base_dataset__ import BaseDataset


	class IBIMSDataset(BaseDataset):
	    """Dataset wrapper for IBIMS samples.

	    Unlike the generic loading path, surface normals are not read from disk:
	    they are estimated on the fly from the depth map and camera intrinsics
	    (see ``load_norm_label``).
	    """

	    # Side length of the square neighbourhood used for the local plane fit.
	    # The pooling, unfolding and padding modules below must all agree on it.
	    _NORMAL_KERNEL_SIZE = 7

	    def __init__(self, cfg, phase, **kwargs):
	        """Initialise the base dataset and the helpers used for normal estimation.

	        Args:
	            cfg: Dataset config; ``cfg.metric_scale`` is read here.
	            phase: Forwarded unchanged to ``BaseDataset``.
	            **kwargs: Forwarded unchanged to ``BaseDataset``.
	        """
	        super().__init__(cfg=cfg, phase=phase, **kwargs)
	        self.metric_scale = cfg.metric_scale

	        kernel = self._NORMAL_KERNEL_SIZE
	        # Mean of every kernel x kernel window (no padding, stride 1).
	        self.avg = torch.nn.AvgPool2d(kernel_size=kernel, stride=1)
	        # All values of every kernel x kernel window, laid out as columns.
	        self.unfold = torch.nn.Unfold(kernel_size=kernel)
	        # Restores the border that the un-padded windows above strip away.
	        self.pad = torch.nn.ZeroPad2d(kernel // 2)

	    def process_depth(self, depth, rgb):
	        """Zero out raw depth values above 50000 and divide by ``metric_scale``.

	        The array is modified in place and also returned. ``rgb`` is unused.
	        NOTE(review): presumably invoked by the base class while loading
	        depth -- confirm against ``BaseDataset``.
	        """
	        depth[depth > 50000] = 0
	        depth /= self.metric_scale
	        return depth

	    def load_batch(self, meta_data, data_path):
	        """Load one sample and return its components as a dict.

	        Args:
	            meta_data: Mapping that provides the intrinsics under ``'cam_in'``.
	            data_path: Mapping with ``'rgb_path'``, ``'depth_path'``,
	                ``'sem_path'``, ``'normal_path'`` and ``'depth_mask_path'``.

	        Returns:
	            dict with keys ``curr_rgb``, ``curr_depth``, ``curr_sem``,
	            ``curr_normal`` and ``curr_cam_model``.
	        """
	        curr_intrinsic = meta_data['cam_in']
	        # load rgb/depth
	        curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path'])
	        height, width = curr_rgb.shape[0], curr_rgb.shape[1]
	        # get semantic labels
	        curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth)
	        # create camera model
	        curr_cam_model = self.create_cam_model(height, width, curr_intrinsic)
	        # get normal labels; unlike BaseDataset they are derived from depth,
	        # and this must happen before invalid depth is overwritten below.
	        curr_normal = self.load_norm_label(
	            data_path['normal_path'], H=height, W=width, depth=curr_depth, K=curr_intrinsic)
	        # get depth mask and flag every pixel outside it with -1
	        depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path'])
	        curr_depth[~depth_mask] = -1
	        return dict(
	            curr_rgb=curr_rgb,
	            curr_depth=curr_depth,
	            curr_sem=curr_sem,
	            curr_normal=curr_normal,
	            curr_cam_model=curr_cam_model,
	        )

	    def load_norm_label(self, norm_path, H, W, depth, K):
	        """Estimate per-pixel surface normals from depth by local plane fitting.

	        Each pixel is back-projected to 3D with the inverse intrinsics. For
	        every window of ``_NORMAL_KERNEL_SIZE`` squared pixels, the covariance
	        of the 3D points is formed and the eigenvector of its smallest
	        eigenvalue is taken as the normal. Normals are flipped so that their
	        dot product with the 3D point is not positive.

	        Args:
	            norm_path: Unused; kept for interface compatibility.
	            H: Image height in pixels.
	            W: Image width in pixels.
	            depth: numpy depth map that squeezes to shape ``(H, W)``.
	            K: Intrinsics indexed as ``(K[0], K[1], K[2], K[3])`` and placed
	                at the fx, fy, cx, cy positions of the 3x3 camera matrix.

	        Returns:
	            numpy array of shape ``(H, W, 3)``. A border of
	            ``_NORMAL_KERNEL_SIZE // 2`` pixels is zero because no full
	            window exists there.

	        Raises:
	            ValueError: If the squeezed depth map is not ``(H, W)``.
	        """
	        depth = torch.from_numpy(depth).squeeze()
	        if tuple(depth.shape) != (H, W):
	            raise ValueError(
	                f'depth shape {tuple(depth.shape)} does not match the requested size ({H}, {W})')

	        # Each un-padded window shrinks the map by (kernel - 1) per dimension.
	        shrink = self._NORMAL_KERNEL_SIZE - 1
	        inner_h, inner_w = H - shrink, W - shrink

	        K = torch.tensor([[K[0], 0, K[2]],
	                          [0, K[1], K[3]],
	                          [0, 0, 1]], dtype=torch.float32)
	        K_inv = K.inverse()

	        # Homogeneous pixel coordinates (x, y, 1) for the whole image.
	        y, x = torch.meshgrid([torch.arange(0, H, dtype=torch.float32),
	                               torch.arange(0, W, dtype=torch.float32)], indexing='ij')
	        x = x.reshape(1, H * W)
	        y = y.reshape(1, H * W)
	        ones = torch.ones_like(x)
	        coord_2d = torch.cat((x, y, ones), dim=0)

	        # Back-project to 3D: ray direction scaled by depth -> (1, 3, H, W).
	        coord_3d = torch.matmul(K_inv, coord_2d).view(3, H, W)
	        coord_3d = (coord_3d * depth[None, :])[None, :]
	        coord_3d_mean = self.avg(coord_3d)

	        # Gather each window's points and subtract the window mean.
	        uf_coord_3d = self.unfold(coord_3d.permute(1, 0, 2, 3))
	        coord_3d_decenter = uf_coord_3d - coord_3d_mean.view(3, 1, inner_h * inner_w)
	        coord_3d_decenter = coord_3d_decenter.permute(2, 0, 1)
	        cov = torch.bmm(coord_3d_decenter, coord_3d_decenter.permute(0, 2, 1))

	        # eigh returns eigenvalues in ascending order, so column 0 belongs to
	        # the smallest one, i.e. the direction of least variance.
	        _, eigenvectors = torch.linalg.eigh(cov)
	        normal = eigenvectors[:, :, 0].float()
	        normal = self.pad(normal.permute(1, 0).reshape(1, 3, inner_h, inner_w))

	        # Keep normals whose dot product with the point is negative and
	        # negate the others.
	        orient_mask = (torch.sum(normal * coord_3d, dim=1) < 0).unsqueeze(1)
	        normal = normal * orient_mask - normal * (~orient_mask)
	        gt_normal = normal.squeeze().permute(1, 2, 0).numpy()
	        return gt_normal

	if __name__ == '__main__':
	    # Manual smoke test: build the dataset from a config file and print it.
	    # NOTE(review): the config path and the 'Apolloscape' key look copied
	    # from another dataset module -- confirm which config IBIMS should use.
	    from mmcv.utils import Config

	    config_file = 'mono/configs/Apolloscape_DDAD/convnext_base.cascade.1m.sgd.mae.py'
	    config = Config.fromfile(config_file)
	    print(IBIMSDataset(config['Apolloscape'], 'train', **config.data_basic))