ashawkey committed
Commit f6e1b58 · 1 Parent(s): 9e50a29

misc update

Files changed (10)
  1. loss.py +0 -11
  2. main_nerf.py +14 -5
  3. nerf/gui.py +3 -3
  4. nerf/network.py +5 -14
  5. nerf/network_grid.py +2 -7
  6. nerf/network_tcnn.py +0 -5
  7. nerf/provider.py +36 -26
  8. nerf/renderer.py +6 -5
  9. nerf/utils.py +5 -5
  10. readme.md +40 -4
loss.py DELETED
@@ -1,11 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-def mape_loss(pred, target):
-    # pred, target: [B, 1], torch tenspr
-    difference = (pred - target).abs()
-    scale = 1 / (target.abs() + 1e-2)
-    loss = difference * scale
-
-    return loss.mean()

main_nerf.py CHANGED
@@ -14,6 +14,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--text', default=None, help="text prompt")
     parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
+    parser.add_argument('-O2', action='store_true', help="equals --fp16 --dir_text")
     parser.add_argument('--test', action='store_true', help="test mode")
     parser.add_argument('--workspace', type=str, default='workspace')
     parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
@@ -37,8 +38,8 @@ if __name__ == '__main__':
     parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
     parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
     # rendering resolution in training
-    parser.add_argument('--w', type=int, default=64, help="render width for NeRF in training")
-    parser.add_argument('--h', type=int, default=64, help="render height for NeRF in training")
+    parser.add_argument('--w', type=int, default=128, help="render width for NeRF in training")
+    parser.add_argument('--h', type=int, default=128, help="render height for NeRF in training")
 
     ### dataset options
     parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
@@ -47,6 +48,11 @@ if __name__ == '__main__':
     parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
     parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
     parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
+    parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
+    parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
+
+    parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
+    parser.add_argument('--lambda_orient', type=float, default=1e-2, help="loss scale for orientation")
 
     ### GUI options
     parser.add_argument('--gui', action='store_true', help="start a GUI")
@@ -54,8 +60,8 @@ if __name__ == '__main__':
     parser.add_argument('--H', type=int, default=800, help="GUI height")
     parser.add_argument('--radius', type=float, default=3, help="default GUI camera radius from center")
     parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
-    parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction")
-    parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction")
+    parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction in [0, 180], corresponding to elevation [90, -90]")
+    parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction in [0, 360), azimuth")
     parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")
 
     opt = parser.parse_args()
@@ -64,7 +70,10 @@ if __name__ == '__main__':
         opt.fp16 = True
         opt.cuda_ray = True
         opt.dir_text = True
-
+    elif opt.O2:
+        opt.fp16 = True
+        opt.dir_text = True
+
     if opt.backbone == 'vanilla':
         from nerf.network import NeRFNetwork
     elif opt.backbone == 'tcnn':

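Note: the new `-O2` shortcut mirrors `-O` but leaves `--cuda_ray` off. A minimal, self-contained argparse sketch of how these shortcut flags expand (illustrative only; the real script defines many more options):

```
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-O', action='store_true')
parser.add_argument('-O2', action='store_true')
parser.add_argument('--fp16', action='store_true')
parser.add_argument('--cuda_ray', action='store_true')
parser.add_argument('--dir_text', action='store_true')
opt = parser.parse_args(['-O2'])

if opt.O:
    opt.fp16 = opt.cuda_ray = opt.dir_text = True
elif opt.O2:
    # same as -O, but without the CUDA raymarching flag
    opt.fp16 = opt.dir_text = True

print(opt.fp16, opt.cuda_ray, opt.dir_text)  # True False True
```
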
nerf/gui.py CHANGED
@@ -34,14 +34,14 @@ class OrbitCamera:
     # intrinsics
     @property
    def intrinsics(self):
-        focal = self.H / (2 * np.tan(np.radians(self.fovy) / 2))
+        focal = self.H / (2 * np.tan(np.deg2rad(self.fovy) / 2))
         return np.array([focal, focal, self.W // 2, self.H // 2])
 
     def orbit(self, dx, dy):
         # rotate along camera up/side axis!
         side = self.rot.as_matrix()[:3, 0] # why this is side --> ? # already normalized.
-        rotvec_x = self.up * np.radians(-0.1 * dx)
-        rotvec_y = side * np.radians(-0.1 * dy)
+        rotvec_x = self.up * np.deg2rad(-0.1 * dx)
+        rotvec_y = side * np.deg2rad(-0.1 * dy)
         self.rot = R.from_rotvec(rotvec_x) * R.from_rotvec(rotvec_y) * self.rot
 
     def scale(self, delta):

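Note: this change is cosmetic, since `np.deg2rad` is an alias for `np.radians`. A small self-contained check of the pinhole focal length derived from a vertical field of view (the 800 / 60 values are just the GUI defaults above):

```
import numpy as np

H, fovy = 800, 60.0   # image height in pixels, vertical FOV in degrees
assert np.deg2rad(fovy) == np.radians(fovy)  # the two functions are equivalent

# focal length in pixels of a pinhole camera with vertical FOV fovy
focal = H / (2 * np.tan(np.deg2rad(fovy) / 2))
print(focal)  # ~692.8
```
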
nerf/network.py CHANGED
@@ -37,26 +37,22 @@ class NeRFNetwork(NeRFRenderer):
                  opt,
                  num_layers=5,
                  hidden_dim=128,
-                 num_layers_bg=3,
-                 hidden_dim_bg=128,
+                 num_layers_bg=2,
+                 hidden_dim_bg=64,
                  ):
 
         super().__init__(opt)
 
         self.num_layers = num_layers
         self.hidden_dim = hidden_dim
-
         self.encoder, self.in_dim = get_encoder('frequency', input_dim=3)
-
         self.sigma_net = MLP(self.in_dim, 4, hidden_dim, num_layers, bias=True)
 
         # background network
         if self.bg_radius > 0:
             self.num_layers_bg = num_layers_bg
             self.hidden_dim_bg = hidden_dim_bg
-
-            self.encoder_bg, self.in_dim_bg = get_encoder('tiledgrid', input_dim=2)
-
+            self.encoder_bg, self.in_dim_bg = get_encoder('frequency', input_dim=2)
             self.bg_net = MLP(self.in_dim_bg, 3, hidden_dim_bg, num_layers_bg, bias=True)
 
         else:
@@ -84,7 +80,7 @@ class NeRFNetwork(NeRFRenderer):
         return sigma, albedo
 
     # ref: https://github.com/zhaofuq/Instant-NSR/blob/main/nerf/network_sdf.py#L192
-    def finite_differnce_normal(self, x, epsilon=5e-4):
+    def finite_difference_normal(self, x, epsilon=5e-4):
         # x: [N, 3]
         dx_pos, _ = self.common_forward((x + torch.tensor([[epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
         dx_neg, _ = self.common_forward((x + torch.tensor([[-epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
@@ -116,7 +112,7 @@ class NeRFNetwork(NeRFRenderer):
         # query normal
 
         # sigma, albedo = self.common_forward(x)
-        # normal = self.finite_differnce_normal(x)
+        # normal = self.finite_difference_normal(x)
 
         with torch.enable_grad():
             x.requires_grad_(True)
@@ -128,11 +124,6 @@ class NeRFNetwork(NeRFRenderer):
         normal = safe_normalize(normal)
         normal[torch.isnan(normal)] = 0
 
-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = safe_normalize(l)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ -l).clamp(min=0) # [N,]

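Note: the per-network random light sampling removed here moves into the renderer (see nerf/renderer.py below), so all samples in a batch share one light direction. A minimal sketch of the Lambertian shading term used above; the function name and `ambient_ratio` argument are illustrative, not the project's API:

```
import torch
import torch.nn.functional as F

def lambertian_shading(albedo, normal, light_d, ambient_ratio=0.1):
    # ambient + diffuse term: ratio + (1 - ratio) * max(0, n · -l), as in the diff above
    lambertian = ambient_ratio + (1 - ambient_ratio) * (normal @ -light_d).clamp(min=0)  # [N,]
    return albedo * lambertian.unsqueeze(-1)  # [N, 3]

albedo = torch.rand(4, 3)
normal = F.normalize(torch.randn(4, 3), dim=-1)
light_d = F.normalize(torch.randn(3), dim=-1)
print(lambertian_shading(albedo, normal, light_d).shape)  # torch.Size([4, 3])
```
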
nerf/network_grid.py CHANGED
@@ -87,7 +87,7 @@ class NeRFNetwork(NeRFRenderer):
         return sigma, albedo
 
     # ref: https://github.com/zhaofuq/Instant-NSR/blob/main/nerf/network_sdf.py#L192
-    def finite_differnce_normal(self, x, epsilon=5e-4):
+    def finite_difference_normal(self, x, epsilon=5e-4):
         # x: [N, 3]
         dx_pos, _ = self.common_forward((x + torch.tensor([[epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
         dx_neg, _ = self.common_forward((x + torch.tensor([[-epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
@@ -119,7 +119,7 @@ class NeRFNetwork(NeRFRenderer):
         # query normal
 
         sigma, albedo = self.common_forward(x)
-        normal = self.finite_differnce_normal(x)
+        normal = self.finite_difference_normal(x)
 
         # with torch.enable_grad():
         #     x.requires_grad_(True)
@@ -131,11 +131,6 @@ class NeRFNetwork(NeRFRenderer):
         normal = safe_normalize(normal)
         normal[torch.isnan(normal)] = 0
 
-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = safe_normalize(l)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ -l).clamp(min=0) # [N,]

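Note: the grid backbone now calls the (renamed) finite-difference normal instead of the autograd path. A self-contained sketch of the central-difference idea, with a toy density standing in for `common_forward`:

```
import torch

def toy_density(x):
    # stand-in for the network's density output: a soft blob centered at the origin
    return torch.exp(-(x ** 2).sum(-1, keepdim=True))

def finite_difference_normal(x, density_fn, epsilon=5e-4):
    # central differences of the density field; the negated, normalized gradient acts as the normal
    offsets = torch.eye(3) * epsilon
    grad = torch.stack([
        (density_fn(x + offsets[i]) - density_fn(x - offsets[i])) / (2 * epsilon)
        for i in range(3)
    ], dim=-1).squeeze(-2)
    normal = -grad
    return normal / (normal.norm(dim=-1, keepdim=True) + 1e-9)

x = torch.tensor([[0.3, 0.1, -0.2]])
print(finite_difference_normal(x, toy_density))  # points away from the blob center
```
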
nerf/network_tcnn.py CHANGED
@@ -133,11 +133,6 @@ class NeRFNetwork(NeRFRenderer):
         if not has_grad:
             normal = normal.detach()
 
-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = l / (torch.norm(l, dim=-1, keepdim=True) + 1e-9)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ l).clamp(min=0) # [N,]

nerf/provider.py CHANGED
@@ -35,37 +35,42 @@ def visualize_poses(poses, size=0.1):
 
     trimesh.Scene(objects).show()
 
-def get_view_direction(thetas, phis):
-    # phis [B,]; thetas: [B,]
-    # front = 0         0-90
-    # side (left) = 1   90-180
-    # back = 2          180-270
-    # side (right) = 3  270-360
-    # top = 4           0-30
-    # bottom = 5        150-180
-    res = torch.zeros(phis.shape[0], dtype=torch.long)
+def get_view_direction(thetas, phis, overhead, front):
+    # phis [B,]; thetas: [B,]
+    # front = 0         [0, front)
+    # side (left) = 1   [front, 180)
+    # back = 2          [180, 180+front)
+    # side (right) = 3  [180+front, 360)
+    # top = 4           [0, overhead]
+    # bottom = 5        [180-overhead, 180]
+    res = torch.zeros(thetas.shape[0], dtype=torch.long)
     # first determine by phis
-    res[(phis < (np.pi / 2))] = 0
-    res[(phis >= (np.pi / 2)) & (phis < np.pi)] = 1
-    res[(phis >= np.pi) & (phis < (3 * np.pi / 2))] = 2
-    res[(phis >= (3 * np.pi / 2)) & (phis < (2 * np.pi))] = 3
+    res[(phis < front)] = 0
+    res[(phis >= front) & (phis < np.pi)] = 1
+    res[(phis >= np.pi) & (phis < (np.pi + front))] = 2
+    res[(phis >= (np.pi + front))] = 3
     # override by thetas
-    res[thetas < (np.pi / 6)] = 4
-    res[thetas >= (5 * np.pi / 6)] = 5
+    res[thetas <= overhead] = 4
+    res[thetas >= (np.pi - overhead)] = 5
     return res
 
 
-def rand_poses(size, device, return_dirs=False, radius_range=[1, 1.5], theta_range=[0, 4 * np.pi / 6], phi_range=[0, 2*np.pi]):
+def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0, 150], phi_range=[0, 360], return_dirs=False, angle_overhead=30, angle_front=60):
     ''' generate random poses from an orbit camera
     Args:
         size: batch size of generated poses.
         device: where to allocate the output.
         radius: camera radius
-        theta_range: [min, max], should be in [0, \pi]
-        phi_range: [min, max], should be in [0, 2\pi]
+        theta_range: [min, max], should be in [0, pi]
+        phi_range: [min, max], should be in [0, 2 * pi]
     Return:
         poses: [size, 4, 4]
     '''
+
+    theta_range = np.deg2rad(theta_range)
+    phi_range = np.deg2rad(phi_range)
+    angle_overhead = np.deg2rad(angle_overhead)
+    angle_front = np.deg2rad(angle_front)
 
     radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0]
     thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0]
@@ -94,14 +99,19 @@ def rand_poses(size, device, return_dirs=False, radius_range=[1, 1.5], theta_range=[0, 4 * np.pi / 6], phi_range=[0, 2*np.pi]):
     poses[:, :3, 3] = centers
 
     if return_dirs:
-        dirs = get_view_direction(thetas, phis)
+        dirs = get_view_direction(thetas, phis, angle_overhead, angle_front)
     else:
         dirs = None
 
     return poses, dirs
 
 
-def circle_poses(device, return_dirs=False, radius=1.25, theta=np.pi/2, phi=0):
+def circle_poses(device, radius=1.25, theta=60, phi=0, return_dirs=False, angle_overhead=30, angle_front=60):
+
+    theta = np.deg2rad(theta)
+    phi = np.deg2rad(phi)
+    angle_overhead = np.deg2rad(angle_overhead)
+    angle_front = np.deg2rad(angle_front)
 
     thetas = torch.FloatTensor([theta]).to(device)
     phis = torch.FloatTensor([phi]).to(device)
@@ -123,7 +133,7 @@ def circle_poses(device, return_dirs=False, radius=1.25, theta=np.pi/2, phi=0):
     poses[:, :3, 3] = centers
 
     if return_dirs:
-        dirs = get_view_direction(thetas, phis)
+        dirs = get_view_direction(thetas, phis, angle_overhead, angle_front)
     else:
         dirs = None
 
@@ -160,20 +170,20 @@ class NeRFDataset:
 
         if self.training:
             # random pose on the fly
-            poses, dirs = rand_poses(B, self.device, return_dirs=self.opt.dir_text, radius_range=self.radius_range)
+            poses, dirs = rand_poses(B, self.device, radius_range=self.radius_range, return_dirs=self.opt.dir_text, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)
 
             # random focal
             fov = random.random() * (self.fovy_range[1] - self.fovy_range[0]) + self.fovy_range[0]
-            focal = self.H / (2 * np.tan(np.radians(fov) / 2))
+            focal = self.H / (2 * np.tan(np.deg2rad(fov) / 2))
             intrinsics = np.array([focal, focal, self.cx, self.cy])
         else:
             # circle pose
-            phi = (index[0] / self.size) * 2 * np.pi
-            poses, dirs = circle_poses(self.device, return_dirs=self.opt.dir_text, radius=self.radius_range[1] * 1.2, theta=np.pi/3, phi=phi)
+            phi = (index[0] / self.size) * 360
+            poses, dirs = circle_poses(self.device, radius=self.radius_range[1] * 1.2, theta=60, phi=phi, return_dirs=self.opt.dir_text, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)
 
             # fixed focal
             fov = (self.fovy_range[1] + self.fovy_range[0]) / 2
-            focal = self.H / (2 * np.tan(np.radians(fov) / 2))
+            focal = self.H / (2 * np.tan(np.deg2rad(fov) / 2))
             intrinsics = np.array([focal, focal, self.cx, self.cy])

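Note: pose sampling now takes all angles in degrees and converts once via `np.deg2rad`. A self-contained version of the view-direction binning used for the direction-encoded prompt (mirroring `get_view_direction` above):

```
import numpy as np
import torch

def get_view_direction(thetas, phis, overhead, front):
    # labels: 0 front, 1 side (left), 2 back, 3 side (right), 4 top, 5 bottom
    res = torch.zeros(thetas.shape[0], dtype=torch.long)
    res[phis < front] = 0
    res[(phis >= front) & (phis < np.pi)] = 1
    res[(phis >= np.pi) & (phis < np.pi + front)] = 2
    res[phis >= np.pi + front] = 3
    res[thetas <= overhead] = 4            # top/bottom override the azimuth bins
    res[thetas >= np.pi - overhead] = 5
    return res

overhead, front = np.deg2rad(30), np.deg2rad(60)
thetas = torch.tensor([0.2, 1.5, 3.0])  # polar angle in [0, pi]
phis = torch.tensor([0.5, 2.0, 4.0])    # azimuth in [0, 2*pi)
print(get_view_direction(thetas, phis, overhead, front))  # tensor([4, 1, 5])
```
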
nerf/renderer.py CHANGED
@@ -448,6 +448,12 @@ class NeRFRenderer(nn.Module):
         # pre-calculate near far
         nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb_train if self.training else self.aabb_infer)
 
+        # random sample light_d if not provided
+        if light_d is None:
+            # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face)
+            light_d = - (rays_o[0] + torch.randn(3, device=device, dtype=torch.float))
+            light_d = safe_normalize(light_d)
+
         results = {}
 
         if self.training:
@@ -476,11 +482,6 @@
 
         # allocate outputs
         dtype = torch.float32
-
-        # fix light for all samples if not provided
-        if light_d is None:
-            light_d = torch.randn(3, device=device, dtype=torch.float)
-            light_d = safe_normalize(light_d)
 
         weights_sum = torch.zeros(N, dtype=dtype, device=device)
         depth = torch.zeros(N, dtype=dtype, device=device)

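Note: the light direction is now drawn once per batch, before ray marching, instead of inside the inner allocation block. A tiny sketch of that sampling with made-up ray origins; `safe_normalize` is replaced here by an explicit epsilon-stabilized division:

```
import torch

rays_o = torch.tensor([0.0, 0.0, 1.3]) + 0.01 * torch.randn(4096, 3)  # fake ray origins near the camera
# gaussian noise around the first ray origin, so the light roughly follows the view direction
light_d = -(rays_o[0] + torch.randn(3))
light_d = light_d / (light_d.norm() + 1e-9)
print(light_d, light_d.norm())  # a unit vector
```
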
nerf/utils.py CHANGED
@@ -365,11 +365,11 @@ class Trainer(object):
         # alphas = alphas ** 2 # skewed entropy, favors 0 over 1
         loss_entropy = (- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()
 
-        loss = loss_guidance + 1e-3 * loss_entropy
+        loss = loss_guidance + self.opt.lambda_entropy * loss_entropy
 
         if 'loss_orient' in outputs:
             loss_orient = outputs['loss_orient']
-            loss = loss + 1e-2 * loss_orient
+            loss = loss + self.opt.lambda_orient * loss_orient
 
         return pred_rgb, pred_ws, loss
 
@@ -398,7 +398,7 @@ class Trainer(object):
         # alphas = alphas ** 2 # skewed entropy, favors 0 over 1
         loss_entropy = (- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()
 
-        loss = 1e-3 * loss_entropy
+        loss = self.opt.lambda_entropy * loss_entropy
 
         return pred_rgb, pred_depth, loss
 
@@ -638,7 +638,7 @@ class Trainer(object):
         return outputs
 
     def train_one_epoch(self, loader):
-        self.log(f"==> Start Training Epoch {self.epoch}, lr={self.optimizer.param_groups[0]['lr']:.6f} ...")
+        self.log(f"==> Start Training {self.workspace} Epoch {self.epoch}, lr={self.optimizer.param_groups[0]['lr']:.6f} ...")
 
         total_loss = 0
         if self.local_rank == 0 and self.report_metric_at_train:
@@ -722,7 +722,7 @@
 
 
     def evaluate_one_epoch(self, loader, name=None):
-        self.log(f"++> Evaluate at epoch {self.epoch} ...")
+        self.log(f"++> Evaluate {self.workspace} at epoch {self.epoch} ...")
 
         if name is None:
             name = f'{self.name}_ep{self.epoch:04d}'

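Note: the entropy and orientation loss weights are now read from the new `--lambda_entropy` / `--lambda_orient` flags instead of being hard-coded. A minimal sketch of the alpha (opacity) entropy term; the clamp is added here only to keep the toy example finite:

```
import torch

def alpha_entropy(weights_sum, eps=1e-6):
    # binary entropy of per-ray opacity: pushes alphas toward either 0 or 1
    alphas = weights_sum.clamp(eps, 1 - eps)
    return (-alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()

lambda_entropy = 1e-4                 # default of the new --lambda_entropy flag
weights_sum = torch.rand(4096)        # stand-in for the rendered per-ray opacities
loss = lambda_entropy * alpha_entropy(weights_sum)
print(loss)
```
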
readme.md CHANGED
@@ -4,14 +4,14 @@ A pytorch implementation of the text-to-3D model **Dreamfusion**, powered by the
 
 The original paper's project page: [_DreamFusion: Text-to-3D using 2D Diffusion_](https://dreamfusion3d.github.io/).
 
-Examples generated from text prompt `a DSLR photo of a pineapple` viewed with the GUI in real time:
+Examples generated from text prompt `a high quality photo of a pineapple` viewed with the GUI in real time:
 
 https://user-images.githubusercontent.com/25863658/194241493-f3e68f78-aefe-479e-a4a8-001424a61b37.mp4
 
 ### [Gallery](https://github.com/ashawkey/stable-dreamfusion/issues/1) | [Update Logs](assets/update_logs.md)
 
 # Important Notice
-This project is a **work-in-progress**, and contains lots of differences from the paper. Also, many features are still not implemented now. **The current generation quality cannot match the results from the original paper, and still fail badly for many prompts.**
+This project is a **work-in-progress**, and contains lots of differences from the paper. Also, many features are still not implemented now. **The current generation quality cannot match the results from the original paper, and many prompts still fail badly!**
 
 
 ## Notable differences from the paper
@@ -83,7 +83,7 @@ python main_nerf.py --text "a hamburger" --workspace trial_clip -O --guidance cl
 python main_nerf.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
 ```
 
-# Code organization
+# Code organization & Advanced tips
 
 This is a simple description of the most important implementation details.
 If you are interested in improving this repo, this might be a starting point.
@@ -101,14 +101,50 @@ w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
 grad = w * (noise_pred - noise)
 latents.backward(gradient=grad, retain_graph=True)
 ```
-* Other regularizations are in `./nerf/utils.py > Trainer > train_step`.
+* Other regularizations are in `./nerf/utils.py > Trainer > train_step`.
+    * The generation seems quite sensitive to regularizations on weights_sum (alphas for each ray). The original opacity loss tends to make NeRF disappear (zero density everywhere), so we use an entropy loss to replace it for now (it encourages alpha to be either 0 or 1).
 * NeRF Rendering core function: `./nerf/renderer.py > NeRFRenderer > run_cuda`.
+* Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. The current implementation harms training and is disabled.
+    * use `--albedo_iters 1000` to enable random shading mode after 1000 steps from albedo, lambertian, and textureless.
+    * light direction: the current implementation uses a plane light source instead of a point light source...
+* View-dependent prompting: `./nerf/provider.py > get_view_direction`.
+    * use `--angle_overhead, --angle_front` to set the borders. How to better divide front/back/side regions?
+* Network backbone (`./nerf/network*.py`) can be chosen by the `--backbone` option, but `tcnn` and `vanilla` are not well tested.
+    * the occupancy-grid-based training acceleration (instant-ngp-like) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later.
+* Spatial density bias (gaussian density blob): `./nerf/network*.py > NeRFNetwork > gaussian`.
 
 # Acknowledgement
 
 * The amazing original work: [_DreamFusion: Text-to-3D using 2D Diffusion_](https://dreamfusion3d.github.io/).
+    ```
+    @article{poole2022dreamfusion,
+        author = {Poole, Ben and Jain, Ajay and Barron, Jonathan T. and Mildenhall, Ben},
+        title = {DreamFusion: Text-to-3D using 2D Diffusion},
+        journal = {arXiv},
+        year = {2022},
+    }
+    ```
 
 * Huge thanks to the [Stable Diffusion](https://github.com/CompVis/stable-diffusion) and the [diffusers](https://github.com/huggingface/diffusers) library.
 
+    ```
+    @misc{rombach2021highresolution,
+        title = {High-Resolution Image Synthesis with Latent Diffusion Models},
+        author = {Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
+        year = {2021},
+        eprint = {2112.10752},
+        archivePrefix = {arXiv},
+        primaryClass = {cs.CV}
+    }
+
+    @misc{von-platen-etal-2022-diffusers,
+        author = {Patrick von Platen and Suraj Patil and Anton Lozhkov and Pedro Cuenca and Nathan Lambert and Kashif Rasul and Mishig Davaadorj and Thomas Wolf},
+        title = {Diffusers: State-of-the-art diffusion models},
+        year = {2022},
+        publisher = {GitHub},
+        journal = {GitHub repository},
+        howpublished = {\url{https://github.com/huggingface/diffusers}}
+    }
+    ```
 
 * The GUI is developed with [DearPyGui](https://github.com/hoffstadt/DearPyGui).
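
Note: the readme's score-distillation snippet relies on PyTorch's ability to backpropagate an externally supplied gradient via `tensor.backward(gradient=...)`. A toy demonstration of that mechanism, with no diffusion model involved:

```
import torch

params = torch.randn(2, 3, requires_grad=True)
latents = params * 2.0                     # stand-in for the rendered / encoded latents
grad = 0.5 * torch.ones_like(latents)      # stand-in for w * (noise_pred - noise)
latents.backward(gradient=grad)            # inject the hand-made gradient instead of a loss
print(params.grad)                         # chain rule: 0.5 * 2.0 = 1.0 everywhere
```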