misc update
- loss.py +0 -11
- main_nerf.py +14 -5
- nerf/gui.py +3 -3
- nerf/network.py +5 -14
- nerf/network_grid.py +2 -7
- nerf/network_tcnn.py +0 -5
- nerf/provider.py +36 -26
- nerf/renderer.py +6 -5
- nerf/utils.py +5 -5
- readme.md +40 -4
loss.py
DELETED
@@ -1,11 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-def mape_loss(pred, target):
-    # pred, target: [B, 1], torch tensor
-    difference = (pred - target).abs()
-    scale = 1 / (target.abs() + 1e-2)
-    loss = difference * scale
-
-    return loss.mean()
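For reference, the removed `mape_loss` is a relative-L1 (MAPE-style) error; a tiny standalone check of its behaviour, with toy tensors that are not part of the repo:

```python
import torch

# The same absolute error is weighted by 1 / (|target| + 1e-2),
# so it counts more for small targets than for large ones.
pred = torch.tensor([[1.1], [10.1]])
target = torch.tensor([[1.0], [10.0]])
difference = (pred - target).abs()
scale = 1 / (target.abs() + 1e-2)
print((difference * scale).mean())  # ~0.054: (0.1/1.01 + 0.1/10.01) / 2
```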
main_nerf.py
CHANGED
@@ -14,6 +14,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--text', default=None, help="text prompt")
     parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
+    parser.add_argument('-O2', action='store_true', help="equals --fp16 --dir_text")
     parser.add_argument('--test', action='store_true', help="test mode")
     parser.add_argument('--workspace', type=str, default='workspace')
     parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
@@ -37,8 +38,8 @@ if __name__ == '__main__':
     parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
     parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
     # rendering resolution in training
-    parser.add_argument('--w', type=int, default=
-    parser.add_argument('--h', type=int, default=
+    parser.add_argument('--w', type=int, default=128, help="render width for NeRF in training")
+    parser.add_argument('--h', type=int, default=128, help="render height for NeRF in training")

     ### dataset options
     parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
@@ -47,6 +48,11 @@ if __name__ == '__main__':
     parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
     parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
     parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
+    parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
+    parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
+
+    parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
+    parser.add_argument('--lambda_orient', type=float, default=1e-2, help="loss scale for orientation")

     ### GUI options
     parser.add_argument('--gui', action='store_true', help="start a GUI")
@@ -54,8 +60,8 @@ if __name__ == '__main__':
     parser.add_argument('--H', type=int, default=800, help="GUI height")
     parser.add_argument('--radius', type=float, default=3, help="default GUI camera radius from center")
     parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
-    parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction")
-    parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction")
+    parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction in [0, 180], corresponding to elevation [90, -90]")
+    parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction in [0, 360), azimuth")
     parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")

     opt = parser.parse_args()
@@ -64,7 +70,10 @@ if __name__ == '__main__':
         opt.fp16 = True
         opt.cuda_ray = True
         opt.dir_text = True
-
+    elif opt.O2:
+        opt.fp16 = True
+        opt.dir_text = True
+
     if opt.backbone == 'vanilla':
         from nerf.network import NeRFNetwork
     elif opt.backbone == 'tcnn':
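A minimal sketch of how the new `-O2` shortcut expands compared to `-O`, mirroring the flag logic in the diff above (only the relevant arguments are reproduced):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-O', action='store_true')    # fp16 + cuda_ray + dir_text
parser.add_argument('-O2', action='store_true')   # fp16 + dir_text (cuda_ray left off)
parser.add_argument('--fp16', action='store_true')
parser.add_argument('--cuda_ray', action='store_true')
parser.add_argument('--dir_text', action='store_true')
opt = parser.parse_args(['-O2'])

if opt.O:
    opt.fp16 = opt.cuda_ray = opt.dir_text = True
elif opt.O2:
    opt.fp16 = opt.dir_text = True

print(opt.fp16, opt.cuda_ray, opt.dir_text)  # True False True
```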
nerf/gui.py
CHANGED
@@ -34,14 +34,14 @@ class OrbitCamera:
     # intrinsics
     @property
     def intrinsics(self):
-        focal = self.H / (2 * np.tan(np.
+        focal = self.H / (2 * np.tan(np.deg2rad(self.fovy) / 2))
         return np.array([focal, focal, self.W // 2, self.H // 2])

     def orbit(self, dx, dy):
         # rotate along camera up/side axis!
         side = self.rot.as_matrix()[:3, 0] # why this is side --> ? # already normalized.
-        rotvec_x = self.up * np.
-        rotvec_y = side * np.
+        rotvec_x = self.up * np.deg2rad(-0.1 * dx)
+        rotvec_y = side * np.deg2rad(-0.1 * dy)
         self.rot = R.from_rotvec(rotvec_x) * R.from_rotvec(rotvec_y) * self.rot

     def scale(self, delta):
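The intrinsics fix converts the GUI fovy from degrees before taking the tangent; a quick standalone check of the formula with illustrative values:

```python
import numpy as np

H, fovy = 800, 60.0  # image height in pixels, vertical field of view in degrees
focal = H / (2 * np.tan(np.deg2rad(fovy) / 2))
print(focal)  # ~692.8 px: half the image height divided by tan(fovy / 2)
```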
nerf/network.py
CHANGED
@@ -37,26 +37,22 @@ class NeRFNetwork(NeRFRenderer):
                  opt,
                  num_layers=5,
                  hidden_dim=128,
-                 num_layers_bg=
-                 hidden_dim_bg=
+                 num_layers_bg=2,
+                 hidden_dim_bg=64,
                  ):

         super().__init__(opt)

         self.num_layers = num_layers
         self.hidden_dim = hidden_dim
-
         self.encoder, self.in_dim = get_encoder('frequency', input_dim=3)
-
         self.sigma_net = MLP(self.in_dim, 4, hidden_dim, num_layers, bias=True)

         # background network
         if self.bg_radius > 0:
             self.num_layers_bg = num_layers_bg
             self.hidden_dim_bg = hidden_dim_bg
-
-            self.encoder_bg, self.in_dim_bg = get_encoder('tiledgrid', input_dim=2)
-
+            self.encoder_bg, self.in_dim_bg = get_encoder('frequency', input_dim=2)
             self.bg_net = MLP(self.in_dim_bg, 3, hidden_dim_bg, num_layers_bg, bias=True)

         else:
@@ -84,7 +80,7 @@ class NeRFNetwork(NeRFRenderer):
         return sigma, albedo

     # ref: https://github.com/zhaofuq/Instant-NSR/blob/main/nerf/network_sdf.py#L192
-    def
+    def finite_difference_normal(self, x, epsilon=5e-4):
         # x: [N, 3]
         dx_pos, _ = self.common_forward((x + torch.tensor([[epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
         dx_neg, _ = self.common_forward((x + torch.tensor([[-epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
@@ -116,7 +112,7 @@ class NeRFNetwork(NeRFRenderer):
         # query normal

         # sigma, albedo = self.common_forward(x)
-        # normal = self.
+        # normal = self.finite_difference_normal(x)

         with torch.enable_grad():
             x.requires_grad_(True)
@@ -128,11 +124,6 @@ class NeRFNetwork(NeRFRenderer):
         normal = safe_normalize(normal)
         normal[torch.isnan(normal)] = 0

-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = safe_normalize(l)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ -l).clamp(min=0) # [N,]
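The newly named `finite_difference_normal` estimates normals from central differences of the density. A self-contained sketch of the idea with a toy density in place of `common_forward`; the sign convention here (normal = -grad(sigma)) is an assumption of the sketch:

```python
import torch
import torch.nn.functional as F

def toy_sigma(x):
    # placeholder density: a soft sphere of radius 0.5 around the origin
    return torch.relu(0.5 - x.norm(dim=-1))

def finite_difference_normal(x, sigma_fn, epsilon=5e-4):
    # x: [N, 3]; central difference of the density along each axis
    offsets = epsilon * torch.eye(3)
    grads = [0.5 * (sigma_fn(x + offsets[i]) - sigma_fn(x - offsets[i])) / epsilon
             for i in range(3)]
    normal = -torch.stack(grads, dim=-1)  # density falls off outward, so -grad points outward
    return F.normalize(normal, dim=-1)

x = torch.tensor([[0.3, 0.0, 0.0]])
print(finite_difference_normal(x, toy_sigma))  # ~[1, 0, 0], i.e. outward along +x
```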
nerf/network_grid.py
CHANGED
@@ -87,7 +87,7 @@ class NeRFNetwork(NeRFRenderer):
         return sigma, albedo

     # ref: https://github.com/zhaofuq/Instant-NSR/blob/main/nerf/network_sdf.py#L192
-    def
+    def finite_difference_normal(self, x, epsilon=5e-4):
         # x: [N, 3]
         dx_pos, _ = self.common_forward((x + torch.tensor([[epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
         dx_neg, _ = self.common_forward((x + torch.tensor([[-epsilon, 0.00, 0.00]], device=x.device)).clamp(-self.bound, self.bound))
@@ -119,7 +119,7 @@ class NeRFNetwork(NeRFRenderer):
         # query normal

         sigma, albedo = self.common_forward(x)
-        normal = self.
+        normal = self.finite_difference_normal(x)

         # with torch.enable_grad():
         #     x.requires_grad_(True)
@@ -131,11 +131,6 @@ class NeRFNetwork(NeRFRenderer):
         normal = safe_normalize(normal)
         normal[torch.isnan(normal)] = 0

-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = safe_normalize(l)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ -l).clamp(min=0) # [N,]
nerf/network_tcnn.py
CHANGED
@@ -133,11 +133,6 @@ class NeRFNetwork(NeRFRenderer):
         if not has_grad:
             normal = normal.detach()

-        # light direction (random if not provided)
-        if l is None:
-            l = torch.randn(3, device=x.device, dtype=torch.float)
-            l = l / (torch.norm(l, dim=-1, keepdim=True) + 1e-9)
-
         # lambertian shading
         lambertian = ratio + (1 - ratio) * (normal @ l).clamp(min=0) # [N,]
nerf/provider.py
CHANGED
@@ -35,37 +35,42 @@ def visualize_poses(poses, size=0.1):

     trimesh.Scene(objects).show()

-def get_view_direction(thetas, phis):
-    # phis [B,];
-    # front = 0 0
-    # side (left) = 1
-    # back = 2 180
-    # side (right) = 3
-    # top = 4
-    # bottom = 5
-    res = torch.zeros(
+def get_view_direction(thetas, phis, overhead, front):
+    # phis [B,]; thetas: [B,]
+    # front = 0         [0, front)
+    # side (left) = 1   [front, 180)
+    # back = 2          [180, 180+front)
+    # side (right) = 3  [180+front, 360)
+    # top = 4           [0, overhead]
+    # bottom = 5        [180-overhead, 180]
+    res = torch.zeros(thetas.shape[0], dtype=torch.long)
     # first determine by phis
-    res[(phis <
-    res[(phis >=
-    res[(phis >= np.pi) & (phis < (
-    res[(phis >= (
+    res[(phis < front)] = 0
+    res[(phis >= front) & (phis < np.pi)] = 1
+    res[(phis >= np.pi) & (phis < (np.pi + front))] = 2
+    res[(phis >= (np.pi + front))] = 3
     # override by thetas
-    res[thetas
-    res[thetas >= (
+    res[thetas <= overhead] = 4
+    res[thetas >= (np.pi - overhead)] = 5
     return res


-def rand_poses(size, device,
+def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0, 150], phi_range=[0, 360], return_dirs=False, angle_overhead=30, angle_front=60):
     ''' generate random poses from an orbit camera
     Args:
         size: batch size of generated poses.
         device: where to allocate the output.
         radius: camera radius
-        theta_range: [min, max], should be in [0,
-        phi_range: [min, max], should be in [0, 2
+        theta_range: [min, max], should be in [0, pi]
+        phi_range: [min, max], should be in [0, 2 * pi]
     Return:
         poses: [size, 4, 4]
     '''
+
+    theta_range = np.deg2rad(theta_range)
+    phi_range = np.deg2rad(phi_range)
+    angle_overhead = np.deg2rad(angle_overhead)
+    angle_front = np.deg2rad(angle_front)

     radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0]
     thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0]
@@ -94,14 +99,19 @@ def rand_poses(size, device, return_dirs=False, radius_range=[1, 1.5], theta_ran
     poses[:, :3, 3] = centers

     if return_dirs:
-        dirs = get_view_direction(thetas, phis)
+        dirs = get_view_direction(thetas, phis, angle_overhead, angle_front)
     else:
         dirs = None

     return poses, dirs


-def circle_poses(device,
+def circle_poses(device, radius=1.25, theta=60, phi=0, return_dirs=False, angle_overhead=30, angle_front=60):
+
+    theta = np.deg2rad(theta)
+    phi = np.deg2rad(phi)
+    angle_overhead = np.deg2rad(angle_overhead)
+    angle_front = np.deg2rad(angle_front)

     thetas = torch.FloatTensor([theta]).to(device)
     phis = torch.FloatTensor([phi]).to(device)
@@ -123,7 +133,7 @@ def circle_poses(device, return_dirs=False, radius=1.25, theta=np.pi/2, phi=0):
     poses[:, :3, 3] = centers

     if return_dirs:
-        dirs = get_view_direction(thetas, phis)
+        dirs = get_view_direction(thetas, phis, angle_overhead, angle_front)
     else:
         dirs = None

@@ -160,20 +170,20 @@ class NeRFDataset:

         if self.training:
             # random pose on the fly
-            poses, dirs = rand_poses(B, self.device, return_dirs=self.opt.dir_text,
+            poses, dirs = rand_poses(B, self.device, radius_range=self.radius_range, return_dirs=self.opt.dir_text, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)

             # random focal
             fov = random.random() * (self.fovy_range[1] - self.fovy_range[0]) + self.fovy_range[0]
-            focal = self.H / (2 * np.tan(np.
+            focal = self.H / (2 * np.tan(np.deg2rad(fov) / 2))
             intrinsics = np.array([focal, focal, self.cx, self.cy])
         else:
             # circle pose
-            phi = (index[0] / self.size) *
-            poses, dirs = circle_poses(self.device,
+            phi = (index[0] / self.size) * 360
+            poses, dirs = circle_poses(self.device, radius=self.radius_range[1] * 1.2, theta=60, phi=phi, return_dirs=self.opt.dir_text, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)

             # fixed focal
             fov = (self.fovy_range[1] + self.fovy_range[0]) / 2
-            focal = self.H / (2 * np.tan(np.
+            focal = self.H / (2 * np.tan(np.deg2rad(fov) / 2))
             intrinsics = np.array([focal, focal, self.cx, self.cy])
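A quick usage check of the new view-direction binning, using the same thresholds as `get_view_direction` above (angles already converted to radians, as done in `rand_poses`):

```python
import numpy as np
import torch

def get_view_direction(thetas, phis, overhead, front):
    # 0 = front, 1 = left side, 2 = back, 3 = right side, 4 = top, 5 = bottom
    res = torch.zeros(thetas.shape[0], dtype=torch.long)
    res[phis < front] = 0
    res[(phis >= front) & (phis < np.pi)] = 1
    res[(phis >= np.pi) & (phis < (np.pi + front))] = 2
    res[phis >= (np.pi + front)] = 3
    res[thetas <= overhead] = 4            # elevation bins override the azimuth bins
    res[thetas >= (np.pi - overhead)] = 5
    return res

thetas = torch.tensor([np.pi / 2, np.pi / 2, 0.1])   # polar angle from the +z axis
phis = torch.tensor([0.1, np.pi, 1.0])               # azimuth
print(get_view_direction(thetas, phis, np.deg2rad(30), np.deg2rad(60)))
# tensor([0, 2, 4]) -> front view, back view, overhead view
```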
nerf/renderer.py
CHANGED
@@ -448,6 +448,12 @@ class NeRFRenderer(nn.Module):
         # pre-calculate near far
         nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb_train if self.training else self.aabb_infer)

+        # random sample light_d if not provided
+        if light_d is None:
+            # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face)
+            light_d = - (rays_o[0] + torch.randn(3, device=device, dtype=torch.float))
+            light_d = safe_normalize(light_d)
+
         results = {}

         if self.training:
@@ -476,11 +482,6 @@ class NeRFRenderer(nn.Module):

         # allocate outputs
         dtype = torch.float32
-
-        # fix light for all samples if not provided
-        if light_d is None:
-            light_d = torch.randn(3, device=device, dtype=torch.float)
-            light_d = safe_normalize(light_d)

         weights_sum = torch.zeros(N, dtype=dtype, device=device)
         depth = torch.zeros(N, dtype=dtype, device=device)
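A standalone illustration of the new `light_d` sampling: the random light direction is gaussian noise centered on the negated first ray origin, so on average it points from the camera side toward the scene. This is a sketch only; `safe_normalize` is replaced by an explicit norm:

```python
import torch

def sample_light_d(rays_o):
    # rays_o: [N, 3] ray origins. Noise around the first origin, negated and
    # normalized -- a random unit direction centered on -rays_o[0].
    light_d = -(rays_o[0] + torch.randn(3))
    return light_d / (light_d.norm() + 1e-20)

rays_o = torch.zeros(8, 3)
rays_o[:, 2] = 2.0                 # camera roughly 2 units along +z
print(sample_light_d(rays_o))      # random unit vector centered on (0, 0, -1)
```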
nerf/utils.py
CHANGED
@@ -365,11 +365,11 @@ class Trainer(object):
         # alphas = alphas ** 2 # skewed entropy, favors 0 over 1
         loss_entropy = (- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()

-        loss = loss_guidance +
+        loss = loss_guidance + self.opt.lambda_entropy * loss_entropy

         if 'loss_orient' in outputs:
             loss_orient = outputs['loss_orient']
-            loss = loss +
+            loss = loss + self.opt.lambda_orient * loss_orient

         return pred_rgb, pred_ws, loss

@@ -398,7 +398,7 @@ class Trainer(object):
         # alphas = alphas ** 2 # skewed entropy, favors 0 over 1
         loss_entropy = (- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()

-        loss =
+        loss = self.opt.lambda_entropy * loss_entropy

         return pred_rgb, pred_depth, loss

@@ -638,7 +638,7 @@ class Trainer(object):
         return outputs

     def train_one_epoch(self, loader):
-        self.log(f"==> Start Training Epoch {self.epoch}, lr={self.optimizer.param_groups[0]['lr']:.6f} ...")
+        self.log(f"==> Start Training {self.workspace} Epoch {self.epoch}, lr={self.optimizer.param_groups[0]['lr']:.6f} ...")

         total_loss = 0
         if self.local_rank == 0 and self.report_metric_at_train:
@@ -722,7 +722,7 @@ class Trainer(object):


     def evaluate_one_epoch(self, loader, name=None):
-        self.log(f"++> Evaluate at epoch {self.epoch} ...")
+        self.log(f"++> Evaluate {self.workspace} at epoch {self.epoch} ...")

         if name is None:
             name = f'{self.name}_ep{self.epoch:04d}'
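For context, a standalone sketch of the alpha-entropy regularizer that the new `--lambda_entropy` flag scales: the binary entropy of per-ray accumulated alphas, which pushes each ray toward fully opaque or fully empty. Clamping is added here to avoid log(0); toy values only:

```python
import torch

def alpha_entropy(weights_sum, eps=1e-6):
    # weights_sum: accumulated alpha per ray, values in [0, 1]
    alphas = weights_sum.clamp(eps, 1 - eps)
    return (-alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean()

alphas = torch.tensor([0.01, 0.5, 0.99])
loss_entropy = alpha_entropy(alphas)
print(loss_entropy)                      # ~0.39, dominated by the ambiguous 0.5 ray
loss = 1e-4 * loss_entropy               # scaled by lambda_entropy, as combined in train_step
```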
readme.md
CHANGED
@@ -4,14 +4,14 @@ A pytorch implementation of the text-to-3D model **Dreamfusion**, powered by the

 The original paper's project page: [_DreamFusion: Text-to-3D using 2D Diffusion_](https://dreamfusion3d.github.io/).

-Examples generated from text prompt `a
+Examples generated from text prompt `a high quality photo of a pineapple` viewed with the GUI in real time:

 https://user-images.githubusercontent.com/25863658/194241493-f3e68f78-aefe-479e-a4a8-001424a61b37.mp4

 ### [Gallery](https://github.com/ashawkey/stable-dreamfusion/issues/1) | [Update Logs](assets/update_logs.md)

 # Important Notice
-This project is a **work-in-progress**, and contains lots of differences from the paper. Also, many features are still not implemented now. **The current generation quality cannot match the results from the original paper, and still fail badly
+This project is a **work-in-progress**, and contains lots of differences from the paper. Also, many features are still not implemented now. **The current generation quality cannot match the results from the original paper, and many prompts still fail badly!**


 ## Notable differences from the paper
@@ -83,7 +83,7 @@ python main_nerf.py --text "a hamburger" --workspace trial_clip -O --guidance cl
 python main_nerf.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
 ```

-# Code organization
+# Code organization & Advanced tips

 This is a simple description of the most important implementation details.
 If you are interested in improving this repo, this might be a starting point.
@@ -101,14 +101,50 @@ w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
 grad = w * (noise_pred - noise)
 latents.backward(gradient=grad, retain_graph=True)
 ```
-* Other regularizations are in `./nerf/utils.py > Trainer > train_step`.
+* Other regularizations are in `./nerf/utils.py > Trainer > train_step`.
+    * The generation seems quite sensitive to regularizations on weights_sum (alphas for each ray). The original opacity loss tends to make NeRF disappear (zero density everywhere), so we use an entropy loss to replace it for now (it encourages alpha to be either 0 or 1).
 * NeRF Rendering core function: `./nerf/renderer.py > NeRFRenderer > run_cuda`.
+* Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. The current implementation harms training and is disabled.
+    * Use `--albedo_iters 1000` to enable random shading mode after 1000 steps, sampled from albedo, lambertian, and textureless.
+    * Light direction: the current implementation uses a plane light source instead of a point light source.
+* View-dependent prompting: `./nerf/provider.py > get_view_direction`.
+    * Use `--angle_overhead, --angle_front` to set the borders. How to better divide front/back/side regions?
+* Network backbone (`./nerf/network*.py`) can be chosen by the `--backbone` option, but `tcnn` and `vanilla` are not well tested.
+    * The occupancy-grid-based training acceleration (instant-ngp like) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later.
+* Spatial density bias (gaussian density blob): `./nerf/network*.py > NeRFNetwork > gaussian`.

 # Acknowledgement

 * The amazing original work: [_DreamFusion: Text-to-3D using 2D Diffusion_](https://dreamfusion3d.github.io/).
+    ```
+    @article{poole2022dreamfusion,
+        author = {Poole, Ben and Jain, Ajay and Barron, Jonathan T. and Mildenhall, Ben},
+        title = {DreamFusion: Text-to-3D using 2D Diffusion},
+        journal = {arXiv},
+        year = {2022},
+    }
+    ```

 * Huge thanks to the [Stable Diffusion](https://github.com/CompVis/stable-diffusion) and the [diffusers](https://github.com/huggingface/diffusers) library.

+    ```
+    @misc{rombach2021highresolution,
+        title={High-Resolution Image Synthesis with Latent Diffusion Models},
+        author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
+        year={2021},
+        eprint={2112.10752},
+        archivePrefix={arXiv},
+        primaryClass={cs.CV}
+    }
+
+    @misc{von-platen-etal-2022-diffusers,
+        author = {Patrick von Platen and Suraj Patil and Anton Lozhkov and Pedro Cuenca and Nathan Lambert and Kashif Rasul and Mishig Davaadorj and Thomas Wolf},
+        title = {Diffusers: State-of-the-art diffusion models},
+        year = {2022},
+        publisher = {GitHub},
+        journal = {GitHub repository},
+        howpublished = {\url{https://github.com/huggingface/diffusers}}
+    }
+    ```

 * The GUI is developed with [DearPyGui](https://github.com/hoffstadt/DearPyGui).