Spaces:

yslan
/

worldmem

Running on Zero

xizaoqu committed on Apr 10

Commit

594fef7

1 Parent(s): 0d5deae

update precision

Files changed (3) hide show

algorithms/worldmem/df_video.py CHANGED Viewed

@@ -829,8 +829,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
         for ai in range(len(new_actions)):
-            from time import time
-            start_time = time()
             last_frame = xs_pred[-1].clone()
             curr_actions = new_actions[ai]
@@ -886,7 +884,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
                 image_width=first_frame.shape[-1], image_height=first_frame.shape[-2]
             )
-            mid_time = time()
             # Perform sampling for each step in the scheduling matrix
             for m in range(scheduling_matrix.shape[0] - 1):
                 from_noise_levels, to_noise_levels = self._prepare_noise_levels(
@@ -905,10 +902,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
                     frame_idx=frame_idx_list
                 ).cpu()
-            end_time = time()
-            print("time:", end_time - start_time, "mid time:", mid_time - start_time)
             if condition_similar_length:
                 xs_pred = xs_pred[:-condition_similar_length]

         for ai in range(len(new_actions)):
             last_frame = xs_pred[-1].clone()
             curr_actions = new_actions[ai]
                 image_width=first_frame.shape[-1], image_height=first_frame.shape[-2]
             )
             # Perform sampling for each step in the scheduling matrix
             for m in range(scheduling_matrix.shape[0] - 1):
                 from_noise_levels, to_noise_levels = self._prepare_noise_levels(
                     frame_idx=frame_idx_list
                 ).cpu()
             if condition_similar_length:
                 xs_pred = xs_pred[:-condition_similar_length]

algorithms/worldmem/models/dit.py CHANGED Viewed

@@ -487,8 +487,6 @@ class DiT(nn.Module):
         t: (B, T,) tensor of diffusion timesteps
         """
-        from time import time
-        start = time()
         B, T, C, H, W = x.shape
         # add spatial embeddings
@@ -552,8 +550,6 @@ class DiT(nn.Module):
         # print("self.blocks[0].r_adaLN_modulation[1].weight:", self.blocks[0].r_adaLN_modulation[1].weight)
         # print("self.blocks[0].t_adaLN_modulation[1].weight:", self.blocks[0].t_adaLN_modulation[1].weight)
-        end_time = time()
-        print("in model time:", end_time - start)
         return x

         t: (B, T,) tensor of diffusion timesteps
         """
         B, T, C, H, W = x.shape
         # add spatial embeddings
         # print("self.blocks[0].r_adaLN_modulation[1].weight:", self.blocks[0].r_adaLN_modulation[1].weight)
         # print("self.blocks[0].t_adaLN_modulation[1].weight:", self.blocks[0].t_adaLN_modulation[1].weight)
         return x

app.py CHANGED Viewed

@@ -201,7 +201,11 @@ self_memory_c2w = None
 self_frame_idx = None
-@spaces.GPU()
 def run_interactive(first_frame, action, first_pose, device, self_frames, self_actions,
                             self_poses, self_memory_c2w, self_frame_idx):
     new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = worldmem.interactive(first_frame,
@@ -271,6 +275,7 @@ def reset():
     self_frames = None
     self_poses = None
     self_memory_c2w = None
     self_frame_idx = None
     memory_frames = load_image_as_tensor(DEFAULT_IMAGE).numpy()[None]

 self_frame_idx = None
+def get_duration_single_image_to_long_video(first_frame, action, first_pose, device, self_frames, self_actions,
+                            self_poses, self_memory_c2w, self_frame_idx):
+    return 5 * len(action) if self_actions is not None else 5
+@spaces.GPU(duration=get_duration_single_image_to_long_video)
 def run_interactive(first_frame, action, first_pose, device, self_frames, self_actions,
                             self_poses, self_memory_c2w, self_frame_idx):
     new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = worldmem.interactive(first_frame,
     self_frames = None
     self_poses = None
+    self_actions = None
     self_memory_c2w = None
     self_frame_idx = None
     memory_frames = load_image_as_tensor(DEFAULT_IMAGE).numpy()[None]