Hugo Flores Garcia committed · Commit 2f3fb32 · 1 Parent(s): 3346920

interface

Files changed:
- .gitignore +2 -1
- demo.py +27 -5
- vampnet/interface.py +23 -12
- vampnet/mask.py +4 -4
.gitignore
CHANGED
@@ -176,4 +176,5 @@ lyrebird-audio-codec
 samples-*/**
 
 gradio-outputs/
-models/
+models/
+samples*/
demo.py
CHANGED
@@ -130,8 +130,6 @@ def _vamp(data, return_mask=False):
     out_dir = OUT_DIR / str(uuid.uuid4())
     out_dir.mkdir()
     sig = at.AudioSignal(data[input_audio])
-    #pitch shift input
-    sig = sig.shift_pitch(data[input_pitch_shift])
 
     # TODO: random pitch shift of segments in the signal to prompt! window size should be a parameter, pitch shift width should be a parameter
 
@@ -160,10 +158,20 @@ def _vamp(data, return_mask=False):
     mask = pmask.mask_or(
         mask, pmask.onset_mask(sig, z, interface, width=data[onset_mask_width])
     )
+    if data[beat_mask_width] > 0:
+        beat_mask = interface.make_beat_mask(
+            sig,
+            before_beat_s=(data[beat_mask_width]/1000)/2,
+            after_beat_s=(data[beat_mask_width]/1000)/2,
+            mask_upbeats=not data[beat_mask_downbeats],
+        )
+        mask = pmask.mask_and(mask, beat_mask)
+
     # these should be the last two mask ops
     mask = pmask.dropout(mask, data[dropout])
     mask = pmask.codebook_unmask(mask, ncc)
 
+
     print(f"created mask with: linear random {data[rand_mask_intensity]}, inpaint {data[prefix_s]}:{data[suffix_s]}, periodic {data[periodic_p]}:{data[periodic_w]}, dropout {data[dropout]}, codebook unmask {ncc}, onset mask {data[onset_mask_width]}, num steps {data[num_steps]}, init temp {data[temp]}, use coarse2fine {data[use_coarse2fine]}")
     # save the mask as a txt file
     np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
@@ -322,6 +330,18 @@ with gr.Blocks() as demo:
                     value=5,
                 )
 
+                beat_mask_width = gr.Slider(
+                    label="beat mask width (in milliseconds)",
+                    minimum=0,
+                    maximum=200,
+                    value=0,
+                )
+                beat_mask_downbeats = gr.Checkbox(
+                    label="beat mask downbeats only?",
+                    value=False
+                )
+
+
             with gr.Accordion("extras ", open=False):
                 n_conditioning_codebooks = gr.Number(
                     label="number of conditioning codebooks. probably 0",
@@ -355,14 +375,14 @@ with gr.Blocks() as demo:
                 temp = gr.Slider(
                     label="temperature",
                     minimum=0.0,
-                    maximum=
+                    maximum=3.0,
                     value=0.8
                 )
 
             with gr.Accordion("sampling settings", open=False):
                 typical_filtering = gr.Checkbox(
                     label="typical filtering ",
-                    value=
+                    value=False
                 )
                 typical_mass = gr.Slider(
                     label="typical mass (should probably stay between 0.1 and 0.5)",
@@ -440,7 +460,9 @@ with gr.Blocks() as demo:
         typical_filtering,
         typical_mass,
         typical_min_tokens,
-        checkpoint_key
+        checkpoint_key,
+        beat_mask_width,
+        beat_mask_downbeats
     }
 
     # connect widgets
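The new beat-mask controls flow through the demo as follows: beat_mask_width is a window width in milliseconds that gets split evenly before and after each detected beat, and the resulting beat mask is intersected with whatever mask the earlier steps built via pmask.mask_or / pmask.onset_mask. Below is a minimal sketch of that one step in isolation, using the same interface, pmask, data, sig, and widget objects demo.py already has; the helper name add_beat_mask is made up for illustration.

def add_beat_mask(mask, sig, data, interface, pmask,
                  beat_mask_width, beat_mask_downbeats):
    """Intersect `mask` with a beat-aligned mask when the beat-mask slider is non-zero."""
    width_ms = data[beat_mask_width]          # slider value, in milliseconds
    if width_ms > 0:
        half_width_s = (width_ms / 1000) / 2  # half the window on each side of a beat, in seconds
        beat_mask = interface.make_beat_mask(
            sig,
            before_beat_s=half_width_s,
            after_beat_s=half_width_s,
            mask_upbeats=not data[beat_mask_downbeats],  # downbeats only when the checkbox is ticked
        )
        mask = pmask.mask_and(mask, beat_mask)  # element-wise AND of the two binary masks
    return mask

At the slider's maximum of 200 ms, for example, before_beat_s and after_beat_s both come out to 0.1 s.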
vampnet/interface.py
CHANGED
@@ -265,7 +265,12 @@ class Interface(torch.nn.Module):
         if invert:
             mask = 1 - mask
 
-
+        mask = mask[None, None, :].bool().long()
+        if self.c2f is not None:
+            mask = mask.repeat(1, self.c2f.n_codebooks, 1)
+        else:
+            mask = mask.repeat(1, self.coarse.n_codebooks, 1)
+        return mask
 
     def coarse_to_fine(
         self,
@@ -349,26 +354,32 @@ if __name__ == "__main__":
         coarse_ckpt="./models/spotdl/coarse.pth",
         coarse2fine_ckpt="./models/spotdl/c2f.pth",
         codec_ckpt="./models/spotdl/codec.pth",
-        device="cuda"
+        device="cuda",
+        wavebeat_ckpt="./models/wavebeat.pth"
     )
 
-
+
+    sig = at.AudioSignal.zeros(duration=10, sample_rate=44100)
 
     z = interface.encode(sig)
 
-    mask = linear_random(z, 1.0)
-    mask = mask_and(
-        mask, periodic_mask(
-            z,
-            32,
-            1,
-            random_roll=True
-        )
+    # mask = linear_random(z, 1.0)
+    # mask = mask_and(
+    #     mask, periodic_mask(
+    #         z,
+    #         32,
+    #         1,
+    #         random_roll=True
+    #     )
+    # )
+
+    mask = interface.make_beat_mask(
+        sig, 0.0, 0.075
     )
     # mask = dropout(mask, 0.0)
     # mask = codebook_unmask(mask, 0)
 
-
+    breakpoint()
     zv, mask_z = interface.coarse_vamp(
         z,
         mask=mask,
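The first interface.py hunk makes make_beat_mask return its mask in the (batch, n_codebooks, seq) layout the rest of the pipeline expects (compare the asserts in vampnet/mask.py below), repeating the per-timestep mask across the c2f model's codebooks when one is loaded and across the coarse model's otherwise. A standalone sketch of that reshaping in plain PyTorch; expand_time_mask and the example sizes are illustrative, not vampnet API:

import torch

def expand_time_mask(time_mask: torch.Tensor, n_codebooks: int) -> torch.Tensor:
    """Broadcast a (seq,) per-timestep mask to the (1, n_codebooks, seq) long layout."""
    assert time_mask.ndim == 1, "expects a (seq,) per-timestep mask"
    mask = time_mask[None, None, :].bool().long()  # (seq,) -> (1, 1, seq), binarized to 0/1 long
    return mask.repeat(1, n_codebooks, 1)          # copy the same time mask onto every codebook row

# e.g. a 10-step mask marking steps 2..5, broadcast over 4 codebooks:
tm = torch.zeros(10)
tm[2:6] = 1
print(expand_time_mask(tm, 4).shape)  # torch.Size([1, 4, 10])

The .bool().long() round trip is what keeps the downstream "mask must be binary" and "mask must be long dtype" asserts satisfied.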
vampnet/mask.py
CHANGED
@@ -26,9 +26,9 @@ def apply_mask(
     mask: torch.Tensor,
     mask_token: int
 ):
-    assert mask.ndim == 3, "mask must be (batch, n_codebooks, seq)"
-    assert mask.shape == x.shape, "mask must be same shape as x"
-    assert mask.dtype == torch.long, "mask must be long dtype"
+    assert mask.ndim == 3, "mask must be (batch, n_codebooks, seq), but got {mask.ndim}"
+    assert mask.shape == x.shape, f"mask must be same shape as x, but got {mask.shape} and {x.shape}"
+    assert mask.dtype == torch.long, "mask must be long dtype, but got {mask.dtype}"
     assert ~torch.any(mask > 1), "mask must be binary"
     assert ~torch.any(mask < 0), "mask must be binary"
 
@@ -163,7 +163,7 @@ def mask_or(
     mask1: torch.Tensor,
     mask2: torch.Tensor
 ):
-    assert mask1.shape == mask2.shape, "masks must be same shape"
+    assert mask1.shape == mask2.shape, f"masks must be same shape, but got {mask1.shape} and {mask2.shape}"
     assert mask1.max() <= 1, "mask1 must be binary"
     assert mask2.max() <= 1, "mask2 must be binary"
     assert mask1.min() >= 0, "mask1 must be binary"
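The mask.py hunks only change assertion messages so that a failure reports the offending shapes instead of a generic string. A small self-contained illustration of the pattern; check_masks is a made-up stand-in (only mask_or's asserts appear in this diff, not its body):

import torch

def check_masks(mask1: torch.Tensor, mask2: torch.Tensor) -> None:
    """Same shape/binary checks as mask_or, with the shapes interpolated into the message."""
    assert mask1.shape == mask2.shape, (
        f"masks must be same shape, but got {mask1.shape} and {mask2.shape}"
    )
    assert mask1.max() <= 1 and mask1.min() >= 0, "mask1 must be binary"
    assert mask2.max() <= 1 and mask2.min() >= 0, "mask2 must be binary"

check_masks(torch.ones(1, 4, 10, dtype=torch.long),
            torch.ones(1, 4, 10, dtype=torch.long))   # passes
# check_masks(torch.ones(1, 4, 10), torch.ones(1, 4, 12)) would raise:
# AssertionError: masks must be same shape, but got torch.Size([1, 4, 10]) and torch.Size([1, 4, 12])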