Spaces:
Runtime error
Runtime error
Hugo Flores Garcia
committed on
Commit
·
5a343f4
1
Parent(s):
f4c9665
the refactor begins
Browse files
- Dockerfile +0 -39
- README.md +0 -21
- conf/{vampnet-c2f.yml → c2f.yml} +0 -0
- conf/interface/interface-c2f-exp.yml +0 -5
- conf/interface/{interface-jazzpop.yml → jazzpop.yml} +0 -0
- conf/interface/{interface-maestro.yml → maestro.yml} +0 -0
- conf/interface/{interface-spotdl.yml → spotdl.yml} +0 -0
- conf/lora/birds.yml +10 -0
- conf/lora/birdss.yml +12 -0
- conf/lora/constructions.yml +2 -2
- conf/lora/lora-is-this-charlie-parker.yml +2 -2
- conf/lora/lora.yml +1 -1
- conf/lora/underworld.yml +10 -0
- conf/vampnet-groovemidi.yml +0 -54
- conf/vampnet-maestro.yml +0 -21
- demo.py +22 -10
- docker-compose.yml +0 -92
- requirements.txt +0 -31
- setup.py +3 -2
- vampnet/interface.py +25 -2
- vampnet/modules/base.py +1 -2
Dockerfile
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
FROM us.gcr.io/lyrebird-research/research-image/audio
|
2 |
-
|
3 |
-
COPY requirements.txt requirements.txt
|
4 |
-
ARG GITHUB_TOKEN
|
5 |
-
RUN echo machine github.com login ${GITHUB_TOKEN} > ~/.netrc
|
6 |
-
|
7 |
-
COPY env/alias.sh /alias.sh
|
8 |
-
COPY env/entry_script.sh /entry_script.sh
|
9 |
-
RUN cat /alias.sh >> ~/.zshrc
|
10 |
-
|
11 |
-
# USER researcher
|
12 |
-
RUN pip install Cython
|
13 |
-
RUN pip install madmom
|
14 |
-
RUN pip install --upgrade -r requirements.txt
|
15 |
-
RUN pip install --upgrade tensorflow
|
16 |
-
RUN pip install --upgrade librosa
|
17 |
-
RUN pip install --upgrade numba
|
18 |
-
RUN pip install protobuf==3.20
|
19 |
-
ENV PYTHONPATH "$PYTHONPATH:/u/home/src"
|
20 |
-
ENV NUMBA_CACHE_DIR=/tmp/
|
21 |
-
|
22 |
-
USER root
|
23 |
-
RUN wget https://github.com/jgm/pandoc/releases/download/2.18/pandoc-2.18-1-amd64.deb
|
24 |
-
RUN dpkg -i pandoc-2.18-1-amd64.deb
|
25 |
-
RUN apt-get update && apt-get install task-spooler
|
26 |
-
|
27 |
-
RUN head -n -1 /entry_script.sh > /entry_script_jupyter.sh
|
28 |
-
RUN head -n -1 /entry_script.sh > /entry_script_tensorboard.sh
|
29 |
-
RUN head -n -1 /entry_script.sh > /entry_script_gradio.sh
|
30 |
-
|
31 |
-
RUN echo \
|
32 |
-
'su -p ${USER} -c "source ~/.zshrc && jupyter lab --ip=0.0.0.0"' >> \
|
33 |
-
/entry_script_jupyter.sh
|
34 |
-
RUN echo \
|
35 |
-
'su -p ${USER} -c "source ~/.zshrc && tensorboard --logdir=$TENSORBOARD_PATH --samples_per_plugin audio=500 --bind_all"' >> \
|
36 |
-
/entry_script_tensorboard.sh
|
37 |
-
RUN echo \
|
38 |
-
'su -p ${USER} -c "source ~/.zshrc && python app.py --args.load=conf/app.yml"' >> \
|
39 |
-
/entry_script_gradio.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -2,27 +2,6 @@
|
|
2 |
|
3 |
This repository contains recipes for training generative music models on top of the Lyrebird Audio Codec.
|
4 |
|
5 |
-
## Install hooks
|
6 |
-
|
7 |
-
First install the pre-commit util:
|
8 |
-
|
9 |
-
https://pre-commit.com/#install
|
10 |
-
|
11 |
-
pip install pre-commit # with pip
|
12 |
-
brew install pre-commit # on Mac
|
13 |
-
|
14 |
-
Then install the git hooks
|
15 |
-
|
16 |
-
pre-commit install
|
17 |
-
# check .pre-commit-config.yaml for details of hooks
|
18 |
-
|
19 |
-
Upon `git commit`, the pre-commit hooks will be run automatically on the staged files (i.e. those added by `git add`)
|
20 |
-
|
21 |
-
**N.B. By default, pre-commit checks only run on staged files**
|
22 |
-
|
23 |
-
If you need to run it on all files:
|
24 |
-
|
25 |
-
pre-commit run --all-files
|
26 |
|
27 |
## Development
|
28 |
### Setting everything up
|
|
|
2 |
|
3 |
This repository contains recipes for training generative music models on top of the Lyrebird Audio Codec.
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
## Development
|
7 |
### Setting everything up
|
conf/{vampnet-c2f.yml → c2f.yml}
RENAMED
File without changes
|
conf/interface/interface-c2f-exp.yml
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
Interface.coarse_ckpt: /runs/c2f-exp-03.22.23/ckpt/random/epoch=400/vampnet/weights.pth
|
2 |
-
Interface.coarse2fine_ckpt: runs/c2f-exp-03.22.23/ckpt/random/epoch=400/vampnet/weights.pth
|
3 |
-
Interface.codec_ckpt: /runs/codec-ckpt/codec.pth
|
4 |
-
Interface.coarse_chunk_size_s: 5
|
5 |
-
Interface.coarse2fine_chunk_size_s: 3
|
|
|
|
|
|
|
|
|
|
|
|
conf/interface/{interface-jazzpop.yml → jazzpop.yml}
RENAMED
File without changes
|
conf/interface/{interface-maestro.yml → maestro.yml}
RENAMED
File without changes
|
conf/interface/{interface-spotdl.yml → spotdl.yml}
RENAMED
File without changes
|
conf/lora/birds.yml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$include:
|
2 |
+
- conf/lora/lora.yml
|
3 |
+
|
4 |
+
fine_tune: True
|
5 |
+
|
6 |
+
train/AudioLoader.sources:
|
7 |
+
- /media/CHONK/hugo/spotdl/subsets/birds
|
8 |
+
|
9 |
+
val/AudioLoader.sources:
|
10 |
+
- /media/CHONK/hugo/spotdl/subsets/birds
|
conf/lora/birdss.yml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$include:
|
2 |
+
- conf/lora/lora.yml
|
3 |
+
|
4 |
+
fine_tune: True
|
5 |
+
|
6 |
+
train/AudioLoader.sources:
|
7 |
+
- /media/CHONK/hugo/spotdl/subsets/birds
|
8 |
+
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/
|
9 |
+
|
10 |
+
val/AudioLoader.sources:
|
11 |
+
- /media/CHONK/hugo/spotdl/subsets/birds
|
12 |
+
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/
|
conf/lora/constructions.yml
CHANGED
@@ -4,7 +4,7 @@ $include:
|
|
4 |
fine_tune: True
|
5 |
|
6 |
train/AudioLoader.sources:
|
7 |
-
- /media/CHONK/hugo/spotdl/subsets/constructions
|
8 |
|
9 |
val/AudioLoader.sources:
|
10 |
-
- /media/CHONK/hugo/spotdl/subsets/constructions
|
|
|
4 |
fine_tune: True
|
5 |
|
6 |
train/AudioLoader.sources:
|
7 |
+
- /media/CHONK/hugo/spotdl/subsets/constructions/third.mp3
|
8 |
|
9 |
val/AudioLoader.sources:
|
10 |
+
- /media/CHONK/hugo/spotdl/subsets/constructions/third.mp3
|
conf/lora/lora-is-this-charlie-parker.yml
CHANGED
@@ -4,7 +4,7 @@ $include:
|
|
4 |
fine_tune: True
|
5 |
|
6 |
train/AudioLoader.sources:
|
7 |
-
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/
|
8 |
|
9 |
val/AudioLoader.sources:
|
10 |
-
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/
|
|
|
4 |
fine_tune: True
|
5 |
|
6 |
train/AudioLoader.sources:
|
7 |
+
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/Charlie Parker - Donna Lee.mp3
|
8 |
|
9 |
val/AudioLoader.sources:
|
10 |
+
- /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/Charlie Parker - Donna Lee.mp3
|
conf/lora/lora.yml
CHANGED
@@ -8,7 +8,7 @@ train/AudioDataset.n_examples: 10000000
|
|
8 |
val/AudioDataset.n_examples: 10
|
9 |
|
10 |
|
11 |
-
NoamScheduler.warmup:
|
12 |
|
13 |
epoch_length: 100
|
14 |
save_audio_epochs: 2
|
|
|
8 |
val/AudioDataset.n_examples: 10
|
9 |
|
10 |
|
11 |
+
NoamScheduler.warmup: 400
|
12 |
|
13 |
epoch_length: 100
|
14 |
save_audio_epochs: 2
|
conf/lora/underworld.yml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$include:
|
2 |
+
- conf/lora/lora.yml
|
3 |
+
|
4 |
+
fine_tune: True
|
5 |
+
|
6 |
+
train/AudioLoader.sources:
|
7 |
+
- /media/CHONK/hugo/spotdl/subsets/underworld.mp3
|
8 |
+
|
9 |
+
val/AudioLoader.sources:
|
10 |
+
- /media/CHONK/hugo/spotdl/subsets/underworld.mp3
|
conf/vampnet-groovemidi.yml
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
$include:
|
2 |
-
- conf/vampnet.yml
|
3 |
-
|
4 |
-
VampNet.embedding_dim: 512
|
5 |
-
VampNet.n_layers: 12
|
6 |
-
VampNet.n_heads: 8
|
7 |
-
|
8 |
-
AudioDataset.duration: 12.0
|
9 |
-
|
10 |
-
train/AudioDataset.n_examples: 10000000
|
11 |
-
train/AudioLoader.sources:
|
12 |
-
# drummer 1 sessions 1, 2, and 3
|
13 |
-
- /data/e-gmd-v1.0.0/drummer1/session1
|
14 |
-
- /data/e-gmd-v1.0.0/drummer1/session2
|
15 |
-
- /data/e-gmd-v1.0.0/drummer1/session3
|
16 |
-
# drummer 3 sessions 1 and 2
|
17 |
-
- /data/e-gmd-v1.0.0/drummer3/session1
|
18 |
-
- /data/e-gmd-v1.0.0/drummer3/session2
|
19 |
-
# drummer 4 session 1
|
20 |
-
- /data/e-gmd-v1.0.0/drummer4/session1
|
21 |
-
# drummer 5 sessions 1 and 2
|
22 |
-
- /data/e-gmd-v1.0.0/drummer5/session1
|
23 |
-
- /data/e-gmd-v1.0.0/drummer5/session2
|
24 |
-
# drummer 6 session 1, 2, and 3
|
25 |
-
- /data/e-gmd-v1.0.0/drummer6/session1
|
26 |
-
- /data/e-gmd-v1.0.0/drummer6/session2
|
27 |
-
- /data/e-gmd-v1.0.0/drummer6/session3
|
28 |
-
# drummer 7 session 1, 2 and 3
|
29 |
-
- /data/e-gmd-v1.0.0/drummer7/session1
|
30 |
-
- /data/e-gmd-v1.0.0/drummer7/session2
|
31 |
-
- /data/e-gmd-v1.0.0/drummer7/session3
|
32 |
-
# drummer 8 session 1
|
33 |
-
- /data/e-gmd-v1.0.0/drummer8/session1
|
34 |
-
# drummer 9 session 1
|
35 |
-
- /data/e-gmd-v1.0.0/drummer9/session1
|
36 |
-
# drummer 10 session 1
|
37 |
-
- /data/e-gmd-v1.0.0/drummer10/session1
|
38 |
-
|
39 |
-
|
40 |
-
val/AudioDataset.n_examples: 500
|
41 |
-
val/AudioLoader.sources:
|
42 |
-
# drummer 1 eval session
|
43 |
-
- /data/e-gmd-v1.0.0/drummer1/eval_session
|
44 |
-
# drummer 5 eval session
|
45 |
-
- /data/e-gmd-v1.0.0/drummer5/eval_session
|
46 |
-
# drummer 7 eval session
|
47 |
-
- /data/e-gmd-v1.0.0/drummer7/eval_session
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
test/AudioDataset.n_examples: 1000
|
52 |
-
test/AudioLoader.sources:
|
53 |
-
# drummer 8 eval session
|
54 |
-
- /data/e-gmd-v1.0.0/drummer8/eval_session
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
conf/vampnet-maestro.yml
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
$include:
|
2 |
-
- conf/vampnet.yml
|
3 |
-
|
4 |
-
VampNet.embedding_dim: 512
|
5 |
-
VampNet.n_layers: 12
|
6 |
-
VampNet.n_heads: 8
|
7 |
-
|
8 |
-
AudioDataset.duration: 12.0
|
9 |
-
|
10 |
-
train/AudioDataset.n_examples: 10000000
|
11 |
-
train/AudioLoader.sources:
|
12 |
-
- /data/maestro-reorg/train
|
13 |
-
|
14 |
-
val/AudioDataset.n_examples: 500
|
15 |
-
val/AudioLoader.sources:
|
16 |
-
- /data/maestro-reorg/val
|
17 |
-
|
18 |
-
|
19 |
-
test/AudioDataset.n_examples: 1000
|
20 |
-
test/AudioLoader.sources:
|
21 |
-
- /data/maestro-reorg/test
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
demo.py
CHANGED
@@ -62,6 +62,7 @@ def load_random_audio():
|
|
62 |
def ez_vamp(
|
63 |
input_audio, init_temp, final_temp,
|
64 |
mask_periodic_amt, mask_periodic_width, num_steps,
|
|
|
65 |
):
|
66 |
print(input_audio)
|
67 |
sig = at.AudioSignal(input_audio)
|
@@ -74,7 +75,8 @@ def ez_vamp(
|
|
74 |
prefix_dur_s=0.0,
|
75 |
suffix_dur_s=0.0,
|
76 |
num_vamps=1,
|
77 |
-
downsample_factor=mask_periodic_amt,
|
|
|
78 |
periodic_width=mask_periodic_width,
|
79 |
periodic_dropout=0.0,
|
80 |
periodic_width_dropout=0.0,
|
@@ -105,7 +107,7 @@ def vamp(
|
|
105 |
num_vamps, mode, use_beats, num_steps, snap_to_beats,
|
106 |
beat_unmask_drop, mask_periodic_width,
|
107 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
108 |
-
n_conditioning_codebooks, use_coarse2fine
|
109 |
):
|
110 |
# try:
|
111 |
print(input_audio)
|
@@ -146,6 +148,7 @@ def vamp(
|
|
146 |
suffix_dur_s=suffix_s,
|
147 |
num_vamps=num_vamps,
|
148 |
downsample_factor=mask_periodic_amt,
|
|
|
149 |
periodic_width=mask_periodic_width,
|
150 |
periodic_dropout=mask_periodic_dropout,
|
151 |
periodic_width_dropout=mask_periodic_width_dropout,
|
@@ -158,7 +161,7 @@ def vamp(
|
|
158 |
|
159 |
if use_coarse2fine:
|
160 |
zv = interface.coarse_to_fine(zv)
|
161 |
-
|
162 |
|
163 |
sig = interface.to_signal(zv).cpu()
|
164 |
print("done")
|
@@ -166,9 +169,9 @@ def vamp(
|
|
166 |
out_dir = OUT_DIR / str(uuid.uuid4())
|
167 |
out_dir.mkdir()
|
168 |
sig.write(out_dir / "output.wav")
|
169 |
-
|
170 |
-
|
171 |
-
return sig.path_to_file,
|
172 |
# except Exception as e:
|
173 |
# raise gr.Error(f"failed with error: {e}")
|
174 |
|
@@ -180,7 +183,7 @@ def save_vamp(
|
|
180 |
mask_up_chk, up_factor,
|
181 |
num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats,
|
182 |
beat_unmask_drop, mask_periodic_width, mask_periodic_dropout, mask_periodic_width_dropout,
|
183 |
-
n_conditioning_codebooks, use_coarse2fine
|
184 |
):
|
185 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
186 |
out_dir.mkdir(parents=True, exist_ok=True)
|
@@ -215,6 +218,7 @@ def save_vamp(
|
|
215 |
"mask_periodic_width_dropout": mask_periodic_width_dropout,
|
216 |
"n_conditioning_codebooks": n_conditioning_codebooks,
|
217 |
"use_coarse2fine": use_coarse2fine,
|
|
|
218 |
}
|
219 |
|
220 |
# save with yaml
|
@@ -333,6 +337,14 @@ with gr.Blocks() as demo:
|
|
333 |
precision=0,
|
334 |
)
|
335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
mask_periodic_amt = gr.Slider(
|
337 |
label="periodic hint (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
|
338 |
minimum=0,
|
@@ -501,7 +513,7 @@ with gr.Blocks() as demo:
|
|
501 |
num_vamps, mode, use_beats, num_steps, snap_to_beats,
|
502 |
beat_unmask_drop, mask_periodic_width,
|
503 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
504 |
-
n_conditioning_codebooks, use_coarse2fine
|
505 |
],
|
506 |
outputs=[output_audio, audio_mask],
|
507 |
api_name="vamp"
|
@@ -520,7 +532,7 @@ with gr.Blocks() as demo:
|
|
520 |
notes_text, use_beats, num_steps, snap_to_beats,
|
521 |
beat_unmask_drop, mask_periodic_width,
|
522 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
523 |
-
n_conditioning_codebooks, use_coarse2fine
|
524 |
],
|
525 |
outputs=[thank_you, download_file]
|
526 |
)
|
@@ -529,7 +541,7 @@ with gr.Blocks() as demo:
|
|
529 |
ez_vamp_button.click(
|
530 |
fn=ez_vamp,
|
531 |
inputs=[input_audio, init_temp, final_temp, mask_periodic_amt,
|
532 |
-
mask_periodic_width, num_steps ],
|
533 |
outputs=[output_audio],
|
534 |
api_name="ez_vamp"
|
535 |
)
|
|
|
62 |
def ez_vamp(
|
63 |
input_audio, init_temp, final_temp,
|
64 |
mask_periodic_amt, mask_periodic_width, num_steps,
|
65 |
+
stretch_factor,
|
66 |
):
|
67 |
print(input_audio)
|
68 |
sig = at.AudioSignal(input_audio)
|
|
|
75 |
prefix_dur_s=0.0,
|
76 |
suffix_dur_s=0.0,
|
77 |
num_vamps=1,
|
78 |
+
downsample_factor=mask_periodic_amt,
|
79 |
+
stretch_factor=stretch_factor,
|
80 |
periodic_width=mask_periodic_width,
|
81 |
periodic_dropout=0.0,
|
82 |
periodic_width_dropout=0.0,
|
|
|
107 |
num_vamps, mode, use_beats, num_steps, snap_to_beats,
|
108 |
beat_unmask_drop, mask_periodic_width,
|
109 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
110 |
+
n_conditioning_codebooks, use_coarse2fine, stretch_factor,
|
111 |
):
|
112 |
# try:
|
113 |
print(input_audio)
|
|
|
148 |
suffix_dur_s=suffix_s,
|
149 |
num_vamps=num_vamps,
|
150 |
downsample_factor=mask_periodic_amt,
|
151 |
+
stretch_factor=stretch_factor,
|
152 |
periodic_width=mask_periodic_width,
|
153 |
periodic_dropout=mask_periodic_dropout,
|
154 |
periodic_width_dropout=mask_periodic_width_dropout,
|
|
|
161 |
|
162 |
if use_coarse2fine:
|
163 |
zv = interface.coarse_to_fine(zv)
|
164 |
+
mask = interface.to_signal(mask_z).cpu()
|
165 |
|
166 |
sig = interface.to_signal(zv).cpu()
|
167 |
print("done")
|
|
|
169 |
out_dir = OUT_DIR / str(uuid.uuid4())
|
170 |
out_dir.mkdir()
|
171 |
sig.write(out_dir / "output.wav")
|
172 |
+
mask.write(out_dir / "mask.wav")
|
173 |
+
return sig.path_to_file, mask.path_to_file
|
174 |
+
# return sig.path_to_file, mask_z
|
175 |
# except Exception as e:
|
176 |
# raise gr.Error(f"failed with error: {e}")
|
177 |
|
|
|
183 |
mask_up_chk, up_factor,
|
184 |
num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats,
|
185 |
beat_unmask_drop, mask_periodic_width, mask_periodic_dropout, mask_periodic_width_dropout,
|
186 |
+
n_conditioning_codebooks, use_coarse2fine, stretch_factor
|
187 |
):
|
188 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
189 |
out_dir.mkdir(parents=True, exist_ok=True)
|
|
|
218 |
"mask_periodic_width_dropout": mask_periodic_width_dropout,
|
219 |
"n_conditioning_codebooks": n_conditioning_codebooks,
|
220 |
"use_coarse2fine": use_coarse2fine,
|
221 |
+
"stretch_factor": stretch_factor,
|
222 |
}
|
223 |
|
224 |
# save with yaml
|
|
|
337 |
precision=0,
|
338 |
)
|
339 |
|
340 |
+
stretch_factor = gr.Slider(
|
341 |
+
label="time stretch factor",
|
342 |
+
minimum=0,
|
343 |
+
maximum=64,
|
344 |
+
step=1,
|
345 |
+
value=1,
|
346 |
+
)
|
347 |
+
|
348 |
mask_periodic_amt = gr.Slider(
|
349 |
label="periodic hint (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
|
350 |
minimum=0,
|
|
|
513 |
num_vamps, mode, use_beats, num_steps, snap_to_beats,
|
514 |
beat_unmask_drop, mask_periodic_width,
|
515 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
516 |
+
n_conditioning_codebooks, use_coarse2fine, stretch_factor
|
517 |
],
|
518 |
outputs=[output_audio, audio_mask],
|
519 |
api_name="vamp"
|
|
|
532 |
notes_text, use_beats, num_steps, snap_to_beats,
|
533 |
beat_unmask_drop, mask_periodic_width,
|
534 |
mask_periodic_dropout, mask_periodic_width_dropout,
|
535 |
+
n_conditioning_codebooks, use_coarse2fine, stretch_factor
|
536 |
],
|
537 |
outputs=[thank_you, download_file]
|
538 |
)
|
|
|
541 |
ez_vamp_button.click(
|
542 |
fn=ez_vamp,
|
543 |
inputs=[input_audio, init_temp, final_temp, mask_periodic_amt,
|
544 |
+
mask_periodic_width, num_steps, stretch_factor ],
|
545 |
outputs=[output_audio],
|
546 |
api_name="ez_vamp"
|
547 |
)
|
docker-compose.yml
DELETED
@@ -1,92 +0,0 @@
|
|
1 |
-
|
2 |
-
version: "3.5"
|
3 |
-
services:
|
4 |
-
tensorrt:
|
5 |
-
build:
|
6 |
-
context: .
|
7 |
-
dockerfile: ./deployment_build/dockerfile
|
8 |
-
args:
|
9 |
-
GITHUB_TOKEN: ${GITHUB_TOKEN}
|
10 |
-
profiles:
|
11 |
-
- tensorrt
|
12 |
-
volumes:
|
13 |
-
- ./:/u/home/src
|
14 |
-
- ~/.config/gcloud:/root/.config/gcloud
|
15 |
-
deploy:
|
16 |
-
resources:
|
17 |
-
limits:
|
18 |
-
# match production limits
|
19 |
-
cpus: '7'
|
20 |
-
memory: 25000M
|
21 |
-
reservations:
|
22 |
-
devices:
|
23 |
-
- driver: nvidia
|
24 |
-
count: 1
|
25 |
-
capabilities: [gpu]
|
26 |
-
working_dir: /u/home/src
|
27 |
-
entrypoint:
|
28 |
-
- python
|
29 |
-
- -m
|
30 |
-
- wav2wav.converter
|
31 |
-
base:
|
32 |
-
build:
|
33 |
-
context: .
|
34 |
-
dockerfile: ./Dockerfile
|
35 |
-
args:
|
36 |
-
GITHUB_TOKEN: ${GITHUB_TOKEN}
|
37 |
-
volumes:
|
38 |
-
- .:/u/home/src
|
39 |
-
- ~/.wav2wav:/u/home/.wav2wav
|
40 |
-
- ${PATH_TO_DATA}:/data
|
41 |
-
- ${PATH_TO_RUNS}:/runs
|
42 |
-
- ~/.config/gcloud:/u/home/.config/gcloud
|
43 |
-
- ~/.zsh_history:/u/home/.zsh_history
|
44 |
-
environment:
|
45 |
-
- GITHUB_TOKEN
|
46 |
-
- DISCOURSE_API_USERNAME
|
47 |
-
- DISCOURSE_SERVER
|
48 |
-
- DISCOURSE_API_KEY
|
49 |
-
- HOST_USER_ID
|
50 |
-
- HOST_USER_GID
|
51 |
-
- JUPYTER_TOKEN
|
52 |
-
- PATH_TO_DATA=/data
|
53 |
-
- PATH_TO_RUNS=/runs
|
54 |
-
- TENSORBOARD_PATH
|
55 |
-
- MPLCONFIGDIR=/u/home/.mplconfig
|
56 |
-
shm_size: 32G
|
57 |
-
working_dir: /u/home/src
|
58 |
-
deploy:
|
59 |
-
resources:
|
60 |
-
reservations:
|
61 |
-
devices:
|
62 |
-
- driver: nvidia
|
63 |
-
capabilities: [gpu]
|
64 |
-
dev:
|
65 |
-
extends: base
|
66 |
-
profiles:
|
67 |
-
- interactive
|
68 |
-
stdin_open: true
|
69 |
-
tty: true
|
70 |
-
ports:
|
71 |
-
- 7860:7860
|
72 |
-
jupyter:
|
73 |
-
extends: base
|
74 |
-
ports:
|
75 |
-
- ${JUPYTER_PORT}:8888
|
76 |
-
entrypoint:
|
77 |
-
- /bin/bash
|
78 |
-
- /entry_script_jupyter.sh
|
79 |
-
tensorboard:
|
80 |
-
extends: base
|
81 |
-
ports:
|
82 |
-
- ${TENSORBOARD_PORT}:6006
|
83 |
-
entrypoint:
|
84 |
-
- /bin/bash
|
85 |
-
- /entry_script_tensorboard.sh
|
86 |
-
gradio:
|
87 |
-
extends: base
|
88 |
-
ports:
|
89 |
-
- 7860:7860
|
90 |
-
entrypoint:
|
91 |
-
- /bin/bash
|
92 |
-
- /entry_script_gradio.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
argbind>=0.3.1
|
2 |
-
pytorch-ignite
|
3 |
-
rich
|
4 |
-
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
|
5 |
-
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@hf/vampnet-temp
|
6 |
-
torch==1.13.1
|
7 |
-
torchaudio==0.13.1
|
8 |
-
tqdm
|
9 |
-
tensorboard
|
10 |
-
google-cloud-logging==2.2.0
|
11 |
-
pytest
|
12 |
-
pytest-cov
|
13 |
-
pynvml
|
14 |
-
psutil
|
15 |
-
pandas
|
16 |
-
onnx
|
17 |
-
onnx-simplifier
|
18 |
-
seaborn
|
19 |
-
jupyterlab
|
20 |
-
jupyterlab-link-share
|
21 |
-
pandas
|
22 |
-
watchdog
|
23 |
-
pesq
|
24 |
-
tabulate
|
25 |
-
torchmetrics
|
26 |
-
codebraid==0.5.0
|
27 |
-
jupyter-client==6.1.12
|
28 |
-
tensorboardX
|
29 |
-
gradio
|
30 |
-
einops
|
31 |
-
frechet_audio_distance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.py
CHANGED
@@ -32,12 +32,13 @@ setup(
|
|
32 |
"rich",
|
33 |
"audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git",
|
34 |
"lac @ git+https://github.com/hugofloresgarcia/lac.git",
|
35 |
-
"wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat.git",
|
36 |
"torch==2.0",
|
37 |
"tqdm",
|
38 |
"tensorboard",
|
39 |
"google-cloud-logging==2.2.0",
|
40 |
"einops",
|
41 |
-
"frechet_audio_distance"
|
|
|
42 |
],
|
43 |
)
|
|
|
32 |
"rich",
|
33 |
"audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git",
|
34 |
"lac @ git+https://github.com/hugofloresgarcia/lac.git",
|
35 |
+
# "wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat.git",
|
36 |
"torch==2.0",
|
37 |
"tqdm",
|
38 |
"tensorboard",
|
39 |
"google-cloud-logging==2.2.0",
|
40 |
"einops",
|
41 |
+
# "frechet_audio_distance",
|
42 |
+
"gradio"
|
43 |
],
|
44 |
)
|
vampnet/interface.py
CHANGED
@@ -249,6 +249,7 @@ class Interface(torch.nn.Module):
|
|
249 |
suffix_dur_s: float = 0.0,
|
250 |
num_vamps: int = 1,
|
251 |
downsample_factor: int = None,
|
|
|
252 |
periodic_width: int = 1,
|
253 |
periodic_dropout=0.0,
|
254 |
periodic_width_dropout=0.0,
|
@@ -269,11 +270,33 @@ class Interface(torch.nn.Module):
|
|
269 |
n_prefix = self.s2t(prefix_dur_s)
|
270 |
n_suffix = self.s2t(suffix_dur_s)
|
271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
assert cz.shape[-1] <= self.s2t(self.coarse.chunk_size_s), f"the sequence of tokens provided must match the one specified in the coarse chunk size, but got {cz.shape[-1]} and {self.s2t(self.coarse.chunk_size_s)}"
|
273 |
assert n_prefix + n_suffix < c_seq_len, "prefix and suffix must be smaller than the chunk size"
|
274 |
|
275 |
if swap_prefix_suffix:
|
276 |
-
# swap the prefix and suffix
|
277 |
assert n_prefix == n_suffix, "prefix and suffix must be the same size for now"
|
278 |
cz[:, :, :n_prefix], cz[:, :, c_seq_len-n_suffix:] = cz[:, :, c_seq_len-n_suffix:], cz[:, :, :n_prefix].clone()
|
279 |
|
@@ -295,7 +318,7 @@ class Interface(torch.nn.Module):
|
|
295 |
downsample_factor=downsample_factor,
|
296 |
periodic_width=periodic_width,
|
297 |
periodic_dropout=periodic_dropout,
|
298 |
-
add_random_periodic_offset=
|
299 |
periodic_width_dropout=periodic_width_dropout,
|
300 |
mask=cz_mask,
|
301 |
ext_mask=ext_mask,
|
|
|
249 |
suffix_dur_s: float = 0.0,
|
250 |
num_vamps: int = 1,
|
251 |
downsample_factor: int = None,
|
252 |
+
stretch_factor: int = None,
|
253 |
periodic_width: int = 1,
|
254 |
periodic_dropout=0.0,
|
255 |
periodic_width_dropout=0.0,
|
|
|
270 |
n_prefix = self.s2t(prefix_dur_s)
|
271 |
n_suffix = self.s2t(suffix_dur_s)
|
272 |
|
273 |
+
|
274 |
+
# hmm, should be a better way to do this? think we just need a mask builder class
|
275 |
+
add_random_periodic_offset = True
|
276 |
+
|
277 |
+
if stretch_factor is not None and stretch_factor > 1:
|
278 |
+
print(f"stretching by {stretch_factor}")
|
279 |
+
assert stretch_factor >= 1, "stretch factor must be >= 1"
|
280 |
+
cz = cz.repeat_interleave(stretch_factor, dim=-1)
|
281 |
+
|
282 |
+
# the downsample factor is now relative to the stretched sequence
|
283 |
+
assert downsample_factor is None or downsample_factor <= 2, "downsample_factor must be None when stretch_factor is not None"
|
284 |
+
|
285 |
+
downsample_factor = stretch_factor
|
286 |
+
add_random_periodic_offset = False
|
287 |
+
|
288 |
+
assert n_prefix == 0 and n_suffix == 0, "prefix and suffix must be 0 when stretch_factor is not None"
|
289 |
+
assert ext_mask is None, "ext_mask must be None when stretch_factor is not None"
|
290 |
+
|
291 |
+
# trim cz to the original length
|
292 |
+
cz = cz[:, :, :c_seq_len]
|
293 |
+
|
294 |
+
|
295 |
assert cz.shape[-1] <= self.s2t(self.coarse.chunk_size_s), f"the sequence of tokens provided must match the one specified in the coarse chunk size, but got {cz.shape[-1]} and {self.s2t(self.coarse.chunk_size_s)}"
|
296 |
assert n_prefix + n_suffix < c_seq_len, "prefix and suffix must be smaller than the chunk size"
|
297 |
|
298 |
if swap_prefix_suffix:
|
299 |
+
# swap the prefix and suffix
|
300 |
assert n_prefix == n_suffix, "prefix and suffix must be the same size for now"
|
301 |
cz[:, :, :n_prefix], cz[:, :, c_seq_len-n_suffix:] = cz[:, :, c_seq_len-n_suffix:], cz[:, :, :n_prefix].clone()
|
302 |
|
|
|
318 |
downsample_factor=downsample_factor,
|
319 |
periodic_width=periodic_width,
|
320 |
periodic_dropout=periodic_dropout,
|
321 |
+
add_random_periodic_offset=add_random_periodic_offset,
|
322 |
periodic_width_dropout=periodic_width_dropout,
|
323 |
mask=cz_mask,
|
324 |
ext_mask=ext_mask,
|
vampnet/modules/base.py
CHANGED
@@ -71,7 +71,7 @@ class VampBase(at.ml.BaseModel):
|
|
71 |
probs[i, :, -n:] = 0.0
|
72 |
|
73 |
# if we have a downsample factor, set the mask prob to 0
|
74 |
-
if downsample_factor is not None:
|
75 |
if not isinstance(downsample_factor, torch.Tensor):
|
76 |
downsample_factor = scalar_to_batch_tensor(downsample_factor, x.shape[0])
|
77 |
for i, factor in enumerate(downsample_factor):
|
@@ -200,7 +200,6 @@ class VampBase(at.ml.BaseModel):
|
|
200 |
# find where the mask token is and replace it with silence in the audio
|
201 |
for tstep in range(z.shape[-1]):
|
202 |
if torch.any(z[:, :, tstep] == self.mask_token):
|
203 |
-
print("mask token found at step", tstep)
|
204 |
sample_idx_0 = tstep * codec.hop_length
|
205 |
sample_idx_1 = sample_idx_0 + codec.hop_length
|
206 |
signal.samples[:, :, sample_idx_0:sample_idx_1] = 0.0
|
|
|
71 |
probs[i, :, -n:] = 0.0
|
72 |
|
73 |
# if we have a downsample factor, set the mask prob to 0
|
74 |
+
if downsample_factor is not None and downsample_factor > 0:
|
75 |
if not isinstance(downsample_factor, torch.Tensor):
|
76 |
downsample_factor = scalar_to_batch_tensor(downsample_factor, x.shape[0])
|
77 |
for i, factor in enumerate(downsample_factor):
|
|
|
200 |
# find where the mask token is and replace it with silence in the audio
|
201 |
for tstep in range(z.shape[-1]):
|
202 |
if torch.any(z[:, :, tstep] == self.mask_token):
|
|
|
203 |
sample_idx_0 = tstep * codec.hop_length
|
204 |
sample_idx_1 = sample_idx_0 + codec.hop_length
|
205 |
signal.samples[:, :, sample_idx_0:sample_idx_1] = 0.0
|