Spaces:
Runtime error
Runtime error
Hugo Flores Garcia
committed on
Commit
·
4a2dc41
1
Parent(s):
f1ccdc1
interface, cleanup imputation code
Browse files
- env/data.sh +1 -1
- requirements.txt +1 -1
- scripts/exp/train.py +22 -14
- vampnet/interface.py +7 -1
env/data.sh
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
export PATH_TO_DATA=~/data
|
| 2 |
|
| 3 |
if [[ $(hostname) == "oon17" ]]; then
|
| 4 |
-
export PATH_TO_DATA=/
|
| 5 |
fi
|
| 6 |
|
| 7 |
if [[ $(hostname) == "oon19" ]]; then
|
|
|
|
| 1 |
export PATH_TO_DATA=~/data
|
| 2 |
|
| 3 |
if [[ $(hostname) == "oon17" ]]; then
|
| 4 |
+
export PATH_TO_DATA=/data/
|
| 5 |
fi
|
| 6 |
|
| 7 |
if [[ $(hostname) == "oon19" ]]; then
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
argbind>=0.3.1
|
| 2 |
pytorch-ignite
|
| 3 |
rich
|
| 4 |
-
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@
|
| 5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
| 6 |
tqdm
|
| 7 |
tensorboard
|
|
|
|
| 1 |
argbind>=0.3.1
|
| 2 |
pytorch-ignite
|
| 3 |
rich
|
| 4 |
+
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
|
| 5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
| 6 |
tqdm
|
| 7 |
tensorboard
|
scripts/exp/train.py
CHANGED
|
@@ -547,30 +547,38 @@ def train(
|
|
| 547 |
|
| 548 |
def save_imputation(self, z: torch.Tensor):
|
| 549 |
# imputations
|
| 550 |
-
|
| 551 |
-
|
| 552 |
|
| 553 |
-
|
| 554 |
-
|
|
|
|
|
|
|
| 555 |
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
)
|
| 561 |
-
|
| 562 |
-
|
|
|
|
| 563 |
|
| 564 |
imputed = []
|
| 565 |
for i in range(len(z)):
|
| 566 |
imputed.append(
|
| 567 |
-
|
| 568 |
codec=codec,
|
| 569 |
time_steps=z.shape[-1],
|
| 570 |
start_tokens=z[i][None, ...],
|
| 571 |
-
mask=
|
| 572 |
-
)
|
| 573 |
-
)
|
| 574 |
imputed = AudioSignal.batch(imputed)
|
| 575 |
|
| 576 |
for i in range(len(val_idx)):
|
|
|
|
| 547 |
|
| 548 |
def save_imputation(self, z: torch.Tensor):
|
| 549 |
# imputations
|
| 550 |
+
_prefix_amt = prefix_amt
|
| 551 |
+
_suffix_amt = suffix_amt
|
| 552 |
|
| 553 |
+
if _prefix_amt == 0:
|
| 554 |
+
_prefix_amt = 0.25
|
| 555 |
+
if _suffix_amt == 0:
|
| 556 |
+
_suffix_amt = 0.25
|
| 557 |
|
| 558 |
+
n_prefix = int(z.shape[-1] * _prefix_amt)
|
| 559 |
+
n_suffix = int(z.shape[-1] * _suffix_amt)
|
| 560 |
+
downsample_factor = None
|
| 561 |
+
|
| 562 |
+
vn = accel.unwrap(model)
|
| 563 |
+
|
| 564 |
+
z_mask, mask = vn.add_noise(
|
| 565 |
+
z, r=0.0, n_prefix=n_prefix, n_suffix=n_suffix,
|
| 566 |
+
downsample_factor=downsample_factor
|
| 567 |
)
|
| 568 |
+
|
| 569 |
+
imputed_noisy = vn.to_signal(z_mask, codec)
|
| 570 |
+
imputed_true = vn.to_signal(z, codec)
|
| 571 |
|
| 572 |
imputed = []
|
| 573 |
for i in range(len(z)):
|
| 574 |
imputed.append(
|
| 575 |
+
vn.sample(
|
| 576 |
codec=codec,
|
| 577 |
time_steps=z.shape[-1],
|
| 578 |
start_tokens=z[i][None, ...],
|
| 579 |
+
mask=mask[i][None, ...],
|
| 580 |
+
)
|
| 581 |
+
)
|
| 582 |
imputed = AudioSignal.batch(imputed)
|
| 583 |
|
| 584 |
for i in range(len(val_idx)):
|
vampnet/interface.py
CHANGED
|
@@ -53,7 +53,13 @@ class Interface:
|
|
| 53 |
|
| 54 |
@torch.inference_mode()
|
| 55 |
def encode(self, signal: AudioSignal):
|
| 56 |
-
signal =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
| 58 |
return z
|
| 59 |
|
|
|
|
| 53 |
|
| 54 |
@torch.inference_mode()
|
| 55 |
def encode(self, signal: AudioSignal):
|
| 56 |
+
signal = (
|
| 57 |
+
signal.clone().to(self.device)
|
| 58 |
+
.resample(self.codec.sample_rate)
|
| 59 |
+
.to_mono()
|
| 60 |
+
.normalize(-24)
|
| 61 |
+
.ensure_max_of_audio(1.0)
|
| 62 |
+
)
|
| 63 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
| 64 |
return z
|
| 65 |
|