anonymous20250515 committed on
Commit
a91890a
·
1 Parent(s): d0043de
Files changed (6) hide show
  1. compressor.ckpt +3 -0
  2. config_compressor.json +133 -0
  3. config_extractor.yaml +34 -0
  4. corrector.ckpt +3 -0
  5. extractor.pt +3 -0
  6. tsr.pt +3 -0
compressor.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b246733ebdf91bb45221f0200acde0d9dcf125968526bac74ca01e0cf2dabf22
3
+ size 613995185
config_compressor.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "stft_autoencoder",
3
+ "sample_size": 25344,
4
+ "sample_rate": 16000,
5
+ "audio_channels": 1,
6
+ "model": {
7
+ "encoder": {
8
+ "type": "oobleck",
9
+ "config": {
10
+ "in_channels": 1,
11
+ "latent_dim": 256,
12
+ "n_fft": 512,
13
+ "hop_length": 256,
14
+ "win_length": 512,
15
+ "hidden_channels": 256,
16
+ "n_head": 4,
17
+ "approx_qk_dim": 512,
18
+ "emb_dim": 128,
19
+ "emb_ks": 1,
20
+ "emb_hs": 1,
21
+ "num_layers": 3
22
+ }
23
+ },
24
+ "decoder": {
25
+ "type": "oobleck",
26
+ "config": {
27
+ "out_channels": 1,
28
+ "latent_dim": 128,
29
+ "n_fft": 512,
30
+ "hop_length": 256,
31
+ "win_length": 512,
32
+ "hidden_channels": 256,
33
+ "n_head": 4,
34
+ "approx_qk_dim": 512,
35
+ "emb_dim": 128,
36
+ "emb_ks": 1,
37
+ "emb_hs": 1,
38
+ "num_layers": 3
39
+ }
40
+ },
41
+ "bottleneck": {
42
+ "type": "vae"
43
+ },
44
+ "latent_dim": 128,
45
+ "downsampling_ratio": 256,
46
+ "io_channels": 1
47
+ },
48
+ "training": {
49
+ "learning_rate": 1.5e-4,
50
+ "warmup_steps": 0,
51
+ "use_ema": false,
52
+ "optimizer_configs": {
53
+ "autoencoder": {
54
+ "optimizer": {
55
+ "type": "AdamW",
56
+ "config": {
57
+ "betas": [0.8, 0.99],
58
+ "lr": 1.5e-4,
59
+ "weight_decay": 1e-3
60
+ }
61
+ },
62
+ "scheduler": {
63
+ "type": "InverseLR",
64
+ "config": {
65
+ "inv_gamma": 200000,
66
+ "power": 0.5,
67
+ "warmup": 0.999
68
+ }
69
+ }
70
+ },
71
+ "discriminator": {
72
+ "optimizer": {
73
+ "type": "AdamW",
74
+ "config": {
75
+ "betas": [0.8, 0.99],
76
+ "lr": 3e-4,
77
+ "weight_decay": 1e-3
78
+ }
79
+ },
80
+ "scheduler": {
81
+ "type": "InverseLR",
82
+ "config": {
83
+ "inv_gamma": 200000,
84
+ "power": 0.5,
85
+ "warmup": 0.999
86
+ }
87
+ }
88
+ }
89
+ },
90
+ "loss_configs": {
91
+ "discriminator": {
92
+ "type": "encodec",
93
+ "config": {
94
+ "filters": 64,
95
+ "n_ffts": [1280, 640, 320, 160, 80],
96
+ "hop_lengths": [320, 160, 80, 40, 20],
97
+ "win_lengths": [1280, 640, 320, 160, 80]
98
+ },
99
+ "weights": {
100
+ "adversarial": 0.1,
101
+ "feature_matching": 5.0
102
+ }
103
+ },
104
+ "spectral": {
105
+ "type": "mrstft",
106
+ "config": {
107
+ "fft_sizes": [1280, 640, 320, 160, 80, 40, 20],
108
+ "hop_sizes": [320, 160, 80, 40, 20, 10, 5],
109
+ "win_lengths": [1280, 640, 320, 160, 80, 40, 20],
110
+ "perceptual_weighting": true
111
+ },
112
+ "weights": {
113
+ "mrstft": 1.0
114
+ }
115
+ },
116
+ "time": {
117
+ "type": "l1",
118
+ "weights": {
119
+ "l1": 0.0
120
+ }
121
+ },
122
+ "bottleneck": {
123
+ "type": "kl",
124
+ "weights": {
125
+ "kl": 1e-4
126
+ }
127
+ }
128
+ },
129
+ "demo": {
130
+ "demo_every": 10000
131
+ }
132
+ }
133
+ }
config_extractor.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 1.0
2
+
3
+ system: "large"
4
+
5
+ ddim:
6
+ v_prediction: true
7
+ diffusers:
8
+ num_train_timesteps: 1000
9
+ beta_schedule: 'scaled_linear'
10
+ beta_start: 0.00085
11
+ beta_end: 0.012
12
+ prediction_type: 'v_prediction'
13
+ rescale_betas_zero_snr: true
14
+ timestep_spacing: 'trailing'
15
+ clip_sample: false
16
+
17
+ diffwrap:
18
+ ViT:
19
+ in_chans: 384
20
+ embed_dim: 512
21
+ depth: 16
22
+ num_heads: 8
23
+ mlp_ratio: 4.0
24
+ use_checkpoint: false
25
+ UDiT:
26
+ in_chans: 256
27
+ out_chans: 128
28
+ embed_dim: 1024
29
+ depth: 16
30
+ num_heads: 16
31
+ mlp_ratio: 4.0
32
+ use_checkpoint: false
33
+ context_dim: 384
34
+ context_fusion: 'cross'
corrector.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113f40dead02b1cf08a65f361aff40ea38ffdda9bff531536a7c495b9da169ab
3
+ size 1313618142
extractor.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:140170bc47345c32c14085e95ddf36bf3b47a0f872a1c69357950a20d5baeb43
3
+ size 5690032357
tsr.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd71bb9650707190053528bb356f74f5736580b7839150e57c7f4b08164c4a4c
3
+ size 1771671708