Text-to-Speech
Moshi
English
French
tts
audio
adefossez committed on
Commit
8654c6d
·
verified ·
1 Parent(s): 50fddc5

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +161 -0
config.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "card": 2048,
3
+ "n_q": 32,
4
+ "dep_q": 32,
5
+ "delays": [
6
+ 0,
7
+ 0,
8
+ 2,
9
+ 2,
10
+ 2,
11
+ 2,
12
+ 2,
13
+ 2,
14
+ 2,
15
+ 2,
16
+ 2,
17
+ 2,
18
+ 2,
19
+ 2,
20
+ 2,
21
+ 2,
22
+ 2,
23
+ 2,
24
+ 2,
25
+ 2,
26
+ 2,
27
+ 2,
28
+ 2,
29
+ 2,
30
+ 2,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "dim": 2048,
41
+ "text_card": 8000,
42
+ "existing_text_padding_id": 3,
43
+ "num_heads": 16,
44
+ "num_layers": 16,
45
+ "hidden_scale": 4.125,
46
+ "causal": true,
47
+ "layer_scale": null,
48
+ "context": 500,
49
+ "max_period": 10000,
50
+ "gating": "silu",
51
+ "norm": "rms_norm_f32",
52
+ "positional_embedding": "rope",
53
+ "depformer_dim": 1024,
54
+ "depformer_num_heads": 16,
55
+ "depformer_num_layers": 4,
56
+ "depformer_dim_feedforward": 3072,
57
+ "depformer_multi_linear": true,
58
+ "depformer_pos_emb": "none",
59
+ "depformer_weights_per_step": true,
60
+ "depformer_low_rank_embeddings": 128,
61
+ "demux_second_stream": true,
62
+ "text_card_out": null,
63
+ "conditioners": {
64
+ "speaker_wavs": {
65
+ "type": "tensor",
66
+ "tensor": {
67
+ "dim": 512
68
+ }
69
+ },
70
+ "cfg": {
71
+ "type": "lut",
72
+ "lut": {
73
+ "n_bins": 7,
74
+ "dim": 16,
75
+ "tokenizer": "noop",
76
+ "possible_values": [
77
+ "1.0",
78
+ "1.5",
79
+ "2.0",
80
+ "2.5",
81
+ "3.0",
82
+ "3.5",
83
+ "4.0"
84
+ ]
85
+ }
86
+ },
87
+ "control": {
88
+ "type": "lut",
89
+ "lut": {
90
+ "dim": 2048,
91
+ "n_bins": 1,
92
+ "tokenizer": "noop",
93
+ "possible_values": [
94
+ "ok"
95
+ ]
96
+ }
97
+ }
98
+ },
99
+ "fuser": {
100
+ "cross_attention_pos_emb": true,
101
+ "cross_attention_pos_emb_scale": 1,
102
+ "sum": [
103
+ "control"
104
+ ],
105
+ "prepend": [],
106
+ "cross": [
107
+ "speaker_wavs"
108
+ ]
109
+ },
110
+ "cross_attention": true,
111
+ "tts_config": {
112
+ "audio_delay": 1.28,
113
+ "second_stream_ahead": 2
114
+ },
115
+ "model_id": {
116
+ "sig": "1e68beda",
117
+ "epoch": 240
118
+ },
119
+ "depformer_weights_per_step_schedule": [
120
+ 0,
121
+ 1,
122
+ 2,
123
+ 3,
124
+ 4,
125
+ 5,
126
+ 6,
127
+ 7,
128
+ 8,
129
+ 8,
130
+ 8,
131
+ 8,
132
+ 8,
133
+ 8,
134
+ 8,
135
+ 8,
136
+ 9,
137
+ 9,
138
+ 9,
139
+ 9,
140
+ 9,
141
+ 9,
142
+ 9,
143
+ 9,
144
+ 10,
145
+ 10,
146
+ 10,
147
+ 10,
148
+ 10,
149
+ 10,
150
+ 10,
151
+ 10
152
+ ],
153
+ "model_type": "tts",
154
+ "lm_gen_config": {
155
+ "temp": 0.6,
156
+ "text_temp": 0.6
157
+ },
158
+ "tokenizer_name": "tokenizer_spm_8k_en_fr_audio.model",
159
+ "mimi_name": "tokenizer-e351c8d8-checkpoint125.safetensors",
160
+ "moshi_name": "dsm_tts_1e68beda@240.safetensors"
161
+ }