Mohamed2210 committed on
Commit
f56d59e
·
verified ·
1 Parent(s): 1df81c3

Initial commit of wav2vec2 model

Browse files
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: jonatasgrosman/wav2vec2-large-xlsr-53-arabic
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - common_voice_11_0
9
+ model-index:
10
+ - name: wav2vec2-large-xlsr-ar
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # wav2vec2-large-xlsr-ar
18
+
19
+ This model is a fine-tuned version of [jonatasgrosman/wav2vec2-large-xlsr-53-arabic](https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-arabic) on the common_voice_11_0 dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - eval_loss: 0.8298
22
+ - eval_wer: 0.6859
23
+ - eval_runtime: 192.8428
24
+ - eval_samples_per_second: 15.515
25
+ - eval_steps_per_second: 1.939
26
+ - epoch: 5.1217
27
+ - step: 4000
28
+
29
+ ## Model description
30
+
31
+ More information needed
32
+
33
+ ## Intended uses & limitations
34
+
35
+ More information needed
36
+
37
+ ## Training and evaluation data
38
+
39
+ More information needed
40
+
41
+ ## Training procedure
42
+
43
+ ### Training hyperparameters
44
+
45
+ The following hyperparameters were used during training:
46
+ - learning_rate: 0.0003
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 8
49
+ - seed: 42
50
+ - gradient_accumulation_steps: 2
51
+ - total_train_batch_size: 32
52
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
53
+ - lr_scheduler_type: linear
54
+ - lr_scheduler_warmup_steps: 500
55
+ - num_epochs: 10
56
+ - mixed_precision_training: Native AMP
57
+
58
+ ### Framework versions
59
+
60
+ - Transformers 4.49.0
61
+ - Pytorch 2.5.1+cu121
62
+ - Datasets 3.3.1
63
+ - Tokenizers 0.21.0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 57,
3
+ "<s>": 56
4
+ }
checkpoint-3600/config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
3
+ "activation_dropout": 0.05,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 256,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": true,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 55,
89
+ "proj_codevector_dim": 256,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.49.0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 58,
115
+ "xvector_output_dim": 512
116
+ }
checkpoint-3600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d774b699a30727eb33a5e7217da3e149f61b2d95b6bd7b5e736dc668157ca205
3
+ size 1262045280
checkpoint-3600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed6a832e72a245371d5398a7bd48dca9483460443752acdfa374b33280553c5
3
+ size 2490635318
checkpoint-3600/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
checkpoint-3600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771b93fdea688e3ab7e29159ff3cadf87094f642ead96ea353a019167376a1c7
3
+ size 14244
checkpoint-3600/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa16ed8883894544f2c7e1b4e8394eab28d70a7345c1739e0f82c96289b9ceee
3
+ size 988
checkpoint-3600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0a13d8337e5762d7c6ad79cc94c41699eda28a00da0ddc051c003391dec30b3
3
+ size 1064
checkpoint-3600/trainer_state.json ADDED
@@ -0,0 +1,1122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.678503046127067,
3
+ "best_model_checkpoint": "./wav2vec2-large-xlsr-ar/checkpoint-3600",
4
+ "epoch": 4.609865470852018,
5
+ "eval_steps": 400,
6
+ "global_step": 3600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.032030749519538756,
13
+ "grad_norm": 40.31281661987305,
14
+ "learning_rate": 1.3799999999999998e-05,
15
+ "loss": 25.5997,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.06406149903907751,
20
+ "grad_norm": 70.23408508300781,
21
+ "learning_rate": 2.7599999999999997e-05,
22
+ "loss": 30.9717,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.09609224855861627,
27
+ "grad_norm": 3.012640953063965,
28
+ "learning_rate": 4.259999999999999e-05,
29
+ "loss": 9.1896,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.12812299807815503,
34
+ "grad_norm": 30.363168716430664,
35
+ "learning_rate": 5.76e-05,
36
+ "loss": 12.5434,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.1601537475976938,
41
+ "grad_norm": 13.795882225036621,
42
+ "learning_rate": 7.259999999999999e-05,
43
+ "loss": 5.814,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.19218449711723254,
48
+ "grad_norm": 99.32404327392578,
49
+ "learning_rate": 8.759999999999999e-05,
50
+ "loss": 9.7835,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.2242152466367713,
55
+ "grad_norm": 4.108926773071289,
56
+ "learning_rate": 0.0001026,
57
+ "loss": 5.1191,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.25624599615631005,
62
+ "grad_norm": 7.492372512817383,
63
+ "learning_rate": 0.0001176,
64
+ "loss": 3.7315,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.2882767456758488,
69
+ "grad_norm": 7.183516502380371,
70
+ "learning_rate": 0.0001326,
71
+ "loss": 3.6219,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.3203074951953876,
76
+ "grad_norm": 2.490111827850342,
77
+ "learning_rate": 0.00014759999999999998,
78
+ "loss": 3.6824,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.3523382447149263,
83
+ "grad_norm": 0.5032612681388855,
84
+ "learning_rate": 0.0001626,
85
+ "loss": 3.5972,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.3843689942344651,
90
+ "grad_norm": 3.7791531085968018,
91
+ "learning_rate": 0.00017759999999999998,
92
+ "loss": 3.6051,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.41639974375400385,
97
+ "grad_norm": 2.6355323791503906,
98
+ "learning_rate": 0.0001926,
99
+ "loss": 3.5084,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.4484304932735426,
104
+ "grad_norm": 1.8424248695373535,
105
+ "learning_rate": 0.00020759999999999998,
106
+ "loss": 3.6254,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.48046124279308133,
111
+ "grad_norm": 2.8042099475860596,
112
+ "learning_rate": 0.0002226,
113
+ "loss": 3.5539,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.5124919923126201,
118
+ "grad_norm": 1.1292295455932617,
119
+ "learning_rate": 0.0002376,
120
+ "loss": 3.5401,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.5124919923126201,
125
+ "eval_loss": 3.4790358543395996,
126
+ "eval_runtime": 190.1673,
127
+ "eval_samples_per_second": 15.734,
128
+ "eval_steps_per_second": 1.967,
129
+ "eval_wer": 1.0,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 0.5445227418321589,
134
+ "grad_norm": 5.499414443969727,
135
+ "learning_rate": 0.00025259999999999996,
136
+ "loss": 3.4746,
137
+ "step": 425
138
+ },
139
+ {
140
+ "epoch": 0.5765534913516976,
141
+ "grad_norm": 3.2174575328826904,
142
+ "learning_rate": 0.0002676,
143
+ "loss": 3.5553,
144
+ "step": 450
145
+ },
146
+ {
147
+ "epoch": 0.6085842408712364,
148
+ "grad_norm": 0.8652946352958679,
149
+ "learning_rate": 0.0002826,
150
+ "loss": 3.4252,
151
+ "step": 475
152
+ },
153
+ {
154
+ "epoch": 0.6406149903907752,
155
+ "grad_norm": 2.645606517791748,
156
+ "learning_rate": 0.00029759999999999997,
157
+ "loss": 3.5508,
158
+ "step": 500
159
+ },
160
+ {
161
+ "epoch": 0.672645739910314,
162
+ "grad_norm": 0.9275538921356201,
163
+ "learning_rate": 0.00029913698630136987,
164
+ "loss": 3.4857,
165
+ "step": 525
166
+ },
167
+ {
168
+ "epoch": 0.7046764894298526,
169
+ "grad_norm": 1.0166261196136475,
170
+ "learning_rate": 0.00029810958904109586,
171
+ "loss": 3.5138,
172
+ "step": 550
173
+ },
174
+ {
175
+ "epoch": 0.7367072389493914,
176
+ "grad_norm": 4.685708045959473,
177
+ "learning_rate": 0.0002970821917808219,
178
+ "loss": 3.4017,
179
+ "step": 575
180
+ },
181
+ {
182
+ "epoch": 0.7687379884689302,
183
+ "grad_norm": 1.1367671489715576,
184
+ "learning_rate": 0.0002960547945205479,
185
+ "loss": 3.4927,
186
+ "step": 600
187
+ },
188
+ {
189
+ "epoch": 0.8007687379884689,
190
+ "grad_norm": 2.971071481704712,
191
+ "learning_rate": 0.00029502739726027395,
192
+ "loss": 3.4265,
193
+ "step": 625
194
+ },
195
+ {
196
+ "epoch": 0.8327994875080077,
197
+ "grad_norm": 2.657762289047241,
198
+ "learning_rate": 0.000294,
199
+ "loss": 3.4645,
200
+ "step": 650
201
+ },
202
+ {
203
+ "epoch": 0.8648302370275465,
204
+ "grad_norm": 2.87245774269104,
205
+ "learning_rate": 0.000292972602739726,
206
+ "loss": 3.3777,
207
+ "step": 675
208
+ },
209
+ {
210
+ "epoch": 0.8968609865470852,
211
+ "grad_norm": 1.2535868883132935,
212
+ "learning_rate": 0.00029194520547945203,
213
+ "loss": 3.4021,
214
+ "step": 700
215
+ },
216
+ {
217
+ "epoch": 0.928891736066624,
218
+ "grad_norm": 0.7670681476593018,
219
+ "learning_rate": 0.00029091780821917807,
220
+ "loss": 3.3667,
221
+ "step": 725
222
+ },
223
+ {
224
+ "epoch": 0.9609224855861627,
225
+ "grad_norm": 1.4213225841522217,
226
+ "learning_rate": 0.0002898904109589041,
227
+ "loss": 3.3387,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 0.9929532351057014,
232
+ "grad_norm": 1.161726951599121,
233
+ "learning_rate": 0.0002888630136986301,
234
+ "loss": 2.7481,
235
+ "step": 775
236
+ },
237
+ {
238
+ "epoch": 1.0243433696348494,
239
+ "grad_norm": 2.047264337539673,
240
+ "learning_rate": 0.00028783561643835616,
241
+ "loss": 2.002,
242
+ "step": 800
243
+ },
244
+ {
245
+ "epoch": 1.0243433696348494,
246
+ "eval_loss": 1.6117621660232544,
247
+ "eval_runtime": 190.6575,
248
+ "eval_samples_per_second": 15.693,
249
+ "eval_steps_per_second": 1.962,
250
+ "eval_wer": 0.9870322019147084,
251
+ "step": 800
252
+ },
253
+ {
254
+ "epoch": 1.0563741191543883,
255
+ "grad_norm": 3.6471316814422607,
256
+ "learning_rate": 0.00028680821917808215,
257
+ "loss": 1.7004,
258
+ "step": 825
259
+ },
260
+ {
261
+ "epoch": 1.088404868673927,
262
+ "grad_norm": 1.8413678407669067,
263
+ "learning_rate": 0.0002857808219178082,
264
+ "loss": 1.4543,
265
+ "step": 850
266
+ },
267
+ {
268
+ "epoch": 1.1204356181934658,
269
+ "grad_norm": 2.3082125186920166,
270
+ "learning_rate": 0.00028475342465753424,
271
+ "loss": 1.3853,
272
+ "step": 875
273
+ },
274
+ {
275
+ "epoch": 1.1524663677130045,
276
+ "grad_norm": 1.7227452993392944,
277
+ "learning_rate": 0.00028372602739726023,
278
+ "loss": 1.336,
279
+ "step": 900
280
+ },
281
+ {
282
+ "epoch": 1.1844971172325431,
283
+ "grad_norm": 2.1250977516174316,
284
+ "learning_rate": 0.0002826986301369863,
285
+ "loss": 1.349,
286
+ "step": 925
287
+ },
288
+ {
289
+ "epoch": 1.216527866752082,
290
+ "grad_norm": 1.6314936876296997,
291
+ "learning_rate": 0.0002816712328767123,
292
+ "loss": 1.2656,
293
+ "step": 950
294
+ },
295
+ {
296
+ "epoch": 1.2485586162716207,
297
+ "grad_norm": 2.001681089401245,
298
+ "learning_rate": 0.00028064383561643837,
299
+ "loss": 1.2556,
300
+ "step": 975
301
+ },
302
+ {
303
+ "epoch": 1.2805893657911596,
304
+ "grad_norm": 2.1396918296813965,
305
+ "learning_rate": 0.00027961643835616436,
306
+ "loss": 1.1932,
307
+ "step": 1000
308
+ },
309
+ {
310
+ "epoch": 1.3126201153106982,
311
+ "grad_norm": 1.9855870008468628,
312
+ "learning_rate": 0.0002785890410958904,
313
+ "loss": 1.1991,
314
+ "step": 1025
315
+ },
316
+ {
317
+ "epoch": 1.344650864830237,
318
+ "grad_norm": 1.1789072751998901,
319
+ "learning_rate": 0.0002775616438356164,
320
+ "loss": 1.1041,
321
+ "step": 1050
322
+ },
323
+ {
324
+ "epoch": 1.3766816143497758,
325
+ "grad_norm": 2.304903507232666,
326
+ "learning_rate": 0.00027653424657534244,
327
+ "loss": 1.1538,
328
+ "step": 1075
329
+ },
330
+ {
331
+ "epoch": 1.4087123638693146,
332
+ "grad_norm": 2.459096670150757,
333
+ "learning_rate": 0.00027550684931506843,
334
+ "loss": 1.2379,
335
+ "step": 1100
336
+ },
337
+ {
338
+ "epoch": 1.4407431133888533,
339
+ "grad_norm": 1.886155605316162,
340
+ "learning_rate": 0.00027447945205479453,
341
+ "loss": 1.1326,
342
+ "step": 1125
343
+ },
344
+ {
345
+ "epoch": 1.472773862908392,
346
+ "grad_norm": 1.1992785930633545,
347
+ "learning_rate": 0.0002734520547945205,
348
+ "loss": 1.0691,
349
+ "step": 1150
350
+ },
351
+ {
352
+ "epoch": 1.5048046124279308,
353
+ "grad_norm": 3.249142646789551,
354
+ "learning_rate": 0.00027242465753424657,
355
+ "loss": 1.0511,
356
+ "step": 1175
357
+ },
358
+ {
359
+ "epoch": 1.5368353619474697,
360
+ "grad_norm": 3.3806302547454834,
361
+ "learning_rate": 0.00027139726027397256,
362
+ "loss": 1.0618,
363
+ "step": 1200
364
+ },
365
+ {
366
+ "epoch": 1.5368353619474697,
367
+ "eval_loss": 1.0360716581344604,
368
+ "eval_runtime": 191.2082,
369
+ "eval_samples_per_second": 15.648,
370
+ "eval_steps_per_second": 1.956,
371
+ "eval_wer": 0.8269799825935596,
372
+ "step": 1200
373
+ },
374
+ {
375
+ "epoch": 1.5688661114670084,
376
+ "grad_norm": 2.508125066757202,
377
+ "learning_rate": 0.0002703698630136986,
378
+ "loss": 1.0652,
379
+ "step": 1225
380
+ },
381
+ {
382
+ "epoch": 1.600896860986547,
383
+ "grad_norm": 1.3717399835586548,
384
+ "learning_rate": 0.00026934246575342465,
385
+ "loss": 1.0354,
386
+ "step": 1250
387
+ },
388
+ {
389
+ "epoch": 1.6329276105060857,
390
+ "grad_norm": 1.5011590719223022,
391
+ "learning_rate": 0.00026831506849315064,
392
+ "loss": 1.0427,
393
+ "step": 1275
394
+ },
395
+ {
396
+ "epoch": 1.6649583600256246,
397
+ "grad_norm": 1.6448092460632324,
398
+ "learning_rate": 0.0002672876712328767,
399
+ "loss": 1.0498,
400
+ "step": 1300
401
+ },
402
+ {
403
+ "epoch": 1.6969891095451635,
404
+ "grad_norm": 1.4456716775894165,
405
+ "learning_rate": 0.0002662602739726027,
406
+ "loss": 1.0481,
407
+ "step": 1325
408
+ },
409
+ {
410
+ "epoch": 1.7290198590647021,
411
+ "grad_norm": 1.2869809865951538,
412
+ "learning_rate": 0.0002652328767123288,
413
+ "loss": 1.0271,
414
+ "step": 1350
415
+ },
416
+ {
417
+ "epoch": 1.7610506085842408,
418
+ "grad_norm": 4.315392971038818,
419
+ "learning_rate": 0.00026420547945205477,
420
+ "loss": 0.978,
421
+ "step": 1375
422
+ },
423
+ {
424
+ "epoch": 1.7930813581037797,
425
+ "grad_norm": 1.3269984722137451,
426
+ "learning_rate": 0.0002631780821917808,
427
+ "loss": 0.9891,
428
+ "step": 1400
429
+ },
430
+ {
431
+ "epoch": 1.8251121076233185,
432
+ "grad_norm": 1.6529700756072998,
433
+ "learning_rate": 0.0002621506849315068,
434
+ "loss": 0.9917,
435
+ "step": 1425
436
+ },
437
+ {
438
+ "epoch": 1.8571428571428572,
439
+ "grad_norm": 2.1668319702148438,
440
+ "learning_rate": 0.00026112328767123285,
441
+ "loss": 0.9745,
442
+ "step": 1450
443
+ },
444
+ {
445
+ "epoch": 1.8891736066623959,
446
+ "grad_norm": 6.553292751312256,
447
+ "learning_rate": 0.0002600958904109589,
448
+ "loss": 0.9898,
449
+ "step": 1475
450
+ },
451
+ {
452
+ "epoch": 1.9212043561819345,
453
+ "grad_norm": 1.2242108583450317,
454
+ "learning_rate": 0.0002590684931506849,
455
+ "loss": 0.9363,
456
+ "step": 1500
457
+ },
458
+ {
459
+ "epoch": 1.9532351057014734,
460
+ "grad_norm": 2.026926040649414,
461
+ "learning_rate": 0.00025804109589041093,
462
+ "loss": 0.9731,
463
+ "step": 1525
464
+ },
465
+ {
466
+ "epoch": 1.9852658552210123,
467
+ "grad_norm": 1.560719609260559,
468
+ "learning_rate": 0.000257013698630137,
469
+ "loss": 0.8685,
470
+ "step": 1550
471
+ },
472
+ {
473
+ "epoch": 2.01665598975016,
474
+ "grad_norm": 1.0007785558700562,
475
+ "learning_rate": 0.000255986301369863,
476
+ "loss": 0.8586,
477
+ "step": 1575
478
+ },
479
+ {
480
+ "epoch": 2.048686739269699,
481
+ "grad_norm": 1.0924744606018066,
482
+ "learning_rate": 0.000254958904109589,
483
+ "loss": 0.8025,
484
+ "step": 1600
485
+ },
486
+ {
487
+ "epoch": 2.048686739269699,
488
+ "eval_loss": 0.8233081102371216,
489
+ "eval_runtime": 191.163,
490
+ "eval_samples_per_second": 15.652,
491
+ "eval_steps_per_second": 1.956,
492
+ "eval_wer": 0.751348999129678,
493
+ "step": 1600
494
+ },
495
+ {
496
+ "epoch": 2.0807174887892375,
497
+ "grad_norm": 1.9123071432113647,
498
+ "learning_rate": 0.00025393150684931506,
499
+ "loss": 0.8919,
500
+ "step": 1625
501
+ },
502
+ {
503
+ "epoch": 2.1127482383087766,
504
+ "grad_norm": 1.7388331890106201,
505
+ "learning_rate": 0.00025290410958904105,
506
+ "loss": 0.8667,
507
+ "step": 1650
508
+ },
509
+ {
510
+ "epoch": 2.144778987828315,
511
+ "grad_norm": 1.247045874595642,
512
+ "learning_rate": 0.0002518767123287671,
513
+ "loss": 0.8716,
514
+ "step": 1675
515
+ },
516
+ {
517
+ "epoch": 2.176809737347854,
518
+ "grad_norm": 2.1057279109954834,
519
+ "learning_rate": 0.00025084931506849314,
520
+ "loss": 0.7654,
521
+ "step": 1700
522
+ },
523
+ {
524
+ "epoch": 2.2088404868673925,
525
+ "grad_norm": 1.0074440240859985,
526
+ "learning_rate": 0.00024982191780821913,
527
+ "loss": 0.8732,
528
+ "step": 1725
529
+ },
530
+ {
531
+ "epoch": 2.2408712363869316,
532
+ "grad_norm": 2.159853219985962,
533
+ "learning_rate": 0.0002487945205479452,
534
+ "loss": 0.7941,
535
+ "step": 1750
536
+ },
537
+ {
538
+ "epoch": 2.2729019859064703,
539
+ "grad_norm": 1.8050284385681152,
540
+ "learning_rate": 0.0002477671232876712,
541
+ "loss": 0.8496,
542
+ "step": 1775
543
+ },
544
+ {
545
+ "epoch": 2.304932735426009,
546
+ "grad_norm": 1.3987536430358887,
547
+ "learning_rate": 0.00024673972602739727,
548
+ "loss": 0.8332,
549
+ "step": 1800
550
+ },
551
+ {
552
+ "epoch": 2.3369634849455476,
553
+ "grad_norm": 2.5281410217285156,
554
+ "learning_rate": 0.00024571232876712326,
555
+ "loss": 0.8135,
556
+ "step": 1825
557
+ },
558
+ {
559
+ "epoch": 2.3689942344650863,
560
+ "grad_norm": 1.914908766746521,
561
+ "learning_rate": 0.0002446849315068493,
562
+ "loss": 0.8067,
563
+ "step": 1850
564
+ },
565
+ {
566
+ "epoch": 2.4010249839846254,
567
+ "grad_norm": 1.1049237251281738,
568
+ "learning_rate": 0.00024365753424657533,
569
+ "loss": 0.8027,
570
+ "step": 1875
571
+ },
572
+ {
573
+ "epoch": 2.433055733504164,
574
+ "grad_norm": 2.2418999671936035,
575
+ "learning_rate": 0.00024263013698630134,
576
+ "loss": 0.7857,
577
+ "step": 1900
578
+ },
579
+ {
580
+ "epoch": 2.4650864830237027,
581
+ "grad_norm": 1.4093470573425293,
582
+ "learning_rate": 0.00024160273972602736,
583
+ "loss": 0.8265,
584
+ "step": 1925
585
+ },
586
+ {
587
+ "epoch": 2.4971172325432414,
588
+ "grad_norm": 1.396606206893921,
589
+ "learning_rate": 0.0002405753424657534,
590
+ "loss": 0.7601,
591
+ "step": 1950
592
+ },
593
+ {
594
+ "epoch": 2.5291479820627805,
595
+ "grad_norm": 1.2854044437408447,
596
+ "learning_rate": 0.00023954794520547945,
597
+ "loss": 0.8408,
598
+ "step": 1975
599
+ },
600
+ {
601
+ "epoch": 2.561178731582319,
602
+ "grad_norm": 2.2053070068359375,
603
+ "learning_rate": 0.00023852054794520547,
604
+ "loss": 0.7199,
605
+ "step": 2000
606
+ },
607
+ {
608
+ "epoch": 2.561178731582319,
609
+ "eval_loss": 0.7817878127098083,
610
+ "eval_runtime": 192.0652,
611
+ "eval_samples_per_second": 15.578,
612
+ "eval_steps_per_second": 1.947,
613
+ "eval_wer": 0.7203655352480418,
614
+ "step": 2000
615
+ },
616
+ {
617
+ "epoch": 2.593209481101858,
618
+ "grad_norm": 1.6103401184082031,
619
+ "learning_rate": 0.0002374931506849315,
620
+ "loss": 0.7848,
621
+ "step": 2025
622
+ },
623
+ {
624
+ "epoch": 2.6252402306213964,
625
+ "grad_norm": 3.00805401802063,
626
+ "learning_rate": 0.0002364657534246575,
627
+ "loss": 0.7485,
628
+ "step": 2050
629
+ },
630
+ {
631
+ "epoch": 2.657270980140935,
632
+ "grad_norm": 1.0826023817062378,
633
+ "learning_rate": 0.00023543835616438353,
634
+ "loss": 0.8131,
635
+ "step": 2075
636
+ },
637
+ {
638
+ "epoch": 2.689301729660474,
639
+ "grad_norm": 2.3294951915740967,
640
+ "learning_rate": 0.00023441095890410955,
641
+ "loss": 0.7616,
642
+ "step": 2100
643
+ },
644
+ {
645
+ "epoch": 2.721332479180013,
646
+ "grad_norm": 1.232429027557373,
647
+ "learning_rate": 0.0002333835616438356,
648
+ "loss": 0.7925,
649
+ "step": 2125
650
+ },
651
+ {
652
+ "epoch": 2.7533632286995515,
653
+ "grad_norm": 1.8985693454742432,
654
+ "learning_rate": 0.00023235616438356164,
655
+ "loss": 0.7829,
656
+ "step": 2150
657
+ },
658
+ {
659
+ "epoch": 2.78539397821909,
660
+ "grad_norm": 1.1546630859375,
661
+ "learning_rate": 0.00023132876712328765,
662
+ "loss": 0.8053,
663
+ "step": 2175
664
+ },
665
+ {
666
+ "epoch": 2.8174247277386293,
667
+ "grad_norm": 1.2817527055740356,
668
+ "learning_rate": 0.00023030136986301367,
669
+ "loss": 0.7452,
670
+ "step": 2200
671
+ },
672
+ {
673
+ "epoch": 2.849455477258168,
674
+ "grad_norm": 1.7231945991516113,
675
+ "learning_rate": 0.00022927397260273972,
676
+ "loss": 0.7817,
677
+ "step": 2225
678
+ },
679
+ {
680
+ "epoch": 2.8814862267777066,
681
+ "grad_norm": 2.686530113220215,
682
+ "learning_rate": 0.00022824657534246574,
683
+ "loss": 0.7506,
684
+ "step": 2250
685
+ },
686
+ {
687
+ "epoch": 2.9135169762972453,
688
+ "grad_norm": 1.3286162614822388,
689
+ "learning_rate": 0.00022721917808219176,
690
+ "loss": 0.7947,
691
+ "step": 2275
692
+ },
693
+ {
694
+ "epoch": 2.945547725816784,
695
+ "grad_norm": 1.3969508409500122,
696
+ "learning_rate": 0.00022619178082191777,
697
+ "loss": 0.7174,
698
+ "step": 2300
699
+ },
700
+ {
701
+ "epoch": 2.977578475336323,
702
+ "grad_norm": 1.835070013999939,
703
+ "learning_rate": 0.0002251643835616438,
704
+ "loss": 0.7531,
705
+ "step": 2325
706
+ },
707
+ {
708
+ "epoch": 3.008968609865471,
709
+ "grad_norm": 2.8138489723205566,
710
+ "learning_rate": 0.00022413698630136986,
711
+ "loss": 0.7638,
712
+ "step": 2350
713
+ },
714
+ {
715
+ "epoch": 3.0409993593850095,
716
+ "grad_norm": 1.0872498750686646,
717
+ "learning_rate": 0.00022310958904109588,
718
+ "loss": 0.6102,
719
+ "step": 2375
720
+ },
721
+ {
722
+ "epoch": 3.073030108904548,
723
+ "grad_norm": 2.0995171070098877,
724
+ "learning_rate": 0.0002220821917808219,
725
+ "loss": 0.675,
726
+ "step": 2400
727
+ },
728
+ {
729
+ "epoch": 3.073030108904548,
730
+ "eval_loss": 0.8002874255180359,
731
+ "eval_runtime": 191.1551,
732
+ "eval_samples_per_second": 15.652,
733
+ "eval_steps_per_second": 1.957,
734
+ "eval_wer": 0.7121845082680592,
735
+ "step": 2400
736
+ },
737
+ {
738
+ "epoch": 3.1050608584240873,
739
+ "grad_norm": 1.8755576610565186,
740
+ "learning_rate": 0.00022105479452054792,
741
+ "loss": 0.5922,
742
+ "step": 2425
743
+ },
744
+ {
745
+ "epoch": 3.137091607943626,
746
+ "grad_norm": 0.8880970478057861,
747
+ "learning_rate": 0.00022002739726027397,
748
+ "loss": 0.6929,
749
+ "step": 2450
750
+ },
751
+ {
752
+ "epoch": 3.1691223574631646,
753
+ "grad_norm": 2.357203722000122,
754
+ "learning_rate": 0.00021899999999999998,
755
+ "loss": 0.5997,
756
+ "step": 2475
757
+ },
758
+ {
759
+ "epoch": 3.2011531069827033,
760
+ "grad_norm": 0.9104003310203552,
761
+ "learning_rate": 0.000217972602739726,
762
+ "loss": 0.728,
763
+ "step": 2500
764
+ },
765
+ {
766
+ "epoch": 3.233183856502242,
767
+ "grad_norm": 1.5452452898025513,
768
+ "learning_rate": 0.00021694520547945202,
769
+ "loss": 0.5799,
770
+ "step": 2525
771
+ },
772
+ {
773
+ "epoch": 3.265214606021781,
774
+ "grad_norm": 0.8508313894271851,
775
+ "learning_rate": 0.00021591780821917807,
776
+ "loss": 0.6859,
777
+ "step": 2550
778
+ },
779
+ {
780
+ "epoch": 3.2972453555413197,
781
+ "grad_norm": 1.8509936332702637,
782
+ "learning_rate": 0.0002148904109589041,
783
+ "loss": 0.6178,
784
+ "step": 2575
785
+ },
786
+ {
787
+ "epoch": 3.3292761050608584,
788
+ "grad_norm": 1.355774998664856,
789
+ "learning_rate": 0.00021386301369863013,
790
+ "loss": 0.6908,
791
+ "step": 2600
792
+ },
793
+ {
794
+ "epoch": 3.361306854580397,
795
+ "grad_norm": 1.322189211845398,
796
+ "learning_rate": 0.00021283561643835615,
797
+ "loss": 0.6134,
798
+ "step": 2625
799
+ },
800
+ {
801
+ "epoch": 3.393337604099936,
802
+ "grad_norm": 1.606176495552063,
803
+ "learning_rate": 0.00021180821917808217,
804
+ "loss": 0.6974,
805
+ "step": 2650
806
+ },
807
+ {
808
+ "epoch": 3.425368353619475,
809
+ "grad_norm": 1.7283929586410522,
810
+ "learning_rate": 0.00021078082191780818,
811
+ "loss": 0.6069,
812
+ "step": 2675
813
+ },
814
+ {
815
+ "epoch": 3.4573991031390134,
816
+ "grad_norm": 1.2916626930236816,
817
+ "learning_rate": 0.00020975342465753423,
818
+ "loss": 0.7165,
819
+ "step": 2700
820
+ },
821
+ {
822
+ "epoch": 3.489429852658552,
823
+ "grad_norm": 1.8512370586395264,
824
+ "learning_rate": 0.00020872602739726025,
825
+ "loss": 0.6287,
826
+ "step": 2725
827
+ },
828
+ {
829
+ "epoch": 3.5214606021780908,
830
+ "grad_norm": 3.614025354385376,
831
+ "learning_rate": 0.0002076986301369863,
832
+ "loss": 0.6982,
833
+ "step": 2750
834
+ },
835
+ {
836
+ "epoch": 3.55349135169763,
837
+ "grad_norm": 1.3554790019989014,
838
+ "learning_rate": 0.0002066712328767123,
839
+ "loss": 0.604,
840
+ "step": 2775
841
+ },
842
+ {
843
+ "epoch": 3.5855221012171685,
844
+ "grad_norm": 1.638237476348877,
845
+ "learning_rate": 0.00020564383561643836,
846
+ "loss": 0.7113,
847
+ "step": 2800
848
+ },
849
+ {
850
+ "epoch": 3.5855221012171685,
851
+ "eval_loss": 0.7320713996887207,
852
+ "eval_runtime": 192.2342,
853
+ "eval_samples_per_second": 15.564,
854
+ "eval_steps_per_second": 1.946,
855
+ "eval_wer": 0.6938207136640557,
856
+ "step": 2800
857
+ },
858
+ {
859
+ "epoch": 3.617552850736707,
860
+ "grad_norm": 2.5015342235565186,
861
+ "learning_rate": 0.00020461643835616438,
862
+ "loss": 0.6208,
863
+ "step": 2825
864
+ },
865
+ {
866
+ "epoch": 3.649583600256246,
867
+ "grad_norm": 1.5744799375534058,
868
+ "learning_rate": 0.0002035890410958904,
869
+ "loss": 0.7336,
870
+ "step": 2850
871
+ },
872
+ {
873
+ "epoch": 3.681614349775785,
874
+ "grad_norm": 1.980490803718567,
875
+ "learning_rate": 0.0002025616438356164,
876
+ "loss": 0.5763,
877
+ "step": 2875
878
+ },
879
+ {
880
+ "epoch": 3.7136450992953236,
881
+ "grad_norm": 1.333608627319336,
882
+ "learning_rate": 0.00020153424657534243,
883
+ "loss": 0.693,
884
+ "step": 2900
885
+ },
886
+ {
887
+ "epoch": 3.7456758488148623,
888
+ "grad_norm": 1.21135675907135,
889
+ "learning_rate": 0.00020050684931506845,
890
+ "loss": 0.6162,
891
+ "step": 2925
892
+ },
893
+ {
894
+ "epoch": 3.777706598334401,
895
+ "grad_norm": 1.436661958694458,
896
+ "learning_rate": 0.00019947945205479452,
897
+ "loss": 0.7126,
898
+ "step": 2950
899
+ },
900
+ {
901
+ "epoch": 3.8097373478539396,
902
+ "grad_norm": 1.2120234966278076,
903
+ "learning_rate": 0.00019845205479452054,
904
+ "loss": 0.6439,
905
+ "step": 2975
906
+ },
907
+ {
908
+ "epoch": 3.8417680973734787,
909
+ "grad_norm": 1.5366668701171875,
910
+ "learning_rate": 0.00019742465753424656,
911
+ "loss": 0.6959,
912
+ "step": 3000
913
+ },
914
+ {
915
+ "epoch": 3.8737988468930173,
916
+ "grad_norm": 1.171915888786316,
917
+ "learning_rate": 0.00019639726027397258,
918
+ "loss": 0.6143,
919
+ "step": 3025
920
+ },
921
+ {
922
+ "epoch": 3.905829596412556,
923
+ "grad_norm": 1.1893322467803955,
924
+ "learning_rate": 0.00019536986301369862,
925
+ "loss": 0.7201,
926
+ "step": 3050
927
+ },
928
+ {
929
+ "epoch": 3.9378603459320947,
930
+ "grad_norm": 2.19003963470459,
931
+ "learning_rate": 0.00019434246575342464,
932
+ "loss": 0.6301,
933
+ "step": 3075
934
+ },
935
+ {
936
+ "epoch": 3.9698910954516338,
937
+ "grad_norm": 1.0006098747253418,
938
+ "learning_rate": 0.00019331506849315066,
939
+ "loss": 0.6753,
940
+ "step": 3100
941
+ },
942
+ {
943
+ "epoch": 4.001281229980782,
944
+ "grad_norm": 2.3398594856262207,
945
+ "learning_rate": 0.00019228767123287668,
946
+ "loss": 0.6019,
947
+ "step": 3125
948
+ },
949
+ {
950
+ "epoch": 4.03331197950032,
951
+ "grad_norm": 1.7329877614974976,
952
+ "learning_rate": 0.0001912602739726027,
953
+ "loss": 0.5492,
954
+ "step": 3150
955
+ },
956
+ {
957
+ "epoch": 4.065342729019859,
958
+ "grad_norm": 1.0595425367355347,
959
+ "learning_rate": 0.00019023287671232877,
960
+ "loss": 0.5996,
961
+ "step": 3175
962
+ },
963
+ {
964
+ "epoch": 4.097373478539398,
965
+ "grad_norm": 1.6115587949752808,
966
+ "learning_rate": 0.0001892054794520548,
967
+ "loss": 0.5346,
968
+ "step": 3200
969
+ },
970
+ {
971
+ "epoch": 4.097373478539398,
972
+ "eval_loss": 0.7688583731651306,
973
+ "eval_runtime": 192.3934,
974
+ "eval_samples_per_second": 15.551,
975
+ "eval_steps_per_second": 1.944,
976
+ "eval_wer": 0.6896431679721496,
977
+ "step": 3200
978
+ },
979
+ {
980
+ "epoch": 4.129404228058936,
981
+ "grad_norm": 1.158002257347107,
982
+ "learning_rate": 0.0001881780821917808,
983
+ "loss": 0.569,
984
+ "step": 3225
985
+ },
986
+ {
987
+ "epoch": 4.161434977578475,
988
+ "grad_norm": 2.2581615447998047,
989
+ "learning_rate": 0.00018715068493150682,
990
+ "loss": 0.4958,
991
+ "step": 3250
992
+ },
993
+ {
994
+ "epoch": 4.1934657270980145,
995
+ "grad_norm": 1.4523509740829468,
996
+ "learning_rate": 0.00018612328767123287,
997
+ "loss": 0.6063,
998
+ "step": 3275
999
+ },
1000
+ {
1001
+ "epoch": 4.225496476617553,
1002
+ "grad_norm": 1.2673031091690063,
1003
+ "learning_rate": 0.0001850958904109589,
1004
+ "loss": 0.4827,
1005
+ "step": 3300
1006
+ },
1007
+ {
1008
+ "epoch": 4.257527226137092,
1009
+ "grad_norm": 2.416383981704712,
1010
+ "learning_rate": 0.0001840684931506849,
1011
+ "loss": 0.6466,
1012
+ "step": 3325
1013
+ },
1014
+ {
1015
+ "epoch": 4.28955797565663,
1016
+ "grad_norm": 1.7506197690963745,
1017
+ "learning_rate": 0.00018304109589041093,
1018
+ "loss": 0.5158,
1019
+ "step": 3350
1020
+ },
1021
+ {
1022
+ "epoch": 4.321588725176169,
1023
+ "grad_norm": 1.171522617340088,
1024
+ "learning_rate": 0.00018201369863013697,
1025
+ "loss": 0.6242,
1026
+ "step": 3375
1027
+ },
1028
+ {
1029
+ "epoch": 4.353619474695708,
1030
+ "grad_norm": 0.8762041330337524,
1031
+ "learning_rate": 0.00018098630136986302,
1032
+ "loss": 0.5173,
1033
+ "step": 3400
1034
+ },
1035
+ {
1036
+ "epoch": 4.385650224215246,
1037
+ "grad_norm": 1.327751636505127,
1038
+ "learning_rate": 0.00017995890410958903,
1039
+ "loss": 0.6182,
1040
+ "step": 3425
1041
+ },
1042
+ {
1043
+ "epoch": 4.417680973734785,
1044
+ "grad_norm": 1.076515555381775,
1045
+ "learning_rate": 0.00017893150684931505,
1046
+ "loss": 0.5229,
1047
+ "step": 3450
1048
+ },
1049
+ {
1050
+ "epoch": 4.449711723254324,
1051
+ "grad_norm": 1.5693820714950562,
1052
+ "learning_rate": 0.00017790410958904107,
1053
+ "loss": 0.5771,
1054
+ "step": 3475
1055
+ },
1056
+ {
1057
+ "epoch": 4.481742472773863,
1058
+ "grad_norm": 1.3674280643463135,
1059
+ "learning_rate": 0.0001768767123287671,
1060
+ "loss": 0.5237,
1061
+ "step": 3500
1062
+ },
1063
+ {
1064
+ "epoch": 4.513773222293402,
1065
+ "grad_norm": 2.232922315597534,
1066
+ "learning_rate": 0.00017584931506849314,
1067
+ "loss": 0.5916,
1068
+ "step": 3525
1069
+ },
1070
+ {
1071
+ "epoch": 4.545803971812941,
1072
+ "grad_norm": 1.1831066608428955,
1073
+ "learning_rate": 0.00017482191780821915,
1074
+ "loss": 0.5244,
1075
+ "step": 3550
1076
+ },
1077
+ {
1078
+ "epoch": 4.577834721332479,
1079
+ "grad_norm": 1.2374058961868286,
1080
+ "learning_rate": 0.0001737945205479452,
1081
+ "loss": 0.5743,
1082
+ "step": 3575
1083
+ },
1084
+ {
1085
+ "epoch": 4.609865470852018,
1086
+ "grad_norm": 1.7176926136016846,
1087
+ "learning_rate": 0.00017276712328767122,
1088
+ "loss": 0.5107,
1089
+ "step": 3600
1090
+ },
1091
+ {
1092
+ "epoch": 4.609865470852018,
1093
+ "eval_loss": 0.783900797367096,
1094
+ "eval_runtime": 191.9719,
1095
+ "eval_samples_per_second": 15.586,
1096
+ "eval_steps_per_second": 1.948,
1097
+ "eval_wer": 0.678503046127067,
1098
+ "step": 3600
1099
+ }
1100
+ ],
1101
+ "logging_steps": 25,
1102
+ "max_steps": 7800,
1103
+ "num_input_tokens_seen": 0,
1104
+ "num_train_epochs": 10,
1105
+ "save_steps": 400,
1106
+ "stateful_callbacks": {
1107
+ "TrainerControl": {
1108
+ "args": {
1109
+ "should_epoch_stop": false,
1110
+ "should_evaluate": false,
1111
+ "should_log": false,
1112
+ "should_save": true,
1113
+ "should_training_stop": false
1114
+ },
1115
+ "attributes": {}
1116
+ }
1117
+ },
1118
+ "total_flos": 1.4784767862106845e+19,
1119
+ "train_batch_size": 16,
1120
+ "trial_name": null,
1121
+ "trial_params": null
1122
+ }
checkpoint-3600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
3
+ size 5304
checkpoint-4000/config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
3
+ "activation_dropout": 0.05,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 256,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": true,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 55,
89
+ "proj_codevector_dim": 256,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.49.0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 58,
115
+ "xvector_output_dim": 512
116
+ }
checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cfc8debbea0680751a4c88ef792bc391c674c9d3896130f1f6dc345947463fb
3
+ size 1262045280
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f5d27d817401408cb6e2b544c31d3cc00780f464363566c8cbb1a222b03c1f
3
+ size 2490635318
checkpoint-4000/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff8fce8d641585566b50c7928d325eeab081c421015cd5034dd58cd9c9818e6
3
+ size 14244
checkpoint-4000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b88d751b9e3f0246fc279325a672185d15e3efdb16f5db937f547385d1a6aa7c
3
+ size 988
checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc5d31122ddd1ef2ad27fc0f44878b7c9a9d31a70fbd1d1e18b8669bac51aec
3
+ size 1064
checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,1243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.678503046127067,
3
+ "best_model_checkpoint": "./wav2vec2-large-xlsr-ar/checkpoint-3600",
4
+ "epoch": 5.121716848174247,
5
+ "eval_steps": 400,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.032030749519538756,
13
+ "grad_norm": 40.31281661987305,
14
+ "learning_rate": 1.3799999999999998e-05,
15
+ "loss": 25.5997,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.06406149903907751,
20
+ "grad_norm": 70.23408508300781,
21
+ "learning_rate": 2.7599999999999997e-05,
22
+ "loss": 30.9717,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.09609224855861627,
27
+ "grad_norm": 3.012640953063965,
28
+ "learning_rate": 4.259999999999999e-05,
29
+ "loss": 9.1896,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.12812299807815503,
34
+ "grad_norm": 30.363168716430664,
35
+ "learning_rate": 5.76e-05,
36
+ "loss": 12.5434,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.1601537475976938,
41
+ "grad_norm": 13.795882225036621,
42
+ "learning_rate": 7.259999999999999e-05,
43
+ "loss": 5.814,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.19218449711723254,
48
+ "grad_norm": 99.32404327392578,
49
+ "learning_rate": 8.759999999999999e-05,
50
+ "loss": 9.7835,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.2242152466367713,
55
+ "grad_norm": 4.108926773071289,
56
+ "learning_rate": 0.0001026,
57
+ "loss": 5.1191,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.25624599615631005,
62
+ "grad_norm": 7.492372512817383,
63
+ "learning_rate": 0.0001176,
64
+ "loss": 3.7315,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.2882767456758488,
69
+ "grad_norm": 7.183516502380371,
70
+ "learning_rate": 0.0001326,
71
+ "loss": 3.6219,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.3203074951953876,
76
+ "grad_norm": 2.490111827850342,
77
+ "learning_rate": 0.00014759999999999998,
78
+ "loss": 3.6824,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.3523382447149263,
83
+ "grad_norm": 0.5032612681388855,
84
+ "learning_rate": 0.0001626,
85
+ "loss": 3.5972,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.3843689942344651,
90
+ "grad_norm": 3.7791531085968018,
91
+ "learning_rate": 0.00017759999999999998,
92
+ "loss": 3.6051,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.41639974375400385,
97
+ "grad_norm": 2.6355323791503906,
98
+ "learning_rate": 0.0001926,
99
+ "loss": 3.5084,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.4484304932735426,
104
+ "grad_norm": 1.8424248695373535,
105
+ "learning_rate": 0.00020759999999999998,
106
+ "loss": 3.6254,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.48046124279308133,
111
+ "grad_norm": 2.8042099475860596,
112
+ "learning_rate": 0.0002226,
113
+ "loss": 3.5539,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.5124919923126201,
118
+ "grad_norm": 1.1292295455932617,
119
+ "learning_rate": 0.0002376,
120
+ "loss": 3.5401,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.5124919923126201,
125
+ "eval_loss": 3.4790358543395996,
126
+ "eval_runtime": 190.1673,
127
+ "eval_samples_per_second": 15.734,
128
+ "eval_steps_per_second": 1.967,
129
+ "eval_wer": 1.0,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 0.5445227418321589,
134
+ "grad_norm": 5.499414443969727,
135
+ "learning_rate": 0.00025259999999999996,
136
+ "loss": 3.4746,
137
+ "step": 425
138
+ },
139
+ {
140
+ "epoch": 0.5765534913516976,
141
+ "grad_norm": 3.2174575328826904,
142
+ "learning_rate": 0.0002676,
143
+ "loss": 3.5553,
144
+ "step": 450
145
+ },
146
+ {
147
+ "epoch": 0.6085842408712364,
148
+ "grad_norm": 0.8652946352958679,
149
+ "learning_rate": 0.0002826,
150
+ "loss": 3.4252,
151
+ "step": 475
152
+ },
153
+ {
154
+ "epoch": 0.6406149903907752,
155
+ "grad_norm": 2.645606517791748,
156
+ "learning_rate": 0.00029759999999999997,
157
+ "loss": 3.5508,
158
+ "step": 500
159
+ },
160
+ {
161
+ "epoch": 0.672645739910314,
162
+ "grad_norm": 0.9275538921356201,
163
+ "learning_rate": 0.00029913698630136987,
164
+ "loss": 3.4857,
165
+ "step": 525
166
+ },
167
+ {
168
+ "epoch": 0.7046764894298526,
169
+ "grad_norm": 1.0166261196136475,
170
+ "learning_rate": 0.00029810958904109586,
171
+ "loss": 3.5138,
172
+ "step": 550
173
+ },
174
+ {
175
+ "epoch": 0.7367072389493914,
176
+ "grad_norm": 4.685708045959473,
177
+ "learning_rate": 0.0002970821917808219,
178
+ "loss": 3.4017,
179
+ "step": 575
180
+ },
181
+ {
182
+ "epoch": 0.7687379884689302,
183
+ "grad_norm": 1.1367671489715576,
184
+ "learning_rate": 0.0002960547945205479,
185
+ "loss": 3.4927,
186
+ "step": 600
187
+ },
188
+ {
189
+ "epoch": 0.8007687379884689,
190
+ "grad_norm": 2.971071481704712,
191
+ "learning_rate": 0.00029502739726027395,
192
+ "loss": 3.4265,
193
+ "step": 625
194
+ },
195
+ {
196
+ "epoch": 0.8327994875080077,
197
+ "grad_norm": 2.657762289047241,
198
+ "learning_rate": 0.000294,
199
+ "loss": 3.4645,
200
+ "step": 650
201
+ },
202
+ {
203
+ "epoch": 0.8648302370275465,
204
+ "grad_norm": 2.87245774269104,
205
+ "learning_rate": 0.000292972602739726,
206
+ "loss": 3.3777,
207
+ "step": 675
208
+ },
209
+ {
210
+ "epoch": 0.8968609865470852,
211
+ "grad_norm": 1.2535868883132935,
212
+ "learning_rate": 0.00029194520547945203,
213
+ "loss": 3.4021,
214
+ "step": 700
215
+ },
216
+ {
217
+ "epoch": 0.928891736066624,
218
+ "grad_norm": 0.7670681476593018,
219
+ "learning_rate": 0.00029091780821917807,
220
+ "loss": 3.3667,
221
+ "step": 725
222
+ },
223
+ {
224
+ "epoch": 0.9609224855861627,
225
+ "grad_norm": 1.4213225841522217,
226
+ "learning_rate": 0.0002898904109589041,
227
+ "loss": 3.3387,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 0.9929532351057014,
232
+ "grad_norm": 1.161726951599121,
233
+ "learning_rate": 0.0002888630136986301,
234
+ "loss": 2.7481,
235
+ "step": 775
236
+ },
237
+ {
238
+ "epoch": 1.0243433696348494,
239
+ "grad_norm": 2.047264337539673,
240
+ "learning_rate": 0.00028783561643835616,
241
+ "loss": 2.002,
242
+ "step": 800
243
+ },
244
+ {
245
+ "epoch": 1.0243433696348494,
246
+ "eval_loss": 1.6117621660232544,
247
+ "eval_runtime": 190.6575,
248
+ "eval_samples_per_second": 15.693,
249
+ "eval_steps_per_second": 1.962,
250
+ "eval_wer": 0.9870322019147084,
251
+ "step": 800
252
+ },
253
+ {
254
+ "epoch": 1.0563741191543883,
255
+ "grad_norm": 3.6471316814422607,
256
+ "learning_rate": 0.00028680821917808215,
257
+ "loss": 1.7004,
258
+ "step": 825
259
+ },
260
+ {
261
+ "epoch": 1.088404868673927,
262
+ "grad_norm": 1.8413678407669067,
263
+ "learning_rate": 0.0002857808219178082,
264
+ "loss": 1.4543,
265
+ "step": 850
266
+ },
267
+ {
268
+ "epoch": 1.1204356181934658,
269
+ "grad_norm": 2.3082125186920166,
270
+ "learning_rate": 0.00028475342465753424,
271
+ "loss": 1.3853,
272
+ "step": 875
273
+ },
274
+ {
275
+ "epoch": 1.1524663677130045,
276
+ "grad_norm": 1.7227452993392944,
277
+ "learning_rate": 0.00028372602739726023,
278
+ "loss": 1.336,
279
+ "step": 900
280
+ },
281
+ {
282
+ "epoch": 1.1844971172325431,
283
+ "grad_norm": 2.1250977516174316,
284
+ "learning_rate": 0.0002826986301369863,
285
+ "loss": 1.349,
286
+ "step": 925
287
+ },
288
+ {
289
+ "epoch": 1.216527866752082,
290
+ "grad_norm": 1.6314936876296997,
291
+ "learning_rate": 0.0002816712328767123,
292
+ "loss": 1.2656,
293
+ "step": 950
294
+ },
295
+ {
296
+ "epoch": 1.2485586162716207,
297
+ "grad_norm": 2.001681089401245,
298
+ "learning_rate": 0.00028064383561643837,
299
+ "loss": 1.2556,
300
+ "step": 975
301
+ },
302
+ {
303
+ "epoch": 1.2805893657911596,
304
+ "grad_norm": 2.1396918296813965,
305
+ "learning_rate": 0.00027961643835616436,
306
+ "loss": 1.1932,
307
+ "step": 1000
308
+ },
309
+ {
310
+ "epoch": 1.3126201153106982,
311
+ "grad_norm": 1.9855870008468628,
312
+ "learning_rate": 0.0002785890410958904,
313
+ "loss": 1.1991,
314
+ "step": 1025
315
+ },
316
+ {
317
+ "epoch": 1.344650864830237,
318
+ "grad_norm": 1.1789072751998901,
319
+ "learning_rate": 0.0002775616438356164,
320
+ "loss": 1.1041,
321
+ "step": 1050
322
+ },
323
+ {
324
+ "epoch": 1.3766816143497758,
325
+ "grad_norm": 2.304903507232666,
326
+ "learning_rate": 0.00027653424657534244,
327
+ "loss": 1.1538,
328
+ "step": 1075
329
+ },
330
+ {
331
+ "epoch": 1.4087123638693146,
332
+ "grad_norm": 2.459096670150757,
333
+ "learning_rate": 0.00027550684931506843,
334
+ "loss": 1.2379,
335
+ "step": 1100
336
+ },
337
+ {
338
+ "epoch": 1.4407431133888533,
339
+ "grad_norm": 1.886155605316162,
340
+ "learning_rate": 0.00027447945205479453,
341
+ "loss": 1.1326,
342
+ "step": 1125
343
+ },
344
+ {
345
+ "epoch": 1.472773862908392,
346
+ "grad_norm": 1.1992785930633545,
347
+ "learning_rate": 0.0002734520547945205,
348
+ "loss": 1.0691,
349
+ "step": 1150
350
+ },
351
+ {
352
+ "epoch": 1.5048046124279308,
353
+ "grad_norm": 3.249142646789551,
354
+ "learning_rate": 0.00027242465753424657,
355
+ "loss": 1.0511,
356
+ "step": 1175
357
+ },
358
+ {
359
+ "epoch": 1.5368353619474697,
360
+ "grad_norm": 3.3806302547454834,
361
+ "learning_rate": 0.00027139726027397256,
362
+ "loss": 1.0618,
363
+ "step": 1200
364
+ },
365
+ {
366
+ "epoch": 1.5368353619474697,
367
+ "eval_loss": 1.0360716581344604,
368
+ "eval_runtime": 191.2082,
369
+ "eval_samples_per_second": 15.648,
370
+ "eval_steps_per_second": 1.956,
371
+ "eval_wer": 0.8269799825935596,
372
+ "step": 1200
373
+ },
374
+ {
375
+ "epoch": 1.5688661114670084,
376
+ "grad_norm": 2.508125066757202,
377
+ "learning_rate": 0.0002703698630136986,
378
+ "loss": 1.0652,
379
+ "step": 1225
380
+ },
381
+ {
382
+ "epoch": 1.600896860986547,
383
+ "grad_norm": 1.3717399835586548,
384
+ "learning_rate": 0.00026934246575342465,
385
+ "loss": 1.0354,
386
+ "step": 1250
387
+ },
388
+ {
389
+ "epoch": 1.6329276105060857,
390
+ "grad_norm": 1.5011590719223022,
391
+ "learning_rate": 0.00026831506849315064,
392
+ "loss": 1.0427,
393
+ "step": 1275
394
+ },
395
+ {
396
+ "epoch": 1.6649583600256246,
397
+ "grad_norm": 1.6448092460632324,
398
+ "learning_rate": 0.0002672876712328767,
399
+ "loss": 1.0498,
400
+ "step": 1300
401
+ },
402
+ {
403
+ "epoch": 1.6969891095451635,
404
+ "grad_norm": 1.4456716775894165,
405
+ "learning_rate": 0.0002662602739726027,
406
+ "loss": 1.0481,
407
+ "step": 1325
408
+ },
409
+ {
410
+ "epoch": 1.7290198590647021,
411
+ "grad_norm": 1.2869809865951538,
412
+ "learning_rate": 0.0002652328767123288,
413
+ "loss": 1.0271,
414
+ "step": 1350
415
+ },
416
+ {
417
+ "epoch": 1.7610506085842408,
418
+ "grad_norm": 4.315392971038818,
419
+ "learning_rate": 0.00026420547945205477,
420
+ "loss": 0.978,
421
+ "step": 1375
422
+ },
423
+ {
424
+ "epoch": 1.7930813581037797,
425
+ "grad_norm": 1.3269984722137451,
426
+ "learning_rate": 0.0002631780821917808,
427
+ "loss": 0.9891,
428
+ "step": 1400
429
+ },
430
+ {
431
+ "epoch": 1.8251121076233185,
432
+ "grad_norm": 1.6529700756072998,
433
+ "learning_rate": 0.0002621506849315068,
434
+ "loss": 0.9917,
435
+ "step": 1425
436
+ },
437
+ {
438
+ "epoch": 1.8571428571428572,
439
+ "grad_norm": 2.1668319702148438,
440
+ "learning_rate": 0.00026112328767123285,
441
+ "loss": 0.9745,
442
+ "step": 1450
443
+ },
444
+ {
445
+ "epoch": 1.8891736066623959,
446
+ "grad_norm": 6.553292751312256,
447
+ "learning_rate": 0.0002600958904109589,
448
+ "loss": 0.9898,
449
+ "step": 1475
450
+ },
451
+ {
452
+ "epoch": 1.9212043561819345,
453
+ "grad_norm": 1.2242108583450317,
454
+ "learning_rate": 0.0002590684931506849,
455
+ "loss": 0.9363,
456
+ "step": 1500
457
+ },
458
+ {
459
+ "epoch": 1.9532351057014734,
460
+ "grad_norm": 2.026926040649414,
461
+ "learning_rate": 0.00025804109589041093,
462
+ "loss": 0.9731,
463
+ "step": 1525
464
+ },
465
+ {
466
+ "epoch": 1.9852658552210123,
467
+ "grad_norm": 1.560719609260559,
468
+ "learning_rate": 0.000257013698630137,
469
+ "loss": 0.8685,
470
+ "step": 1550
471
+ },
472
+ {
473
+ "epoch": 2.01665598975016,
474
+ "grad_norm": 1.0007785558700562,
475
+ "learning_rate": 0.000255986301369863,
476
+ "loss": 0.8586,
477
+ "step": 1575
478
+ },
479
+ {
480
+ "epoch": 2.048686739269699,
481
+ "grad_norm": 1.0924744606018066,
482
+ "learning_rate": 0.000254958904109589,
483
+ "loss": 0.8025,
484
+ "step": 1600
485
+ },
486
+ {
487
+ "epoch": 2.048686739269699,
488
+ "eval_loss": 0.8233081102371216,
489
+ "eval_runtime": 191.163,
490
+ "eval_samples_per_second": 15.652,
491
+ "eval_steps_per_second": 1.956,
492
+ "eval_wer": 0.751348999129678,
493
+ "step": 1600
494
+ },
495
+ {
496
+ "epoch": 2.0807174887892375,
497
+ "grad_norm": 1.9123071432113647,
498
+ "learning_rate": 0.00025393150684931506,
499
+ "loss": 0.8919,
500
+ "step": 1625
501
+ },
502
+ {
503
+ "epoch": 2.1127482383087766,
504
+ "grad_norm": 1.7388331890106201,
505
+ "learning_rate": 0.00025290410958904105,
506
+ "loss": 0.8667,
507
+ "step": 1650
508
+ },
509
+ {
510
+ "epoch": 2.144778987828315,
511
+ "grad_norm": 1.247045874595642,
512
+ "learning_rate": 0.0002518767123287671,
513
+ "loss": 0.8716,
514
+ "step": 1675
515
+ },
516
+ {
517
+ "epoch": 2.176809737347854,
518
+ "grad_norm": 2.1057279109954834,
519
+ "learning_rate": 0.00025084931506849314,
520
+ "loss": 0.7654,
521
+ "step": 1700
522
+ },
523
+ {
524
+ "epoch": 2.2088404868673925,
525
+ "grad_norm": 1.0074440240859985,
526
+ "learning_rate": 0.00024982191780821913,
527
+ "loss": 0.8732,
528
+ "step": 1725
529
+ },
530
+ {
531
+ "epoch": 2.2408712363869316,
532
+ "grad_norm": 2.159853219985962,
533
+ "learning_rate": 0.0002487945205479452,
534
+ "loss": 0.7941,
535
+ "step": 1750
536
+ },
537
+ {
538
+ "epoch": 2.2729019859064703,
539
+ "grad_norm": 1.8050284385681152,
540
+ "learning_rate": 0.0002477671232876712,
541
+ "loss": 0.8496,
542
+ "step": 1775
543
+ },
544
+ {
545
+ "epoch": 2.304932735426009,
546
+ "grad_norm": 1.3987536430358887,
547
+ "learning_rate": 0.00024673972602739727,
548
+ "loss": 0.8332,
549
+ "step": 1800
550
+ },
551
+ {
552
+ "epoch": 2.3369634849455476,
553
+ "grad_norm": 2.5281410217285156,
554
+ "learning_rate": 0.00024571232876712326,
555
+ "loss": 0.8135,
556
+ "step": 1825
557
+ },
558
+ {
559
+ "epoch": 2.3689942344650863,
560
+ "grad_norm": 1.914908766746521,
561
+ "learning_rate": 0.0002446849315068493,
562
+ "loss": 0.8067,
563
+ "step": 1850
564
+ },
565
+ {
566
+ "epoch": 2.4010249839846254,
567
+ "grad_norm": 1.1049237251281738,
568
+ "learning_rate": 0.00024365753424657533,
569
+ "loss": 0.8027,
570
+ "step": 1875
571
+ },
572
+ {
573
+ "epoch": 2.433055733504164,
574
+ "grad_norm": 2.2418999671936035,
575
+ "learning_rate": 0.00024263013698630134,
576
+ "loss": 0.7857,
577
+ "step": 1900
578
+ },
579
+ {
580
+ "epoch": 2.4650864830237027,
581
+ "grad_norm": 1.4093470573425293,
582
+ "learning_rate": 0.00024160273972602736,
583
+ "loss": 0.8265,
584
+ "step": 1925
585
+ },
586
+ {
587
+ "epoch": 2.4971172325432414,
588
+ "grad_norm": 1.396606206893921,
589
+ "learning_rate": 0.0002405753424657534,
590
+ "loss": 0.7601,
591
+ "step": 1950
592
+ },
593
+ {
594
+ "epoch": 2.5291479820627805,
595
+ "grad_norm": 1.2854044437408447,
596
+ "learning_rate": 0.00023954794520547945,
597
+ "loss": 0.8408,
598
+ "step": 1975
599
+ },
600
+ {
601
+ "epoch": 2.561178731582319,
602
+ "grad_norm": 2.2053070068359375,
603
+ "learning_rate": 0.00023852054794520547,
604
+ "loss": 0.7199,
605
+ "step": 2000
606
+ },
607
+ {
608
+ "epoch": 2.561178731582319,
609
+ "eval_loss": 0.7817878127098083,
610
+ "eval_runtime": 192.0652,
611
+ "eval_samples_per_second": 15.578,
612
+ "eval_steps_per_second": 1.947,
613
+ "eval_wer": 0.7203655352480418,
614
+ "step": 2000
615
+ },
616
+ {
617
+ "epoch": 2.593209481101858,
618
+ "grad_norm": 1.6103401184082031,
619
+ "learning_rate": 0.0002374931506849315,
620
+ "loss": 0.7848,
621
+ "step": 2025
622
+ },
623
+ {
624
+ "epoch": 2.6252402306213964,
625
+ "grad_norm": 3.00805401802063,
626
+ "learning_rate": 0.0002364657534246575,
627
+ "loss": 0.7485,
628
+ "step": 2050
629
+ },
630
+ {
631
+ "epoch": 2.657270980140935,
632
+ "grad_norm": 1.0826023817062378,
633
+ "learning_rate": 0.00023543835616438353,
634
+ "loss": 0.8131,
635
+ "step": 2075
636
+ },
637
+ {
638
+ "epoch": 2.689301729660474,
639
+ "grad_norm": 2.3294951915740967,
640
+ "learning_rate": 0.00023441095890410955,
641
+ "loss": 0.7616,
642
+ "step": 2100
643
+ },
644
+ {
645
+ "epoch": 2.721332479180013,
646
+ "grad_norm": 1.232429027557373,
647
+ "learning_rate": 0.0002333835616438356,
648
+ "loss": 0.7925,
649
+ "step": 2125
650
+ },
651
+ {
652
+ "epoch": 2.7533632286995515,
653
+ "grad_norm": 1.8985693454742432,
654
+ "learning_rate": 0.00023235616438356164,
655
+ "loss": 0.7829,
656
+ "step": 2150
657
+ },
658
+ {
659
+ "epoch": 2.78539397821909,
660
+ "grad_norm": 1.1546630859375,
661
+ "learning_rate": 0.00023132876712328765,
662
+ "loss": 0.8053,
663
+ "step": 2175
664
+ },
665
+ {
666
+ "epoch": 2.8174247277386293,
667
+ "grad_norm": 1.2817527055740356,
668
+ "learning_rate": 0.00023030136986301367,
669
+ "loss": 0.7452,
670
+ "step": 2200
671
+ },
672
+ {
673
+ "epoch": 2.849455477258168,
674
+ "grad_norm": 1.7231945991516113,
675
+ "learning_rate": 0.00022927397260273972,
676
+ "loss": 0.7817,
677
+ "step": 2225
678
+ },
679
+ {
680
+ "epoch": 2.8814862267777066,
681
+ "grad_norm": 2.686530113220215,
682
+ "learning_rate": 0.00022824657534246574,
683
+ "loss": 0.7506,
684
+ "step": 2250
685
+ },
686
+ {
687
+ "epoch": 2.9135169762972453,
688
+ "grad_norm": 1.3286162614822388,
689
+ "learning_rate": 0.00022721917808219176,
690
+ "loss": 0.7947,
691
+ "step": 2275
692
+ },
693
+ {
694
+ "epoch": 2.945547725816784,
695
+ "grad_norm": 1.3969508409500122,
696
+ "learning_rate": 0.00022619178082191777,
697
+ "loss": 0.7174,
698
+ "step": 2300
699
+ },
700
+ {
701
+ "epoch": 2.977578475336323,
702
+ "grad_norm": 1.835070013999939,
703
+ "learning_rate": 0.0002251643835616438,
704
+ "loss": 0.7531,
705
+ "step": 2325
706
+ },
707
+ {
708
+ "epoch": 3.008968609865471,
709
+ "grad_norm": 2.8138489723205566,
710
+ "learning_rate": 0.00022413698630136986,
711
+ "loss": 0.7638,
712
+ "step": 2350
713
+ },
714
+ {
715
+ "epoch": 3.0409993593850095,
716
+ "grad_norm": 1.0872498750686646,
717
+ "learning_rate": 0.00022310958904109588,
718
+ "loss": 0.6102,
719
+ "step": 2375
720
+ },
721
+ {
722
+ "epoch": 3.073030108904548,
723
+ "grad_norm": 2.0995171070098877,
724
+ "learning_rate": 0.0002220821917808219,
725
+ "loss": 0.675,
726
+ "step": 2400
727
+ },
728
+ {
729
+ "epoch": 3.073030108904548,
730
+ "eval_loss": 0.8002874255180359,
731
+ "eval_runtime": 191.1551,
732
+ "eval_samples_per_second": 15.652,
733
+ "eval_steps_per_second": 1.957,
734
+ "eval_wer": 0.7121845082680592,
735
+ "step": 2400
736
+ },
737
+ {
738
+ "epoch": 3.1050608584240873,
739
+ "grad_norm": 1.8755576610565186,
740
+ "learning_rate": 0.00022105479452054792,
741
+ "loss": 0.5922,
742
+ "step": 2425
743
+ },
744
+ {
745
+ "epoch": 3.137091607943626,
746
+ "grad_norm": 0.8880970478057861,
747
+ "learning_rate": 0.00022002739726027397,
748
+ "loss": 0.6929,
749
+ "step": 2450
750
+ },
751
+ {
752
+ "epoch": 3.1691223574631646,
753
+ "grad_norm": 2.357203722000122,
754
+ "learning_rate": 0.00021899999999999998,
755
+ "loss": 0.5997,
756
+ "step": 2475
757
+ },
758
+ {
759
+ "epoch": 3.2011531069827033,
760
+ "grad_norm": 0.9104003310203552,
761
+ "learning_rate": 0.000217972602739726,
762
+ "loss": 0.728,
763
+ "step": 2500
764
+ },
765
+ {
766
+ "epoch": 3.233183856502242,
767
+ "grad_norm": 1.5452452898025513,
768
+ "learning_rate": 0.00021694520547945202,
769
+ "loss": 0.5799,
770
+ "step": 2525
771
+ },
772
+ {
773
+ "epoch": 3.265214606021781,
774
+ "grad_norm": 0.8508313894271851,
775
+ "learning_rate": 0.00021591780821917807,
776
+ "loss": 0.6859,
777
+ "step": 2550
778
+ },
779
+ {
780
+ "epoch": 3.2972453555413197,
781
+ "grad_norm": 1.8509936332702637,
782
+ "learning_rate": 0.0002148904109589041,
783
+ "loss": 0.6178,
784
+ "step": 2575
785
+ },
786
+ {
787
+ "epoch": 3.3292761050608584,
788
+ "grad_norm": 1.355774998664856,
789
+ "learning_rate": 0.00021386301369863013,
790
+ "loss": 0.6908,
791
+ "step": 2600
792
+ },
793
+ {
794
+ "epoch": 3.361306854580397,
795
+ "grad_norm": 1.322189211845398,
796
+ "learning_rate": 0.00021283561643835615,
797
+ "loss": 0.6134,
798
+ "step": 2625
799
+ },
800
+ {
801
+ "epoch": 3.393337604099936,
802
+ "grad_norm": 1.606176495552063,
803
+ "learning_rate": 0.00021180821917808217,
804
+ "loss": 0.6974,
805
+ "step": 2650
806
+ },
807
+ {
808
+ "epoch": 3.425368353619475,
809
+ "grad_norm": 1.7283929586410522,
810
+ "learning_rate": 0.00021078082191780818,
811
+ "loss": 0.6069,
812
+ "step": 2675
813
+ },
814
+ {
815
+ "epoch": 3.4573991031390134,
816
+ "grad_norm": 1.2916626930236816,
817
+ "learning_rate": 0.00020975342465753423,
818
+ "loss": 0.7165,
819
+ "step": 2700
820
+ },
821
+ {
822
+ "epoch": 3.489429852658552,
823
+ "grad_norm": 1.8512370586395264,
824
+ "learning_rate": 0.00020872602739726025,
825
+ "loss": 0.6287,
826
+ "step": 2725
827
+ },
828
+ {
829
+ "epoch": 3.5214606021780908,
830
+ "grad_norm": 3.614025354385376,
831
+ "learning_rate": 0.0002076986301369863,
832
+ "loss": 0.6982,
833
+ "step": 2750
834
+ },
835
+ {
836
+ "epoch": 3.55349135169763,
837
+ "grad_norm": 1.3554790019989014,
838
+ "learning_rate": 0.0002066712328767123,
839
+ "loss": 0.604,
840
+ "step": 2775
841
+ },
842
+ {
843
+ "epoch": 3.5855221012171685,
844
+ "grad_norm": 1.638237476348877,
845
+ "learning_rate": 0.00020564383561643836,
846
+ "loss": 0.7113,
847
+ "step": 2800
848
+ },
849
+ {
850
+ "epoch": 3.5855221012171685,
851
+ "eval_loss": 0.7320713996887207,
852
+ "eval_runtime": 192.2342,
853
+ "eval_samples_per_second": 15.564,
854
+ "eval_steps_per_second": 1.946,
855
+ "eval_wer": 0.6938207136640557,
856
+ "step": 2800
857
+ },
858
+ {
859
+ "epoch": 3.617552850736707,
860
+ "grad_norm": 2.5015342235565186,
861
+ "learning_rate": 0.00020461643835616438,
862
+ "loss": 0.6208,
863
+ "step": 2825
864
+ },
865
+ {
866
+ "epoch": 3.649583600256246,
867
+ "grad_norm": 1.5744799375534058,
868
+ "learning_rate": 0.0002035890410958904,
869
+ "loss": 0.7336,
870
+ "step": 2850
871
+ },
872
+ {
873
+ "epoch": 3.681614349775785,
874
+ "grad_norm": 1.980490803718567,
875
+ "learning_rate": 0.0002025616438356164,
876
+ "loss": 0.5763,
877
+ "step": 2875
878
+ },
879
+ {
880
+ "epoch": 3.7136450992953236,
881
+ "grad_norm": 1.333608627319336,
882
+ "learning_rate": 0.00020153424657534243,
883
+ "loss": 0.693,
884
+ "step": 2900
885
+ },
886
+ {
887
+ "epoch": 3.7456758488148623,
888
+ "grad_norm": 1.21135675907135,
889
+ "learning_rate": 0.00020050684931506845,
890
+ "loss": 0.6162,
891
+ "step": 2925
892
+ },
893
+ {
894
+ "epoch": 3.777706598334401,
895
+ "grad_norm": 1.436661958694458,
896
+ "learning_rate": 0.00019947945205479452,
897
+ "loss": 0.7126,
898
+ "step": 2950
899
+ },
900
+ {
901
+ "epoch": 3.8097373478539396,
902
+ "grad_norm": 1.2120234966278076,
903
+ "learning_rate": 0.00019845205479452054,
904
+ "loss": 0.6439,
905
+ "step": 2975
906
+ },
907
+ {
908
+ "epoch": 3.8417680973734787,
909
+ "grad_norm": 1.5366668701171875,
910
+ "learning_rate": 0.00019742465753424656,
911
+ "loss": 0.6959,
912
+ "step": 3000
913
+ },
914
+ {
915
+ "epoch": 3.8737988468930173,
916
+ "grad_norm": 1.171915888786316,
917
+ "learning_rate": 0.00019639726027397258,
918
+ "loss": 0.6143,
919
+ "step": 3025
920
+ },
921
+ {
922
+ "epoch": 3.905829596412556,
923
+ "grad_norm": 1.1893322467803955,
924
+ "learning_rate": 0.00019536986301369862,
925
+ "loss": 0.7201,
926
+ "step": 3050
927
+ },
928
+ {
929
+ "epoch": 3.9378603459320947,
930
+ "grad_norm": 2.19003963470459,
931
+ "learning_rate": 0.00019434246575342464,
932
+ "loss": 0.6301,
933
+ "step": 3075
934
+ },
935
+ {
936
+ "epoch": 3.9698910954516338,
937
+ "grad_norm": 1.0006098747253418,
938
+ "learning_rate": 0.00019331506849315066,
939
+ "loss": 0.6753,
940
+ "step": 3100
941
+ },
942
+ {
943
+ "epoch": 4.001281229980782,
944
+ "grad_norm": 2.3398594856262207,
945
+ "learning_rate": 0.00019228767123287668,
946
+ "loss": 0.6019,
947
+ "step": 3125
948
+ },
949
+ {
950
+ "epoch": 4.03331197950032,
951
+ "grad_norm": 1.7329877614974976,
952
+ "learning_rate": 0.0001912602739726027,
953
+ "loss": 0.5492,
954
+ "step": 3150
955
+ },
956
+ {
957
+ "epoch": 4.065342729019859,
958
+ "grad_norm": 1.0595425367355347,
959
+ "learning_rate": 0.00019023287671232877,
960
+ "loss": 0.5996,
961
+ "step": 3175
962
+ },
963
+ {
964
+ "epoch": 4.097373478539398,
965
+ "grad_norm": 1.6115587949752808,
966
+ "learning_rate": 0.0001892054794520548,
967
+ "loss": 0.5346,
968
+ "step": 3200
969
+ },
970
+ {
971
+ "epoch": 4.097373478539398,
972
+ "eval_loss": 0.7688583731651306,
973
+ "eval_runtime": 192.3934,
974
+ "eval_samples_per_second": 15.551,
975
+ "eval_steps_per_second": 1.944,
976
+ "eval_wer": 0.6896431679721496,
977
+ "step": 3200
978
+ },
979
+ {
980
+ "epoch": 4.129404228058936,
981
+ "grad_norm": 1.158002257347107,
982
+ "learning_rate": 0.0001881780821917808,
983
+ "loss": 0.569,
984
+ "step": 3225
985
+ },
986
+ {
987
+ "epoch": 4.161434977578475,
988
+ "grad_norm": 2.2581615447998047,
989
+ "learning_rate": 0.00018715068493150682,
990
+ "loss": 0.4958,
991
+ "step": 3250
992
+ },
993
+ {
994
+ "epoch": 4.1934657270980145,
995
+ "grad_norm": 1.4523509740829468,
996
+ "learning_rate": 0.00018612328767123287,
997
+ "loss": 0.6063,
998
+ "step": 3275
999
+ },
1000
+ {
1001
+ "epoch": 4.225496476617553,
1002
+ "grad_norm": 1.2673031091690063,
1003
+ "learning_rate": 0.0001850958904109589,
1004
+ "loss": 0.4827,
1005
+ "step": 3300
1006
+ },
1007
+ {
1008
+ "epoch": 4.257527226137092,
1009
+ "grad_norm": 2.416383981704712,
1010
+ "learning_rate": 0.0001840684931506849,
1011
+ "loss": 0.6466,
1012
+ "step": 3325
1013
+ },
1014
+ {
1015
+ "epoch": 4.28955797565663,
1016
+ "grad_norm": 1.7506197690963745,
1017
+ "learning_rate": 0.00018304109589041093,
1018
+ "loss": 0.5158,
1019
+ "step": 3350
1020
+ },
1021
+ {
1022
+ "epoch": 4.321588725176169,
1023
+ "grad_norm": 1.171522617340088,
1024
+ "learning_rate": 0.00018201369863013697,
1025
+ "loss": 0.6242,
1026
+ "step": 3375
1027
+ },
1028
+ {
1029
+ "epoch": 4.353619474695708,
1030
+ "grad_norm": 0.8762041330337524,
1031
+ "learning_rate": 0.00018098630136986302,
1032
+ "loss": 0.5173,
1033
+ "step": 3400
1034
+ },
1035
+ {
1036
+ "epoch": 4.385650224215246,
1037
+ "grad_norm": 1.327751636505127,
1038
+ "learning_rate": 0.00017995890410958903,
1039
+ "loss": 0.6182,
1040
+ "step": 3425
1041
+ },
1042
+ {
1043
+ "epoch": 4.417680973734785,
1044
+ "grad_norm": 1.076515555381775,
1045
+ "learning_rate": 0.00017893150684931505,
1046
+ "loss": 0.5229,
1047
+ "step": 3450
1048
+ },
1049
+ {
1050
+ "epoch": 4.449711723254324,
1051
+ "grad_norm": 1.5693820714950562,
1052
+ "learning_rate": 0.00017790410958904107,
1053
+ "loss": 0.5771,
1054
+ "step": 3475
1055
+ },
1056
+ {
1057
+ "epoch": 4.481742472773863,
1058
+ "grad_norm": 1.3674280643463135,
1059
+ "learning_rate": 0.0001768767123287671,
1060
+ "loss": 0.5237,
1061
+ "step": 3500
1062
+ },
1063
+ {
1064
+ "epoch": 4.513773222293402,
1065
+ "grad_norm": 2.232922315597534,
1066
+ "learning_rate": 0.00017584931506849314,
1067
+ "loss": 0.5916,
1068
+ "step": 3525
1069
+ },
1070
+ {
1071
+ "epoch": 4.545803971812941,
1072
+ "grad_norm": 1.1831066608428955,
1073
+ "learning_rate": 0.00017482191780821915,
1074
+ "loss": 0.5244,
1075
+ "step": 3550
1076
+ },
1077
+ {
1078
+ "epoch": 4.577834721332479,
1079
+ "grad_norm": 1.2374058961868286,
1080
+ "learning_rate": 0.0001737945205479452,
1081
+ "loss": 0.5743,
1082
+ "step": 3575
1083
+ },
1084
+ {
1085
+ "epoch": 4.609865470852018,
1086
+ "grad_norm": 1.7176926136016846,
1087
+ "learning_rate": 0.00017276712328767122,
1088
+ "loss": 0.5107,
1089
+ "step": 3600
1090
+ },
1091
+ {
1092
+ "epoch": 4.609865470852018,
1093
+ "eval_loss": 0.783900797367096,
1094
+ "eval_runtime": 191.9719,
1095
+ "eval_samples_per_second": 15.586,
1096
+ "eval_steps_per_second": 1.948,
1097
+ "eval_wer": 0.678503046127067,
1098
+ "step": 3600
1099
+ },
1100
+ {
1101
+ "epoch": 4.641896220371557,
1102
+ "grad_norm": 1.3690687417984009,
1103
+ "learning_rate": 0.00017173972602739726,
1104
+ "loss": 0.5845,
1105
+ "step": 3625
1106
+ },
1107
+ {
1108
+ "epoch": 4.673926969891095,
1109
+ "grad_norm": 1.3373069763183594,
1110
+ "learning_rate": 0.00017071232876712328,
1111
+ "loss": 0.5001,
1112
+ "step": 3650
1113
+ },
1114
+ {
1115
+ "epoch": 4.705957719410634,
1116
+ "grad_norm": 1.5431394577026367,
1117
+ "learning_rate": 0.0001696849315068493,
1118
+ "loss": 0.5728,
1119
+ "step": 3675
1120
+ },
1121
+ {
1122
+ "epoch": 4.737988468930173,
1123
+ "grad_norm": 1.4463599920272827,
1124
+ "learning_rate": 0.00016865753424657532,
1125
+ "loss": 0.4824,
1126
+ "step": 3700
1127
+ },
1128
+ {
1129
+ "epoch": 4.770019218449712,
1130
+ "grad_norm": 1.7865560054779053,
1131
+ "learning_rate": 0.00016763013698630134,
1132
+ "loss": 0.5931,
1133
+ "step": 3725
1134
+ },
1135
+ {
1136
+ "epoch": 4.802049967969251,
1137
+ "grad_norm": 1.0858443975448608,
1138
+ "learning_rate": 0.00016660273972602736,
1139
+ "loss": 0.5395,
1140
+ "step": 3750
1141
+ },
1142
+ {
1143
+ "epoch": 4.834080717488789,
1144
+ "grad_norm": 1.9803308248519897,
1145
+ "learning_rate": 0.00016557534246575343,
1146
+ "loss": 0.5776,
1147
+ "step": 3775
1148
+ },
1149
+ {
1150
+ "epoch": 4.866111467008328,
1151
+ "grad_norm": 3.4345428943634033,
1152
+ "learning_rate": 0.00016454794520547945,
1153
+ "loss": 0.4938,
1154
+ "step": 3800
1155
+ },
1156
+ {
1157
+ "epoch": 4.898142216527867,
1158
+ "grad_norm": 0.9586917757987976,
1159
+ "learning_rate": 0.00016352054794520546,
1160
+ "loss": 0.5707,
1161
+ "step": 3825
1162
+ },
1163
+ {
1164
+ "epoch": 4.930172966047405,
1165
+ "grad_norm": 1.524976372718811,
1166
+ "learning_rate": 0.00016249315068493148,
1167
+ "loss": 0.5169,
1168
+ "step": 3850
1169
+ },
1170
+ {
1171
+ "epoch": 4.962203715566944,
1172
+ "grad_norm": 1.1566359996795654,
1173
+ "learning_rate": 0.00016146575342465753,
1174
+ "loss": 0.6073,
1175
+ "step": 3875
1176
+ },
1177
+ {
1178
+ "epoch": 4.994234465086483,
1179
+ "grad_norm": 1.876478672027588,
1180
+ "learning_rate": 0.00016043835616438355,
1181
+ "loss": 0.523,
1182
+ "step": 3900
1183
+ },
1184
+ {
1185
+ "epoch": 5.025624599615631,
1186
+ "grad_norm": 0.982686460018158,
1187
+ "learning_rate": 0.00015941095890410957,
1188
+ "loss": 0.4881,
1189
+ "step": 3925
1190
+ },
1191
+ {
1192
+ "epoch": 5.05765534913517,
1193
+ "grad_norm": 1.5838254690170288,
1194
+ "learning_rate": 0.00015838356164383558,
1195
+ "loss": 0.4756,
1196
+ "step": 3950
1197
+ },
1198
+ {
1199
+ "epoch": 5.089686098654709,
1200
+ "grad_norm": 2.1962034702301025,
1201
+ "learning_rate": 0.00015735616438356166,
1202
+ "loss": 0.5028,
1203
+ "step": 3975
1204
+ },
1205
+ {
1206
+ "epoch": 5.121716848174247,
1207
+ "grad_norm": 4.335826396942139,
1208
+ "learning_rate": 0.00015632876712328767,
1209
+ "loss": 0.5199,
1210
+ "step": 4000
1211
+ },
1212
+ {
1213
+ "epoch": 5.121716848174247,
1214
+ "eval_loss": 0.8298487663269043,
1215
+ "eval_runtime": 192.8428,
1216
+ "eval_samples_per_second": 15.515,
1217
+ "eval_steps_per_second": 1.939,
1218
+ "eval_wer": 0.6859007832898172,
1219
+ "step": 4000
1220
+ }
1221
+ ],
1222
+ "logging_steps": 25,
1223
+ "max_steps": 7800,
1224
+ "num_input_tokens_seen": 0,
1225
+ "num_train_epochs": 10,
1226
+ "save_steps": 400,
1227
+ "stateful_callbacks": {
1228
+ "TrainerControl": {
1229
+ "args": {
1230
+ "should_epoch_stop": false,
1231
+ "should_evaluate": false,
1232
+ "should_log": false,
1233
+ "should_save": true,
1234
+ "should_training_stop": false
1235
+ },
1236
+ "attributes": {}
1237
+ }
1238
+ },
1239
+ "total_flos": 1.6408903738445339e+19,
1240
+ "train_batch_size": 16,
1241
+ "trial_name": null,
1242
+ "trial_params": null
1243
+ }
checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
3
+ size 5304
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
3
+ "activation_dropout": 0.05,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 256,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": true,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 55,
89
+ "proj_codevector_dim": 256,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.49.0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 58,
115
+ "xvector_output_dim": 512
116
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5f20b9274cca7dadb1e9cf07691826097d13ca0d8808dc60f2bc11604c120fa
3
+ size 1262045280
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
runs/Feb18_22-23-26_893f794f09cc/events.out.tfevents.1739918097.893f794f09cc.31.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b4538e8f1ee37bb10a3613d2d249df67da13bd4184e793cfb700888b7f29a2
3
+ size 44921
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "54": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "55": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "56": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "57": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": false,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "extra_special_tokens": {},
41
+ "model_max_length": 1000000000000000019884624838656,
42
+ "pad_token": "[PAD]",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "[UNK]",
47
+ "word_delimiter_token": "|"
48
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
3
+ size 5304
vocab.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 55,
3
+ "[UNK]": 54,
4
+ "|": 46,
5
+ "ء": 6,
6
+ "آ": 27,
7
+ "أ": 15,
8
+ "ؤ": 30,
9
+ "إ": 24,
10
+ "ئ": 36,
11
+ "ا": 9,
12
+ "ب": 13,
13
+ "ة": 19,
14
+ "ت": 2,
15
+ "ث": 1,
16
+ "ج": 26,
17
+ "ح": 33,
18
+ "خ": 5,
19
+ "د": 10,
20
+ "ذ": 37,
21
+ "ر": 53,
22
+ "ز": 39,
23
+ "س": 11,
24
+ "ش": 7,
25
+ "ص": 28,
26
+ "ض": 49,
27
+ "ط": 40,
28
+ "ظ": 22,
29
+ "ع": 44,
30
+ "غ": 38,
31
+ "ـ": 47,
32
+ "ف": 25,
33
+ "ق": 21,
34
+ "ك": 50,
35
+ "ل": 34,
36
+ "م": 12,
37
+ "ن": 41,
38
+ "ه": 4,
39
+ "و": 17,
40
+ "ى": 45,
41
+ "ي": 51,
42
+ "ً": 29,
43
+ "ٌ": 20,
44
+ "ٍ": 16,
45
+ "َ": 8,
46
+ "ُ": 23,
47
+ "ِ": 35,
48
+ "ّ": 43,
49
+ "ْ": 52,
50
+ "ٰ": 32,
51
+ "چ": 31,
52
+ "ک": 14,
53
+ "ی": 3,
54
+ "ۖ": 42,
55
+ "ۗ": 18,
56
+ "ۚ": 48,
57
+ "ۛ": 0
58
+ }