razhan committed on
Commit
ba470b0
·
verified ·
1 Parent(s): 174ffc0

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +27 -27
  2. eval_results.json +21 -21
  3. train_results.json +6 -6
  4. trainer_state.json +318 -94
all_results.json CHANGED
@@ -1,30 +1,30 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_avg_cer": 0.8497723918509256,
4
- "eval_avg_wer": 1.1917436443504037,
5
- "eval_gilaki_cer": 0.8534621578099839,
6
- "eval_gilaki_wer": 1.2232304900181488,
7
- "eval_hawrami_cer": 0.7705360586193598,
8
- "eval_hawrami_wer": 1.0631808278867103,
9
- "eval_laki_kurdish_cer": 0.8054118583366494,
10
- "eval_laki_kurdish_wer": 1.0567951318458417,
11
- "eval_loss": 4.144251346588135,
12
- "eval_mazanderani_cer": 0.7783553875236295,
13
- "eval_mazanderani_wer": 1.1377245508982037,
14
- "eval_runtime": 51.2787,
15
- "eval_samples": 64,
16
- "eval_samples_per_second": 7.235,
17
- "eval_southern_kurdish_cer": 1.0438808373590982,
18
- "eval_southern_kurdish_wer": 1.603921568627451,
19
- "eval_steps_per_second": 0.059,
20
- "eval_talysh_cer": 0.8333333333333334,
21
- "eval_talysh_wer": 1.0,
22
- "eval_zazaki_cer": 0.8634271099744245,
23
- "eval_zazaki_wer": 1.2573529411764706,
24
- "total_flos": 2.0054710812672e+17,
25
- "train_loss": 4.300785064697266,
26
- "train_runtime": 576.5889,
27
- "train_samples": 128,
28
- "train_samples_per_second": 5.363,
29
- "train_steps_per_second": 0.028
30
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_avg_cer": 0.2678389158869433,
4
+ "eval_avg_wer": 0.7003291969579674,
5
+ "eval_gilaki_cer": 0.38182211642501623,
6
+ "eval_gilaki_wer": 0.9606072252113759,
7
+ "eval_hawrami_cer": 0.08648390486197345,
8
+ "eval_hawrami_wer": 0.4144927536231884,
9
+ "eval_laki_kurdish_cer": 0.16528387831462502,
10
+ "eval_laki_kurdish_wer": 0.603680981595092,
11
+ "eval_loss": 0.5420735478401184,
12
+ "eval_mazanderani_cer": 0.2510331798323297,
13
+ "eval_mazanderani_wer": 0.6836474783494652,
14
+ "eval_runtime": 458.9768,
15
+ "eval_samples": 3258,
16
+ "eval_samples_per_second": 7.098,
17
+ "eval_southern_kurdish_cer": 0.17209076399972242,
18
+ "eval_southern_kurdish_wer": 0.5609543712991989,
19
+ "eval_steps_per_second": 0.057,
20
+ "eval_talysh_cer": 0.5,
21
+ "eval_talysh_wer": 0.9166666666666666,
22
+ "eval_zazaki_cer": 0.31815856777493606,
23
+ "eval_zazaki_wer": 0.7622549019607843,
24
+ "total_flos": 5.41529079939072e+18,
25
+ "train_loss": 0.6912094768954486,
26
+ "train_runtime": 12274.8821,
27
+ "train_samples": 20873,
28
+ "train_samples_per_second": 6.802,
29
+ "train_steps_per_second": 0.053
30
  }
eval_results.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_avg_cer": 0.8497723918509256,
4
- "eval_avg_wer": 1.1917436443504037,
5
- "eval_gilaki_cer": 0.8534621578099839,
6
- "eval_gilaki_wer": 1.2232304900181488,
7
- "eval_hawrami_cer": 0.7705360586193598,
8
- "eval_hawrami_wer": 1.0631808278867103,
9
- "eval_laki_kurdish_cer": 0.8054118583366494,
10
- "eval_laki_kurdish_wer": 1.0567951318458417,
11
- "eval_loss": 4.144251346588135,
12
- "eval_mazanderani_cer": 0.7783553875236295,
13
- "eval_mazanderani_wer": 1.1377245508982037,
14
- "eval_runtime": 51.2787,
15
- "eval_samples": 64,
16
- "eval_samples_per_second": 7.235,
17
- "eval_southern_kurdish_cer": 1.0438808373590982,
18
- "eval_southern_kurdish_wer": 1.603921568627451,
19
- "eval_steps_per_second": 0.059,
20
- "eval_talysh_cer": 0.8333333333333334,
21
- "eval_talysh_wer": 1.0,
22
- "eval_zazaki_cer": 0.8634271099744245,
23
- "eval_zazaki_wer": 1.2573529411764706
24
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_avg_cer": 0.2678389158869433,
4
+ "eval_avg_wer": 0.7003291969579674,
5
+ "eval_gilaki_cer": 0.38182211642501623,
6
+ "eval_gilaki_wer": 0.9606072252113759,
7
+ "eval_hawrami_cer": 0.08648390486197345,
8
+ "eval_hawrami_wer": 0.4144927536231884,
9
+ "eval_laki_kurdish_cer": 0.16528387831462502,
10
+ "eval_laki_kurdish_wer": 0.603680981595092,
11
+ "eval_loss": 0.5420735478401184,
12
+ "eval_mazanderani_cer": 0.2510331798323297,
13
+ "eval_mazanderani_wer": 0.6836474783494652,
14
+ "eval_runtime": 458.9768,
15
+ "eval_samples": 3258,
16
+ "eval_samples_per_second": 7.098,
17
+ "eval_southern_kurdish_cer": 0.17209076399972242,
18
+ "eval_southern_kurdish_wer": 0.5609543712991989,
19
+ "eval_steps_per_second": 0.057,
20
+ "eval_talysh_cer": 0.5,
21
+ "eval_talysh_wer": 0.9166666666666666,
22
+ "eval_zazaki_cer": 0.31815856777493606,
23
+ "eval_zazaki_wer": 0.7622549019607843
24
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 4.0,
3
- "total_flos": 2.0054710812672e+17,
4
- "train_loss": 4.300785064697266,
5
- "train_runtime": 576.5889,
6
- "train_samples": 128,
7
- "train_samples_per_second": 5.363,
8
- "train_steps_per_second": 0.028
9
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "total_flos": 5.41529079939072e+18,
4
+ "train_loss": 0.6912094768954486,
5
+ "train_runtime": 12274.8821,
6
+ "train_samples": 20873,
7
+ "train_samples_per_second": 6.802,
8
+ "train_steps_per_second": 0.053
9
  }
trainer_state.json CHANGED
@@ -3,119 +3,343 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
- "eval_avg_cer": 0.9148516180407876,
14
- "eval_avg_wer": 1.2659922494294373,
15
- "eval_gilaki_cer": 1.0144927536231885,
16
- "eval_gilaki_wer": 1.3956442831215972,
17
- "eval_hawrami_cer": 0.7736212880833012,
18
- "eval_hawrami_wer": 1.0631808278867103,
19
- "eval_laki_kurdish_cer": 0.8137684042976522,
20
- "eval_laki_kurdish_wer": 1.0669371196754565,
21
- "eval_loss": 4.381021022796631,
22
- "eval_mazanderani_cer": 1.031663516068053,
23
- "eval_mazanderani_wer": 1.467065868263473,
24
- "eval_runtime": 51.1074,
25
- "eval_samples_per_second": 7.259,
26
- "eval_southern_kurdish_cer": 1.0330112721417068,
27
- "eval_southern_kurdish_wer": 1.611764705882353,
28
- "eval_steps_per_second": 0.059,
29
- "eval_talysh_cer": 0.875,
30
- "eval_talysh_wer": 1.0,
31
- "eval_zazaki_cer": 0.8624040920716113,
32
- "eval_zazaki_wer": 1.2573529411764706,
33
- "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_avg_cer": 0.8929720647973036,
38
- "eval_avg_wer": 1.2394954801228035,
39
- "eval_gilaki_cer": 0.8586956521739131,
40
- "eval_gilaki_wer": 1.2141560798548094,
41
- "eval_hawrami_cer": 0.7736212880833012,
42
- "eval_hawrami_wer": 1.0631808278867103,
43
- "eval_laki_kurdish_cer": 0.8070035813768405,
44
- "eval_laki_kurdish_wer": 1.0649087221095335,
45
- "eval_loss": 4.374571323394775,
46
- "eval_mazanderani_cer": 1.0430056710775046,
47
- "eval_mazanderani_wer": 1.467065868263473,
48
- "eval_runtime": 51.7278,
49
- "eval_samples_per_second": 7.172,
50
- "eval_southern_kurdish_cer": 1.0326086956521738,
51
- "eval_southern_kurdish_wer": 1.6098039215686275,
52
- "eval_steps_per_second": 0.058,
53
- "eval_talysh_cer": 0.875,
54
- "eval_talysh_wer": 1.0,
55
- "eval_zazaki_cer": 0.8608695652173913,
56
- "eval_zazaki_wer": 1.2573529411764706,
57
- "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 3.0,
61
- "eval_avg_cer": 0.8868646660244508,
62
- "eval_avg_wer": 1.2392111739383087,
63
- "eval_gilaki_cer": 0.8595008051529791,
64
- "eval_gilaki_wer": 1.2177858439201452,
65
- "eval_hawrami_cer": 0.7743925954492865,
66
- "eval_hawrami_wer": 1.0675381263616557,
67
- "eval_laki_kurdish_cer": 0.8085953044170314,
68
- "eval_laki_kurdish_wer": 1.0628803245436105,
69
- "eval_loss": 4.324982643127441,
70
- "eval_mazanderani_cer": 1.0387523629489603,
71
- "eval_mazanderani_wer": 1.4610778443113772,
72
- "eval_runtime": 51.3386,
73
- "eval_samples_per_second": 7.227,
74
- "eval_southern_kurdish_cer": 1.0326086956521738,
75
- "eval_southern_kurdish_wer": 1.607843137254902,
76
- "eval_steps_per_second": 0.058,
77
- "eval_talysh_cer": 0.8333333333333334,
78
- "eval_talysh_wer": 1.0,
79
- "eval_zazaki_cer": 0.8608695652173913,
80
- "eval_zazaki_wer": 1.2573529411764706,
81
- "step": 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  },
83
  {
84
  "epoch": 4.0,
85
- "eval_avg_cer": 0.8497723918509256,
86
- "eval_avg_wer": 1.1917436443504037,
87
- "eval_gilaki_cer": 0.8534621578099839,
88
- "eval_gilaki_wer": 1.2232304900181488,
89
- "eval_hawrami_cer": 0.7705360586193598,
90
- "eval_hawrami_wer": 1.0631808278867103,
91
- "eval_laki_kurdish_cer": 0.8054118583366494,
92
- "eval_laki_kurdish_wer": 1.0567951318458417,
93
- "eval_loss": 4.144251346588135,
94
- "eval_mazanderani_cer": 0.7783553875236295,
95
- "eval_mazanderani_wer": 1.1377245508982037,
96
- "eval_runtime": 51.6207,
97
- "eval_samples_per_second": 7.187,
98
- "eval_southern_kurdish_cer": 1.0438808373590982,
99
- "eval_southern_kurdish_wer": 1.603921568627451,
100
- "eval_steps_per_second": 0.058,
101
- "eval_talysh_cer": 0.8333333333333334,
102
- "eval_talysh_wer": 1.0,
103
- "eval_zazaki_cer": 0.8634271099744245,
104
- "eval_zazaki_wer": 1.2573529411764706,
105
- "step": 16
106
  },
107
  {
108
  "epoch": 4.0,
109
- "step": 16,
110
- "total_flos": 2.0054710812672e+17,
111
- "train_loss": 4.300785064697266,
112
- "train_runtime": 576.5889,
113
- "train_samples_per_second": 5.363,
114
- "train_steps_per_second": 0.028
115
  }
116
  ],
117
  "logging_steps": 20,
118
- "max_steps": 16,
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 4,
121
  "save_steps": 500,
@@ -131,8 +355,8 @@
131
  "attributes": {}
132
  }
133
  },
134
- "total_flos": 2.0054710812672e+17,
135
- "train_batch_size": 256,
136
  "trial_name": null,
137
  "trial_params": null
138
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 656,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.12195121951219512,
13
+ "grad_norm": 42.956241607666016,
14
+ "learning_rate": 1.5e-06,
15
+ "loss": 3.946,
16
+ "step": 20
17
+ },
18
+ {
19
+ "epoch": 0.24390243902439024,
20
+ "grad_norm": 18.780467987060547,
21
+ "learning_rate": 3.5e-06,
22
+ "loss": 2.801,
23
+ "step": 40
24
+ },
25
+ {
26
+ "epoch": 0.36585365853658536,
27
+ "grad_norm": 10.132372856140137,
28
+ "learning_rate": 5.500000000000001e-06,
29
+ "loss": 1.829,
30
+ "step": 60
31
+ },
32
+ {
33
+ "epoch": 0.4878048780487805,
34
+ "grad_norm": 6.34807825088501,
35
+ "learning_rate": 7.500000000000001e-06,
36
+ "loss": 1.1664,
37
+ "step": 80
38
+ },
39
+ {
40
+ "epoch": 0.6097560975609756,
41
+ "grad_norm": 3.893784523010254,
42
+ "learning_rate": 9.5e-06,
43
+ "loss": 0.8395,
44
+ "step": 100
45
+ },
46
+ {
47
+ "epoch": 0.7317073170731707,
48
+ "grad_norm": 3.6245577335357666,
49
+ "learning_rate": 9.73021582733813e-06,
50
+ "loss": 0.6985,
51
+ "step": 120
52
+ },
53
+ {
54
+ "epoch": 0.8536585365853658,
55
+ "grad_norm": 3.2853732109069824,
56
+ "learning_rate": 9.370503597122303e-06,
57
+ "loss": 0.6198,
58
+ "step": 140
59
+ },
60
+ {
61
+ "epoch": 0.975609756097561,
62
+ "grad_norm": 3.1876819133758545,
63
+ "learning_rate": 9.010791366906476e-06,
64
+ "loss": 0.5933,
65
+ "step": 160
66
+ },
67
  {
68
  "epoch": 1.0,
69
+ "eval_avg_cer": 0.35838623267570763,
70
+ "eval_avg_wer": 0.8217081943985028,
71
+ "eval_gilaki_cer": 0.38532785111447737,
72
+ "eval_gilaki_wer": 0.9969254419677172,
73
+ "eval_hawrami_cer": 0.11622886617633012,
74
+ "eval_hawrami_wer": 0.5257112184648417,
75
+ "eval_laki_kurdish_cer": 0.24613632666341304,
76
+ "eval_laki_kurdish_wer": 0.7856850715746422,
77
+ "eval_loss": 0.7336705923080444,
78
+ "eval_mazanderani_cer": 0.3138505136379738,
79
+ "eval_mazanderani_wer": 0.8884360672440142,
80
+ "eval_runtime": 457.0137,
81
+ "eval_samples_per_second": 7.129,
82
+ "eval_southern_kurdish_cer": 0.21365623482062313,
83
+ "eval_southern_kurdish_wer": 0.6900034831069314,
84
+ "eval_steps_per_second": 0.057,
85
+ "eval_talysh_cer": 0.5,
86
+ "eval_talysh_wer": 0.9166666666666666,
87
+ "eval_zazaki_cer": 0.7335038363171356,
88
+ "eval_zazaki_wer": 0.9485294117647058,
89
+ "step": 164
90
+ },
91
+ {
92
+ "epoch": 1.0975609756097562,
93
+ "grad_norm": 3.3254923820495605,
94
+ "learning_rate": 8.651079136690648e-06,
95
+ "loss": 0.5337,
96
+ "step": 180
97
+ },
98
+ {
99
+ "epoch": 1.2195121951219512,
100
+ "grad_norm": 3.416600227355957,
101
+ "learning_rate": 8.29136690647482e-06,
102
+ "loss": 0.5054,
103
+ "step": 200
104
+ },
105
+ {
106
+ "epoch": 1.3414634146341464,
107
+ "grad_norm": 3.544034481048584,
108
+ "learning_rate": 7.931654676258992e-06,
109
+ "loss": 0.4915,
110
+ "step": 220
111
+ },
112
+ {
113
+ "epoch": 1.4634146341463414,
114
+ "grad_norm": 2.7927117347717285,
115
+ "learning_rate": 7.571942446043166e-06,
116
+ "loss": 0.47,
117
+ "step": 240
118
+ },
119
+ {
120
+ "epoch": 1.5853658536585367,
121
+ "grad_norm": 3.406677007675171,
122
+ "learning_rate": 7.212230215827338e-06,
123
+ "loss": 0.4681,
124
+ "step": 260
125
+ },
126
+ {
127
+ "epoch": 1.7073170731707317,
128
+ "grad_norm": 2.7277846336364746,
129
+ "learning_rate": 6.852517985611511e-06,
130
+ "loss": 0.4653,
131
+ "step": 280
132
+ },
133
+ {
134
+ "epoch": 1.8292682926829267,
135
+ "grad_norm": 2.8852195739746094,
136
+ "learning_rate": 6.4928057553956835e-06,
137
+ "loss": 0.4281,
138
+ "step": 300
139
+ },
140
+ {
141
+ "epoch": 1.951219512195122,
142
+ "grad_norm": 3.2276437282562256,
143
+ "learning_rate": 6.133093525179856e-06,
144
+ "loss": 0.4436,
145
+ "step": 320
146
  },
147
  {
148
  "epoch": 2.0,
149
+ "eval_avg_cer": 0.2926722864574391,
150
+ "eval_avg_wer": 0.7377513408917141,
151
+ "eval_gilaki_cer": 0.38182211642501623,
152
+ "eval_gilaki_wer": 0.9723289777094543,
153
+ "eval_hawrami_cer": 0.09332314452192186,
154
+ "eval_hawrami_wer": 0.44186795491143316,
155
+ "eval_laki_kurdish_cer": 0.1845615747519115,
156
+ "eval_laki_kurdish_wer": 0.6486707566462168,
157
+ "eval_loss": 0.5930325984954834,
158
+ "eval_mazanderani_cer": 0.29389538316212066,
159
+ "eval_mazanderani_wer": 0.7514009169638308,
160
+ "eval_runtime": 456.2009,
161
+ "eval_samples_per_second": 7.142,
162
+ "eval_southern_kurdish_cer": 0.17975851779890362,
163
+ "eval_southern_kurdish_wer": 0.5950888192267503,
164
+ "eval_steps_per_second": 0.057,
165
+ "eval_talysh_cer": 0.5,
166
+ "eval_talysh_wer": 0.9166666666666666,
167
+ "eval_zazaki_cer": 0.4153452685421995,
168
+ "eval_zazaki_wer": 0.8382352941176471,
169
+ "step": 328
170
+ },
171
+ {
172
+ "epoch": 2.073170731707317,
173
+ "grad_norm": 3.5604612827301025,
174
+ "learning_rate": 5.773381294964029e-06,
175
+ "loss": 0.4201,
176
+ "step": 340
177
+ },
178
+ {
179
+ "epoch": 2.1951219512195124,
180
+ "grad_norm": 2.8034543991088867,
181
+ "learning_rate": 5.413669064748202e-06,
182
+ "loss": 0.407,
183
+ "step": 360
184
+ },
185
+ {
186
+ "epoch": 2.317073170731707,
187
+ "grad_norm": 3.1773579120635986,
188
+ "learning_rate": 5.053956834532374e-06,
189
+ "loss": 0.3865,
190
+ "step": 380
191
+ },
192
+ {
193
+ "epoch": 2.4390243902439024,
194
+ "grad_norm": 2.6149208545684814,
195
+ "learning_rate": 4.6942446043165475e-06,
196
+ "loss": 0.4109,
197
+ "step": 400
198
+ },
199
+ {
200
+ "epoch": 2.5609756097560976,
201
+ "grad_norm": 2.5704352855682373,
202
+ "learning_rate": 4.33453237410072e-06,
203
+ "loss": 0.3879,
204
+ "step": 420
205
+ },
206
+ {
207
+ "epoch": 2.682926829268293,
208
+ "grad_norm": 2.887918472290039,
209
+ "learning_rate": 3.974820143884892e-06,
210
+ "loss": 0.3831,
211
+ "step": 440
212
+ },
213
+ {
214
+ "epoch": 2.8048780487804876,
215
+ "grad_norm": 2.9402122497558594,
216
+ "learning_rate": 3.6151079136690647e-06,
217
+ "loss": 0.3946,
218
+ "step": 460
219
+ },
220
+ {
221
+ "epoch": 2.926829268292683,
222
+ "grad_norm": 2.4520623683929443,
223
+ "learning_rate": 3.2553956834532375e-06,
224
+ "loss": 0.387,
225
+ "step": 480
226
  },
227
  {
228
  "epoch": 3.0,
229
+ "eval_avg_cer": 0.2707400949444132,
230
+ "eval_avg_wer": 0.7061799250572226,
231
+ "eval_gilaki_cer": 0.3735122267907379,
232
+ "eval_gilaki_wer": 0.9523443504996156,
233
+ "eval_hawrami_cer": 0.08913936383608749,
234
+ "eval_hawrami_wer": 0.42297369833601717,
235
+ "eval_laki_kurdish_cer": 0.1678054335448186,
236
+ "eval_laki_kurdish_wer": 0.6094069529652352,
237
+ "eval_loss": 0.5509017109870911,
238
+ "eval_mazanderani_cer": 0.24359428503955602,
239
+ "eval_mazanderani_wer": 0.6821192052980133,
240
+ "eval_runtime": 459.5728,
241
+ "eval_samples_per_second": 7.089,
242
+ "eval_southern_kurdish_cer": 0.1871140101311498,
243
+ "eval_southern_kurdish_wer": 0.5705329153605015,
244
+ "eval_steps_per_second": 0.057,
245
+ "eval_talysh_cer": 0.5,
246
+ "eval_talysh_wer": 0.9166666666666666,
247
+ "eval_zazaki_cer": 0.3340153452685422,
248
+ "eval_zazaki_wer": 0.7892156862745098,
249
+ "step": 492
250
+ },
251
+ {
252
+ "epoch": 3.048780487804878,
253
+ "grad_norm": 2.502338409423828,
254
+ "learning_rate": 2.89568345323741e-06,
255
+ "loss": 0.3788,
256
+ "step": 500
257
+ },
258
+ {
259
+ "epoch": 3.1707317073170733,
260
+ "grad_norm": 2.474639654159546,
261
+ "learning_rate": 2.5359712230215827e-06,
262
+ "loss": 0.3829,
263
+ "step": 520
264
+ },
265
+ {
266
+ "epoch": 3.292682926829268,
267
+ "grad_norm": 2.5425171852111816,
268
+ "learning_rate": 2.1762589928057555e-06,
269
+ "loss": 0.3557,
270
+ "step": 540
271
+ },
272
+ {
273
+ "epoch": 3.4146341463414633,
274
+ "grad_norm": 2.498109817504883,
275
+ "learning_rate": 1.8165467625899283e-06,
276
+ "loss": 0.3607,
277
+ "step": 560
278
+ },
279
+ {
280
+ "epoch": 3.5365853658536586,
281
+ "grad_norm": 2.560028076171875,
282
+ "learning_rate": 1.4568345323741009e-06,
283
+ "loss": 0.3544,
284
+ "step": 580
285
+ },
286
+ {
287
+ "epoch": 3.658536585365854,
288
+ "grad_norm": 2.3217689990997314,
289
+ "learning_rate": 1.0971223021582735e-06,
290
+ "loss": 0.3614,
291
+ "step": 600
292
+ },
293
+ {
294
+ "epoch": 3.7804878048780486,
295
+ "grad_norm": 2.47906231880188,
296
+ "learning_rate": 7.37410071942446e-07,
297
+ "loss": 0.3608,
298
+ "step": 620
299
+ },
300
+ {
301
+ "epoch": 3.902439024390244,
302
+ "grad_norm": 2.4336979389190674,
303
+ "learning_rate": 3.7769784172661875e-07,
304
+ "loss": 0.3575,
305
+ "step": 640
306
  },
307
  {
308
  "epoch": 4.0,
309
+ "eval_avg_cer": 0.2678389158869433,
310
+ "eval_avg_wer": 0.7003291969579674,
311
+ "eval_gilaki_cer": 0.38182211642501623,
312
+ "eval_gilaki_wer": 0.9606072252113759,
313
+ "eval_hawrami_cer": 0.08648390486197345,
314
+ "eval_hawrami_wer": 0.4144927536231884,
315
+ "eval_laki_kurdish_cer": 0.16528387831462502,
316
+ "eval_laki_kurdish_wer": 0.603680981595092,
317
+ "eval_loss": 0.5420735478401184,
318
+ "eval_mazanderani_cer": 0.2510331798323297,
319
+ "eval_mazanderani_wer": 0.6836474783494652,
320
+ "eval_runtime": 457.2923,
321
+ "eval_samples_per_second": 7.125,
322
+ "eval_southern_kurdish_cer": 0.17209076399972242,
323
+ "eval_southern_kurdish_wer": 0.5609543712991989,
324
+ "eval_steps_per_second": 0.057,
325
+ "eval_talysh_cer": 0.5,
326
+ "eval_talysh_wer": 0.9166666666666666,
327
+ "eval_zazaki_cer": 0.31815856777493606,
328
+ "eval_zazaki_wer": 0.7622549019607843,
329
+ "step": 656
330
  },
331
  {
332
  "epoch": 4.0,
333
+ "step": 656,
334
+ "total_flos": 5.41529079939072e+18,
335
+ "train_loss": 0.6912094768954486,
336
+ "train_runtime": 12274.8821,
337
+ "train_samples_per_second": 6.802,
338
+ "train_steps_per_second": 0.053
339
  }
340
  ],
341
  "logging_steps": 20,
342
+ "max_steps": 656,
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 4,
345
  "save_steps": 500,
 
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 5.41529079939072e+18,
359
+ "train_batch_size": 128,
360
  "trial_name": null,
361
  "trial_params": null
362
  }