ayjays132 committed on
Commit
cd7678f
·
verified ·
1 Parent(s): 2a360a0

Upload 13 files

Browse files
added_tokens.json CHANGED
@@ -1,32 +1,34 @@
1
  {
2
- "<active_listening>": 50027,
3
- "<apology>": 50012,
4
- "<bos>": 50002,
5
- "<cls>": 50007,
6
- "<empathy>": 50026,
7
- "<eos>": 50003,
8
- "<farewell>": 50010,
9
- "<greeting>": 50009,
10
- "<mask>": 50008,
11
- "<open_ended_question>": 50028,
12
- "<pad>": 50004,
13
- "<reflective_summary>": 50029,
14
- "<sep>": 50006,
15
- "<thank>": 50011,
16
- "<unk>": 50005,
17
- "<|endoftext|>": 50000,
18
- "<|pad|>": 50001,
19
- "active_listening": 50023,
20
- "amazeballs": 50013,
21
- "bromance": 50021,
22
- "crowdfunding": 50020,
23
- "cryptocurrency": 50014,
24
- "facepalm": 50019,
25
- "hangry": 50022,
26
- "open_ended_question": 50024,
27
- "photobomb": 50018,
28
- "reflective_summary": 50025,
29
- "upcycle": 50017,
30
- "vlog": 50016,
31
- "webinar": 50015
 
 
32
  }
 
1
  {
2
+ "<active_listening>": 50286,
3
+ "<apology>": 50268,
4
+ "<bos>": 50258,
5
+ "<cls>": 50263,
6
+ "<empathy>": 50285,
7
+ "<eos>": 50259,
8
+ "<farewell>": 50266,
9
+ "<greeting>": 50265,
10
+ "<mask>": 50264,
11
+ "<open_ended_question>": 50287,
12
+ "<pad>": 50260,
13
+ "<reflective_summary>": 50288,
14
+ "<sep>": 50262,
15
+ "<thank>": 50267,
16
+ "<unk>": 50261,
17
+ "<|pad|>": 50257,
18
+ "[PAD]": 50269,
19
+ "active_listening": 50282,
20
+ "amazeballs": 50271,
21
+ "bromance": 50279,
22
+ "crowdfunding": 50278,
23
+ "cryptocurrency": 50272,
24
+ "empathy": 50281,
25
+ "facepalm": 50277,
26
+ "hangry": 50280,
27
+ "intelligent": 50270,
28
+ "open_ended_question": 50283,
29
+ "photobomb": 50276,
30
+ "reflective_summary": 50284,
31
+ "upcycle": 50275,
32
+ "vlog": 50274,
33
+ "webinar": 50273
34
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfbfe49a0df1bf0c68b96e533cfcac9860d224e6cd93987cefd99755aeed9293
3
- size 1650346720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da95f6795f77c45c2ab157a5234496e42cf4bda36a3383b9e497c874124f10cd
3
+ size 1419322880
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fd77b5f09d2be234b9c114ec17c3402cfa26c5f985967d53d9ecd811d60c62d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b5567834074c579fcb3e78e7c6dcacf1051d2f04e2ff9f05db6922ac7fbb04
3
  size 1064
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1c20b7a90d2b5bd165c0532f7f94a9f412f47fe9fca3bb900b0a9cfd95365d1
3
- size 2114555
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ae0200393412dd1e8cb527e4f3016749edc4fb5820904972a3220115d5fa0a
3
+ size 2114610
tokenizer_config.json CHANGED
@@ -1,6 +1,38 @@
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "50256": {
5
  "content": "<|endoftext|>",
6
  "lstrip": false,
@@ -8,6 +40,262 @@
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
12
  },
13
  "bos_token": "<|endoftext|>",
@@ -15,9 +303,14 @@
15
  "eos_token": "<|endoftext|>",
16
  "max_length": 1024,
17
  "model_max_length": 1024,
 
18
  "pad_token": "<|endoftext|>",
 
19
  "padding": "max_length",
 
 
20
  "tokenizer_class": "GPT2Tokenizer",
 
21
  "truncation_strategy": "longest_first",
22
  "unk_token": "<|endoftext|>"
23
  }
 
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
+ "11274": {
5
+ "content": "good",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": false
11
+ },
12
+ "26209": {
13
+ "content": "response",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": false
19
+ },
20
+ "34191": {
21
+ "content": "happy",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "38986": {
29
+ "content": "environment",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
  "50256": {
37
  "content": "<|endoftext|>",
38
  "lstrip": false,
 
40
  "rstrip": false,
41
  "single_word": false,
42
  "special": true
43
+ },
44
+ "50257": {
45
+ "content": "<|pad|>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50258": {
53
+ "content": "<bos>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "50259": {
61
+ "content": "<eos>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "50260": {
69
+ "content": "<pad>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "50261": {
77
+ "content": "<unk>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "50262": {
85
+ "content": "<sep>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "50263": {
93
+ "content": "<cls>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "50264": {
101
+ "content": "<mask>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "50265": {
109
+ "content": "<greeting>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "50266": {
117
+ "content": "<farewell>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "50267": {
125
+ "content": "<thank>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "50268": {
133
+ "content": "<apology>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "50269": {
141
+ "content": "[PAD]",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50270": {
149
+ "content": "intelligent",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50271": {
157
+ "content": "amazeballs",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50272": {
165
+ "content": "cryptocurrency",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50273": {
173
+ "content": "webinar",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50274": {
181
+ "content": "vlog",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50275": {
189
+ "content": "upcycle",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50276": {
197
+ "content": "photobomb",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "50277": {
205
+ "content": "facepalm",
206
+ "lstrip": false,
207
+ "normalized": true,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "50278": {
213
+ "content": "crowdfunding",
214
+ "lstrip": false,
215
+ "normalized": true,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "50279": {
221
+ "content": "bromance",
222
+ "lstrip": false,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "50280": {
229
+ "content": "hangry",
230
+ "lstrip": false,
231
+ "normalized": true,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "50281": {
237
+ "content": "empathy",
238
+ "lstrip": false,
239
+ "normalized": true,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "50282": {
245
+ "content": "active_listening",
246
+ "lstrip": false,
247
+ "normalized": true,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "50283": {
253
+ "content": "open_ended_question",
254
+ "lstrip": false,
255
+ "normalized": true,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "50284": {
261
+ "content": "reflective_summary",
262
+ "lstrip": false,
263
+ "normalized": true,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "50285": {
269
+ "content": "<empathy>",
270
+ "lstrip": false,
271
+ "normalized": true,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "50286": {
277
+ "content": "<active_listening>",
278
+ "lstrip": false,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "50287": {
285
+ "content": "<open_ended_question>",
286
+ "lstrip": false,
287
+ "normalized": true,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "50288": {
293
+ "content": "<reflective_summary>",
294
+ "lstrip": false,
295
+ "normalized": true,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
  }
300
  },
301
  "bos_token": "<|endoftext|>",
 
303
  "eos_token": "<|endoftext|>",
304
  "max_length": 1024,
305
  "model_max_length": 1024,
306
+ "pad_to_multiple_of": null,
307
  "pad_token": "<|endoftext|>",
308
+ "pad_token_type_id": 0,
309
  "padding": "max_length",
310
+ "padding_side": "right",
311
+ "stride": 0,
312
  "tokenizer_class": "GPT2Tokenizer",
313
+ "truncation_side": "right",
314
  "truncation_strategy": "longest_first",
315
  "unk_token": "<|endoftext|>"
316
  }
trainer_state.json CHANGED
@@ -3,136 +3,234 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 141,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.2127659574468085,
13
- "grad_norm": 14.269296646118164,
14
- "learning_rate": 4.858156028368794e-05,
15
- "loss": 17.1938,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.425531914893617,
20
- "grad_norm": 6.576030254364014,
21
- "learning_rate": 4.50354609929078e-05,
22
- "loss": 8.7349,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.6382978723404256,
27
- "grad_norm": 9.514806747436523,
28
- "learning_rate": 4.148936170212766e-05,
29
- "loss": 7.7678,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.851063829787234,
34
- "grad_norm": 9.582891464233398,
35
- "learning_rate": 3.794326241134752e-05,
36
- "loss": 7.0878,
37
  "step": 40
38
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  {
40
  "epoch": 1.0,
41
- "eval_loss": 6.37921142578125,
42
- "eval_runtime": 5.6868,
43
- "eval_samples_per_second": 7.386,
44
- "eval_steps_per_second": 1.934,
45
- "step": 47
46
  },
47
  {
48
  "epoch": 1.0638297872340425,
49
- "grad_norm": 12.990375518798828,
50
- "learning_rate": 3.4397163120567377e-05,
51
- "loss": 6.86,
52
- "step": 50
53
  },
54
  {
55
- "epoch": 1.2765957446808511,
56
- "grad_norm": 12.003615379333496,
57
  "learning_rate": 3.085106382978723e-05,
58
- "loss": 6.5234,
59
- "step": 60
60
  },
61
  {
62
- "epoch": 1.4893617021276595,
63
- "grad_norm": 14.846796989440918,
 
 
 
 
 
 
 
64
  "learning_rate": 2.7304964539007094e-05,
65
- "loss": 6.2994,
66
- "step": 70
67
  },
68
  {
69
- "epoch": 1.702127659574468,
70
- "grad_norm": 15.324792861938477,
 
 
 
 
 
 
 
71
  "learning_rate": 2.3758865248226954e-05,
72
- "loss": 6.1691,
73
- "step": 80
74
  },
75
  {
76
- "epoch": 1.9148936170212765,
77
- "grad_norm": 11.699965476989746,
 
 
 
 
 
 
 
78
  "learning_rate": 2.0212765957446807e-05,
79
- "loss": 6.1812,
80
- "step": 90
 
 
 
 
 
 
 
81
  },
82
  {
83
  "epoch": 2.0,
84
- "eval_loss": 5.777428150177002,
85
- "eval_runtime": 6.2453,
86
- "eval_samples_per_second": 6.725,
87
- "eval_steps_per_second": 1.761,
88
- "step": 94
89
  },
90
  {
91
- "epoch": 2.127659574468085,
92
- "grad_norm": 13.892985343933105,
93
  "learning_rate": 1.6666666666666667e-05,
94
- "loss": 6.0102,
95
- "step": 100
96
  },
97
  {
98
- "epoch": 2.3404255319148937,
99
- "grad_norm": 15.94567584991455,
 
 
 
 
 
 
 
100
  "learning_rate": 1.3120567375886524e-05,
101
- "loss": 5.9313,
102
- "step": 110
103
  },
104
  {
105
- "epoch": 2.5531914893617023,
106
- "grad_norm": 10.468910217285156,
 
 
 
 
 
 
 
107
  "learning_rate": 9.574468085106383e-06,
108
- "loss": 6.013,
109
- "step": 120
110
  },
111
  {
112
- "epoch": 2.7659574468085104,
113
- "grad_norm": 10.299534797668457,
 
 
 
 
 
 
 
114
  "learning_rate": 6.028368794326241e-06,
115
- "loss": 5.9602,
116
- "step": 130
117
  },
118
  {
119
- "epoch": 2.978723404255319,
120
- "grad_norm": 12.492215156555176,
 
 
 
 
 
 
 
121
  "learning_rate": 2.4822695035460995e-06,
122
- "loss": 5.8626,
123
- "step": 140
 
 
 
 
 
 
 
124
  },
125
  {
126
  "epoch": 3.0,
127
- "eval_loss": 5.686648845672607,
128
- "eval_runtime": 6.8751,
129
- "eval_samples_per_second": 6.109,
130
- "eval_steps_per_second": 1.6,
131
- "step": 141
132
  }
133
  ],
134
  "logging_steps": 10,
135
- "max_steps": 141,
136
  "num_input_tokens_seen": 0,
137
  "num_train_epochs": 3,
138
  "save_steps": 500,
 
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 282,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.10638297872340426,
13
+ "grad_norm": 3.6553943157196045,
14
+ "learning_rate": 4.840425531914894e-05,
15
+ "loss": 0.9456,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.2127659574468085,
20
+ "grad_norm": 3.9983913898468018,
21
+ "learning_rate": 4.663120567375887e-05,
22
+ "loss": 0.8552,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.3191489361702128,
27
+ "grad_norm": 3.7922072410583496,
28
+ "learning_rate": 4.48581560283688e-05,
29
+ "loss": 1.0052,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.425531914893617,
34
+ "grad_norm": 3.6140127182006836,
35
+ "learning_rate": 4.3262411347517734e-05,
36
+ "loss": 0.9121,
37
  "step": 40
38
  },
39
+ {
40
+ "epoch": 0.5319148936170213,
41
+ "grad_norm": 3.507614850997925,
42
+ "learning_rate": 4.148936170212766e-05,
43
+ "loss": 0.8598,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.6382978723404256,
48
+ "grad_norm": 4.006348133087158,
49
+ "learning_rate": 3.971631205673759e-05,
50
+ "loss": 0.9708,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.7446808510638298,
55
+ "grad_norm": 4.814829349517822,
56
+ "learning_rate": 3.794326241134752e-05,
57
+ "loss": 0.8753,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.851063829787234,
62
+ "grad_norm": 4.398285388946533,
63
+ "learning_rate": 3.617021276595745e-05,
64
+ "loss": 0.9466,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.9574468085106383,
69
+ "grad_norm": 4.186042785644531,
70
+ "learning_rate": 3.4397163120567377e-05,
71
+ "loss": 0.9408,
72
+ "step": 90
73
+ },
74
  {
75
  "epoch": 1.0,
76
+ "eval_loss": 2.226564645767212,
77
+ "eval_runtime": 9.5187,
78
+ "eval_samples_per_second": 4.412,
79
+ "eval_steps_per_second": 1.156,
80
+ "step": 94
81
  },
82
  {
83
  "epoch": 1.0638297872340425,
84
+ "grad_norm": 3.774606227874756,
85
+ "learning_rate": 3.262411347517731e-05,
86
+ "loss": 0.8182,
87
+ "step": 100
88
  },
89
  {
90
+ "epoch": 1.1702127659574468,
91
+ "grad_norm": 3.641083240509033,
92
  "learning_rate": 3.085106382978723e-05,
93
+ "loss": 0.7849,
94
+ "step": 110
95
  },
96
  {
97
+ "epoch": 1.2765957446808511,
98
+ "grad_norm": 5.060397624969482,
99
+ "learning_rate": 2.9078014184397162e-05,
100
+ "loss": 0.7795,
101
+ "step": 120
102
+ },
103
+ {
104
+ "epoch": 1.3829787234042552,
105
+ "grad_norm": 3.457118272781372,
106
  "learning_rate": 2.7304964539007094e-05,
107
+ "loss": 0.723,
108
+ "step": 130
109
  },
110
  {
111
+ "epoch": 1.4893617021276595,
112
+ "grad_norm": 3.421212673187256,
113
+ "learning_rate": 2.5531914893617022e-05,
114
+ "loss": 0.6819,
115
+ "step": 140
116
+ },
117
+ {
118
+ "epoch": 1.5957446808510638,
119
+ "grad_norm": 3.1989083290100098,
120
  "learning_rate": 2.3758865248226954e-05,
121
+ "loss": 0.6891,
122
+ "step": 150
123
  },
124
  {
125
+ "epoch": 1.702127659574468,
126
+ "grad_norm": 3.486337661743164,
127
+ "learning_rate": 2.1985815602836882e-05,
128
+ "loss": 0.7369,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 1.8085106382978724,
133
+ "grad_norm": 3.4989173412323,
134
  "learning_rate": 2.0212765957446807e-05,
135
+ "loss": 0.7042,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 1.9148936170212765,
140
+ "grad_norm": 2.965428590774536,
141
+ "learning_rate": 1.8439716312056736e-05,
142
+ "loss": 0.712,
143
+ "step": 180
144
  },
145
  {
146
  "epoch": 2.0,
147
+ "eval_loss": 2.416543483734131,
148
+ "eval_runtime": 9.5843,
149
+ "eval_samples_per_second": 4.382,
150
+ "eval_steps_per_second": 1.148,
151
+ "step": 188
152
  },
153
  {
154
+ "epoch": 2.021276595744681,
155
+ "grad_norm": 4.359320640563965,
156
  "learning_rate": 1.6666666666666667e-05,
157
+ "loss": 0.7045,
158
+ "step": 190
159
  },
160
  {
161
+ "epoch": 2.127659574468085,
162
+ "grad_norm": 3.513986825942993,
163
+ "learning_rate": 1.4893617021276596e-05,
164
+ "loss": 0.6259,
165
+ "step": 200
166
+ },
167
+ {
168
+ "epoch": 2.2340425531914896,
169
+ "grad_norm": 4.127784252166748,
170
  "learning_rate": 1.3120567375886524e-05,
171
+ "loss": 0.6355,
172
+ "step": 210
173
  },
174
  {
175
+ "epoch": 2.3404255319148937,
176
+ "grad_norm": 3.798154592514038,
177
+ "learning_rate": 1.1347517730496454e-05,
178
+ "loss": 0.5645,
179
+ "step": 220
180
+ },
181
+ {
182
+ "epoch": 2.4468085106382977,
183
+ "grad_norm": 3.0239367485046387,
184
  "learning_rate": 9.574468085106383e-06,
185
+ "loss": 0.6694,
186
+ "step": 230
187
  },
188
  {
189
+ "epoch": 2.5531914893617023,
190
+ "grad_norm": 3.148362398147583,
191
+ "learning_rate": 7.801418439716313e-06,
192
+ "loss": 0.6103,
193
+ "step": 240
194
+ },
195
+ {
196
+ "epoch": 2.6595744680851063,
197
+ "grad_norm": 4.111635684967041,
198
  "learning_rate": 6.028368794326241e-06,
199
+ "loss": 0.5299,
200
+ "step": 250
201
  },
202
  {
203
+ "epoch": 2.7659574468085104,
204
+ "grad_norm": 3.042776107788086,
205
+ "learning_rate": 4.255319148936171e-06,
206
+ "loss": 0.5928,
207
+ "step": 260
208
+ },
209
+ {
210
+ "epoch": 2.872340425531915,
211
+ "grad_norm": 3.5736513137817383,
212
  "learning_rate": 2.4822695035460995e-06,
213
+ "loss": 0.6444,
214
+ "step": 270
215
+ },
216
+ {
217
+ "epoch": 2.978723404255319,
218
+ "grad_norm": 3.4036762714385986,
219
+ "learning_rate": 7.092198581560284e-07,
220
+ "loss": 0.5531,
221
+ "step": 280
222
  },
223
  {
224
  "epoch": 3.0,
225
+ "eval_loss": 2.504506826400757,
226
+ "eval_runtime": 14.1693,
227
+ "eval_samples_per_second": 2.964,
228
+ "eval_steps_per_second": 0.776,
229
+ "step": 282
230
  }
231
  ],
232
  "logging_steps": 10,
233
+ "max_steps": 282,
234
  "num_input_tokens_seen": 0,
235
  "num_train_epochs": 3,
236
  "save_steps": 500,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a6300bd64a2133832e68dd195890f548ec75ffbdaa6208036f80c4a9b9edd6a
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09957992ab27156ce48711b6f9e348d9a1a021722d41a33fbaa4988104c8e58
3
  size 5112