Mongjin committed on
Commit
8f61bcd
·
1 Parent(s): 47544c8

Upload 7 files

Browse files
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gogamza/kobart-base-v2",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "BartForDialogueGeneration"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "author": "Heewon Jeon([email protected])",
12
+ "bos_token_id": 1,
13
+ "classif_dropout": 0.1,
14
+ "classifier_dropout": 0.1,
15
+ "d_model": 768,
16
+ "decoder_attention_heads": 16,
17
+ "decoder_ffn_dim": 3072,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 6,
20
+ "decoder_start_token_id": 1,
21
+ "do_blenderbot_90_layernorm": false,
22
+ "dropout": 0.1,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 3072,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 6,
27
+ "eos_token_id": 1,
28
+ "extra_pos_embeddings": 2,
29
+ "force_bos_token_to_be_generated": false,
30
+ "forced_eos_token_id": 1,
31
+ "gradient_checkpointing": false,
32
+ "id2label": {
33
+ "0": "NEGATIVE",
34
+ "1": "POSITIVE"
35
+ },
36
+ "init_std": 0.02,
37
+ "is_encoder_decoder": true,
38
+ "kobart_version": 2.0,
39
+ "label2id": {
40
+ "NEGATIVE": 0,
41
+ "POSITIVE": 1
42
+ },
43
+ "max_position_embeddings": 1026,
44
+ "model_type": "bart",
45
+ "normalize_before": false,
46
+ "normalize_embedding": true,
47
+ "num_hidden_layers": 6,
48
+ "pad_token_id": 3,
49
+ "scale_embedding": false,
50
+ "static_position_embeddings": false,
51
+ "tokenizer_class": "PreTrainedTokenizerFast",
52
+ "torch_dtype": "float32",
53
+ "transformers_version": "4.17.0",
54
+ "use_cache": true,
55
+ "vocab_size": 30004
56
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339fd69214325799a030d941184d800e794df5c4a9e19ba69dc3f408d55220ca
3
+ size 495883396
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "</s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "additional_special_tokens": ["<agent>", "<user>", "<agent_mem>", "<user_mem>"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"additional_special_tokens": ["<agent>", "<user>", "<agent_mem>", "<user_mem>"], "special_tokens_map_file": "/home/mongjin/.cache/huggingface/transformers/a87d2ed77831bb40ce806a97c04126addf5ecc82b3e23ecf916b2a4acdb9c29a.c23d5e62137984cf842a885705037b25b156747d145406702932d5f5d5e7c88e", "name_or_path": "gogamza/kobart-base-v2", "tokenizer_class": "PreTrainedTokenizerFast"}
trainer_state.json ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 27925,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.09,
12
+ "learning_rate": 3e-05,
13
+ "loss": 3.5105,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.18,
18
+ "learning_rate": 2.945305378304467e-05,
19
+ "loss": 1.6044,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.27,
24
+ "learning_rate": 2.8906107566089334e-05,
25
+ "loss": 1.5561,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.36,
30
+ "learning_rate": 2.8359161349134002e-05,
31
+ "loss": 1.5065,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.45,
36
+ "learning_rate": 2.781221513217867e-05,
37
+ "loss": 1.4607,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.54,
42
+ "learning_rate": 2.726526891522334e-05,
43
+ "loss": 1.4575,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.63,
48
+ "learning_rate": 2.6718322698268003e-05,
49
+ "loss": 1.432,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.72,
54
+ "learning_rate": 2.617137648131267e-05,
55
+ "loss": 1.4269,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.81,
60
+ "learning_rate": 2.562443026435734e-05,
61
+ "loss": 1.4204,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.9,
66
+ "learning_rate": 2.5077484047402008e-05,
67
+ "loss": 1.4037,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.98,
72
+ "learning_rate": 2.4530537830446673e-05,
73
+ "loss": 1.4064,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "eval_bleu": 0.8646,
79
+ "eval_gen_len": 16.6,
80
+ "eval_loss": 1.450329303741455,
81
+ "eval_runtime": 368.5927,
82
+ "eval_samples_per_second": 56.358,
83
+ "eval_steps_per_second": 0.882,
84
+ "step": 5585
85
+ },
86
+ {
87
+ "epoch": 1.07,
88
+ "learning_rate": 2.398359161349134e-05,
89
+ "loss": 1.3601,
90
+ "step": 6000
91
+ },
92
+ {
93
+ "epoch": 1.16,
94
+ "learning_rate": 2.343664539653601e-05,
95
+ "loss": 1.3596,
96
+ "step": 6500
97
+ },
98
+ {
99
+ "epoch": 1.25,
100
+ "learning_rate": 2.2889699179580678e-05,
101
+ "loss": 1.3655,
102
+ "step": 7000
103
+ },
104
+ {
105
+ "epoch": 1.34,
106
+ "learning_rate": 2.2342752962625342e-05,
107
+ "loss": 1.326,
108
+ "step": 7500
109
+ },
110
+ {
111
+ "epoch": 1.43,
112
+ "learning_rate": 2.179580674567001e-05,
113
+ "loss": 1.3327,
114
+ "step": 8000
115
+ },
116
+ {
117
+ "epoch": 1.52,
118
+ "learning_rate": 2.124886052871468e-05,
119
+ "loss": 1.3367,
120
+ "step": 8500
121
+ },
122
+ {
123
+ "epoch": 1.61,
124
+ "learning_rate": 2.0701914311759347e-05,
125
+ "loss": 1.3305,
126
+ "step": 9000
127
+ },
128
+ {
129
+ "epoch": 1.7,
130
+ "learning_rate": 2.0154968094804012e-05,
131
+ "loss": 1.3218,
132
+ "step": 9500
133
+ },
134
+ {
135
+ "epoch": 1.79,
136
+ "learning_rate": 1.960802187784868e-05,
137
+ "loss": 1.3302,
138
+ "step": 10000
139
+ },
140
+ {
141
+ "epoch": 1.88,
142
+ "learning_rate": 1.906107566089335e-05,
143
+ "loss": 1.3173,
144
+ "step": 10500
145
+ },
146
+ {
147
+ "epoch": 1.97,
148
+ "learning_rate": 1.8514129443938013e-05,
149
+ "loss": 1.3306,
150
+ "step": 11000
151
+ },
152
+ {
153
+ "epoch": 2.0,
154
+ "eval_bleu": 0.8302,
155
+ "eval_gen_len": 16.9,
156
+ "eval_loss": 1.422479271888733,
157
+ "eval_runtime": 368.3493,
158
+ "eval_samples_per_second": 56.395,
159
+ "eval_steps_per_second": 0.882,
160
+ "step": 11170
161
+ },
162
+ {
163
+ "epoch": 2.06,
164
+ "learning_rate": 1.7967183226982678e-05,
165
+ "loss": 1.2841,
166
+ "step": 11500
167
+ },
168
+ {
169
+ "epoch": 2.15,
170
+ "learning_rate": 1.7420237010027346e-05,
171
+ "loss": 1.287,
172
+ "step": 12000
173
+ },
174
+ {
175
+ "epoch": 2.24,
176
+ "learning_rate": 1.6873290793072015e-05,
177
+ "loss": 1.2772,
178
+ "step": 12500
179
+ },
180
+ {
181
+ "epoch": 2.33,
182
+ "learning_rate": 1.632634457611668e-05,
183
+ "loss": 1.2802,
184
+ "step": 13000
185
+ },
186
+ {
187
+ "epoch": 2.42,
188
+ "learning_rate": 1.5779398359161348e-05,
189
+ "loss": 1.27,
190
+ "step": 13500
191
+ },
192
+ {
193
+ "epoch": 2.51,
194
+ "learning_rate": 1.5232452142206018e-05,
195
+ "loss": 1.273,
196
+ "step": 14000
197
+ },
198
+ {
199
+ "epoch": 2.6,
200
+ "learning_rate": 1.4685505925250684e-05,
201
+ "loss": 1.2888,
202
+ "step": 14500
203
+ },
204
+ {
205
+ "epoch": 2.69,
206
+ "learning_rate": 1.4138559708295352e-05,
207
+ "loss": 1.2767,
208
+ "step": 15000
209
+ },
210
+ {
211
+ "epoch": 2.78,
212
+ "learning_rate": 1.3591613491340019e-05,
213
+ "loss": 1.2713,
214
+ "step": 15500
215
+ },
216
+ {
217
+ "epoch": 2.86,
218
+ "learning_rate": 1.3044667274384685e-05,
219
+ "loss": 1.253,
220
+ "step": 16000
221
+ },
222
+ {
223
+ "epoch": 2.95,
224
+ "learning_rate": 1.2497721057429352e-05,
225
+ "loss": 1.262,
226
+ "step": 16500
227
+ },
228
+ {
229
+ "epoch": 3.0,
230
+ "eval_bleu": 1.0163,
231
+ "eval_gen_len": 17.1,
232
+ "eval_loss": 1.4149495363235474,
233
+ "eval_runtime": 367.931,
234
+ "eval_samples_per_second": 56.459,
235
+ "eval_steps_per_second": 0.883,
236
+ "step": 16755
237
+ },
238
+ {
239
+ "epoch": 3.04,
240
+ "learning_rate": 1.195077484047402e-05,
241
+ "loss": 1.2486,
242
+ "step": 17000
243
+ },
244
+ {
245
+ "epoch": 3.13,
246
+ "learning_rate": 1.1403828623518687e-05,
247
+ "loss": 1.2413,
248
+ "step": 17500
249
+ },
250
+ {
251
+ "epoch": 3.22,
252
+ "learning_rate": 1.0856882406563355e-05,
253
+ "loss": 1.2174,
254
+ "step": 18000
255
+ },
256
+ {
257
+ "epoch": 3.31,
258
+ "learning_rate": 1.0309936189608021e-05,
259
+ "loss": 1.2258,
260
+ "step": 18500
261
+ },
262
+ {
263
+ "epoch": 3.4,
264
+ "learning_rate": 9.76298997265269e-06,
265
+ "loss": 1.2378,
266
+ "step": 19000
267
+ },
268
+ {
269
+ "epoch": 3.49,
270
+ "learning_rate": 9.216043755697356e-06,
271
+ "loss": 1.2286,
272
+ "step": 19500
273
+ },
274
+ {
275
+ "epoch": 3.58,
276
+ "learning_rate": 8.669097538742024e-06,
277
+ "loss": 1.2317,
278
+ "step": 20000
279
+ },
280
+ {
281
+ "epoch": 3.67,
282
+ "learning_rate": 8.122151321786691e-06,
283
+ "loss": 1.2386,
284
+ "step": 20500
285
+ },
286
+ {
287
+ "epoch": 3.76,
288
+ "learning_rate": 7.575205104831359e-06,
289
+ "loss": 1.22,
290
+ "step": 21000
291
+ },
292
+ {
293
+ "epoch": 3.85,
294
+ "learning_rate": 7.028258887876026e-06,
295
+ "loss": 1.2252,
296
+ "step": 21500
297
+ },
298
+ {
299
+ "epoch": 3.94,
300
+ "learning_rate": 6.481312670920693e-06,
301
+ "loss": 1.2175,
302
+ "step": 22000
303
+ },
304
+ {
305
+ "epoch": 4.0,
306
+ "eval_bleu": 0.9445,
307
+ "eval_gen_len": 17.2,
308
+ "eval_loss": 1.416054606437683,
309
+ "eval_runtime": 367.7817,
310
+ "eval_samples_per_second": 56.482,
311
+ "eval_steps_per_second": 0.884,
312
+ "step": 22340
313
+ },
314
+ {
315
+ "epoch": 4.03,
316
+ "learning_rate": 5.9343664539653605e-06,
317
+ "loss": 1.2158,
318
+ "step": 22500
319
+ },
320
+ {
321
+ "epoch": 4.12,
322
+ "learning_rate": 5.387420237010028e-06,
323
+ "loss": 1.1916,
324
+ "step": 23000
325
+ },
326
+ {
327
+ "epoch": 4.21,
328
+ "learning_rate": 4.8404740200546945e-06,
329
+ "loss": 1.2079,
330
+ "step": 23500
331
+ },
332
+ {
333
+ "epoch": 4.3,
334
+ "learning_rate": 4.293527803099362e-06,
335
+ "loss": 1.2124,
336
+ "step": 24000
337
+ },
338
+ {
339
+ "epoch": 4.39,
340
+ "learning_rate": 3.7465815861440297e-06,
341
+ "loss": 1.2006,
342
+ "step": 24500
343
+ },
344
+ {
345
+ "epoch": 4.48,
346
+ "learning_rate": 3.199635369188696e-06,
347
+ "loss": 1.2121,
348
+ "step": 25000
349
+ },
350
+ {
351
+ "epoch": 4.57,
352
+ "learning_rate": 2.6526891522333636e-06,
353
+ "loss": 1.2022,
354
+ "step": 25500
355
+ },
356
+ {
357
+ "epoch": 4.66,
358
+ "learning_rate": 2.105742935278031e-06,
359
+ "loss": 1.2029,
360
+ "step": 26000
361
+ },
362
+ {
363
+ "epoch": 4.74,
364
+ "learning_rate": 1.5587967183226983e-06,
365
+ "loss": 1.2008,
366
+ "step": 26500
367
+ },
368
+ {
369
+ "epoch": 4.83,
370
+ "learning_rate": 1.0118505013673655e-06,
371
+ "loss": 1.2193,
372
+ "step": 27000
373
+ },
374
+ {
375
+ "epoch": 4.92,
376
+ "learning_rate": 4.6490428441203284e-07,
377
+ "loss": 1.2108,
378
+ "step": 27500
379
+ },
380
+ {
381
+ "epoch": 5.0,
382
+ "eval_bleu": 1.0114,
383
+ "eval_gen_len": 17.3,
384
+ "eval_loss": 1.4185353517532349,
385
+ "eval_runtime": 367.8873,
386
+ "eval_samples_per_second": 56.466,
387
+ "eval_steps_per_second": 0.883,
388
+ "step": 27925
389
+ },
390
+ {
391
+ "epoch": 5.0,
392
+ "step": 27925,
393
+ "total_flos": 5.320769336064e+17,
394
+ "train_loss": 1.3384765100436438,
395
+ "train_runtime": 44633.7325,
396
+ "train_samples_per_second": 40.04,
397
+ "train_steps_per_second": 0.626
398
+ }
399
+ ],
400
+ "max_steps": 27925,
401
+ "num_train_epochs": 5,
402
+ "total_flos": 5.320769336064e+17,
403
+ "trial_name": null,
404
+ "trial_params": null
405
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:367bad5f9682264dadbc8c88b4f7f5c52170059a3d5be12dce6a03ef1cd30281
3
+ size 3183