Files changed (1) hide show
  1. config.yaml +610 -0
config.yaml ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.9.16
7
+ cli_version: 0.15.0
8
+ framework: huggingface
9
+ huggingface_version: 4.28.1
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1682185393.299029
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 12
21
+ - 49
22
+ - 51
23
+ - 53
24
+ - 55
25
+ 2:
26
+ - 1
27
+ - 2
28
+ - 3
29
+ - 5
30
+ - 11
31
+ - 12
32
+ - 49
33
+ - 51
34
+ - 53
35
+ - 55
36
+ 3:
37
+ - 2
38
+ - 7
39
+ - 13
40
+ - 23
41
+ 4: 3.9.16
42
+ 5: 0.15.0
43
+ 6: 4.28.1
44
+ 8:
45
+ - 1
46
+ - 5
47
+ m:
48
+ - 1: train/global_step
49
+ 6:
50
+ - 3
51
+ return_dict:
52
+ desc: null
53
+ value: true
54
+ output_hidden_states:
55
+ desc: null
56
+ value: false
57
+ output_attentions:
58
+ desc: null
59
+ value: false
60
+ torchscript:
61
+ desc: null
62
+ value: false
63
+ torch_dtype:
64
+ desc: null
65
+ value: float32
66
+ use_bfloat16:
67
+ desc: null
68
+ value: false
69
+ tf_legacy_loss:
70
+ desc: null
71
+ value: false
72
+ pruned_heads:
73
+ desc: null
74
+ value: {}
75
+ tie_word_embeddings:
76
+ desc: null
77
+ value: true
78
+ is_encoder_decoder:
79
+ desc: null
80
+ value: false
81
+ is_decoder:
82
+ desc: null
83
+ value: false
84
+ cross_attention_hidden_size:
85
+ desc: null
86
+ value: null
87
+ add_cross_attention:
88
+ desc: null
89
+ value: false
90
+ tie_encoder_decoder:
91
+ desc: null
92
+ value: false
93
+ max_length:
94
+ desc: null
95
+ value: 20
96
+ min_length:
97
+ desc: null
98
+ value: 0
99
+ do_sample:
100
+ desc: null
101
+ value: false
102
+ early_stopping:
103
+ desc: null
104
+ value: false
105
+ num_beams:
106
+ desc: null
107
+ value: 1
108
+ num_beam_groups:
109
+ desc: null
110
+ value: 1
111
+ diversity_penalty:
112
+ desc: null
113
+ value: 0.0
114
+ temperature:
115
+ desc: null
116
+ value: 1.0
117
+ top_k:
118
+ desc: null
119
+ value: 50
120
+ top_p:
121
+ desc: null
122
+ value: 1.0
123
+ typical_p:
124
+ desc: null
125
+ value: 1.0
126
+ repetition_penalty:
127
+ desc: null
128
+ value: 1.0
129
+ length_penalty:
130
+ desc: null
131
+ value: 1.0
132
+ no_repeat_ngram_size:
133
+ desc: null
134
+ value: 0
135
+ encoder_no_repeat_ngram_size:
136
+ desc: null
137
+ value: 0
138
+ bad_words_ids:
139
+ desc: null
140
+ value: null
141
+ num_return_sequences:
142
+ desc: null
143
+ value: 1
144
+ chunk_size_feed_forward:
145
+ desc: null
146
+ value: 0
147
+ output_scores:
148
+ desc: null
149
+ value: false
150
+ return_dict_in_generate:
151
+ desc: null
152
+ value: false
153
+ forced_bos_token_id:
154
+ desc: null
155
+ value: null
156
+ forced_eos_token_id:
157
+ desc: null
158
+ value: null
159
+ remove_invalid_values:
160
+ desc: null
161
+ value: false
162
+ exponential_decay_length_penalty:
163
+ desc: null
164
+ value: null
165
+ suppress_tokens:
166
+ desc: null
167
+ value: null
168
+ begin_suppress_tokens:
169
+ desc: null
170
+ value: null
171
+ architectures:
172
+ desc: null
173
+ value:
174
+ - ElectraForSequenceClassification
175
+ finetuning_task:
176
+ desc: null
177
+ value: null
178
+ id2label:
179
+ desc: null
180
+ value:
181
+ '0': LABEL_0
182
+ '1': LABEL_1
183
+ label2id:
184
+ desc: null
185
+ value:
186
+ LABEL_0: 0
187
+ LABEL_1: 1
188
+ tokenizer_class:
189
+ desc: null
190
+ value: null
191
+ prefix:
192
+ desc: null
193
+ value: null
194
+ bos_token_id:
195
+ desc: null
196
+ value: null
197
+ pad_token_id:
198
+ desc: null
199
+ value: 0
200
+ eos_token_id:
201
+ desc: null
202
+ value: null
203
+ sep_token_id:
204
+ desc: null
205
+ value: null
206
+ decoder_start_token_id:
207
+ desc: null
208
+ value: null
209
+ task_specific_params:
210
+ desc: null
211
+ value: null
212
+ problem_type:
213
+ desc: null
214
+ value: single_label_classification
215
+ _name_or_path:
216
+ desc: null
217
+ value: dhruvladia/suicidal-electra
218
+ transformers_version:
219
+ desc: null
220
+ value: 4.28.1
221
+ model_type:
222
+ desc: null
223
+ value: electra
224
+ vocab_size:
225
+ desc: null
226
+ value: 30522
227
+ embedding_size:
228
+ desc: null
229
+ value: 768
230
+ hidden_size:
231
+ desc: null
232
+ value: 768
233
+ num_hidden_layers:
234
+ desc: null
235
+ value: 12
236
+ num_attention_heads:
237
+ desc: null
238
+ value: 12
239
+ intermediate_size:
240
+ desc: null
241
+ value: 3072
242
+ hidden_act:
243
+ desc: null
244
+ value: gelu
245
+ hidden_dropout_prob:
246
+ desc: null
247
+ value: 0.1
248
+ attention_probs_dropout_prob:
249
+ desc: null
250
+ value: 0.1
251
+ max_position_embeddings:
252
+ desc: null
253
+ value: 512
254
+ type_vocab_size:
255
+ desc: null
256
+ value: 2
257
+ initializer_range:
258
+ desc: null
259
+ value: 0.02
260
+ layer_norm_eps:
261
+ desc: null
262
+ value: 1.0e-12
263
+ summary_type:
264
+ desc: null
265
+ value: first
266
+ summary_use_proj:
267
+ desc: null
268
+ value: true
269
+ summary_activation:
270
+ desc: null
271
+ value: gelu
272
+ summary_last_dropout:
273
+ desc: null
274
+ value: 0.1
275
+ position_embedding_type:
276
+ desc: null
277
+ value: absolute
278
+ use_cache:
279
+ desc: null
280
+ value: true
281
+ classifier_dropout:
282
+ desc: null
283
+ value: null
284
+ output_dir:
285
+ desc: null
286
+ value: Models/electra_checkpoint
287
+ overwrite_output_dir:
288
+ desc: null
289
+ value: true
290
+ do_train:
291
+ desc: null
292
+ value: false
293
+ do_eval:
294
+ desc: null
295
+ value: false
296
+ do_predict:
297
+ desc: null
298
+ value: false
299
+ evaluation_strategy:
300
+ desc: null
301
+ value: 'no'
302
+ prediction_loss_only:
303
+ desc: null
304
+ value: false
305
+ per_device_train_batch_size:
306
+ desc: null
307
+ value: 6
308
+ per_device_eval_batch_size:
309
+ desc: null
310
+ value: 6
311
+ per_gpu_train_batch_size:
312
+ desc: null
313
+ value: None
314
+ per_gpu_eval_batch_size:
315
+ desc: null
316
+ value: None
317
+ gradient_accumulation_steps:
318
+ desc: null
319
+ value: 1
320
+ eval_accumulation_steps:
321
+ desc: null
322
+ value: None
323
+ eval_delay:
324
+ desc: null
325
+ value: 0
326
+ learning_rate:
327
+ desc: null
328
+ value: 1.0e-05
329
+ weight_decay:
330
+ desc: null
331
+ value: 0.0
332
+ adam_beta1:
333
+ desc: null
334
+ value: 0.9
335
+ adam_beta2:
336
+ desc: null
337
+ value: 0.999
338
+ adam_epsilon:
339
+ desc: null
340
+ value: 1.0e-08
341
+ max_grad_norm:
342
+ desc: null
343
+ value: 1.0
344
+ num_train_epochs:
345
+ desc: null
346
+ value: 1
347
+ max_steps:
348
+ desc: null
349
+ value: -1
350
+ lr_scheduler_type:
351
+ desc: null
352
+ value: linear
353
+ warmup_ratio:
354
+ desc: null
355
+ value: 0.0
356
+ warmup_steps:
357
+ desc: null
358
+ value: 0
359
+ log_level:
360
+ desc: null
361
+ value: passive
362
+ log_level_replica:
363
+ desc: null
364
+ value: warning
365
+ log_on_each_node:
366
+ desc: null
367
+ value: true
368
+ logging_dir:
369
+ desc: null
370
+ value: Models/electra_checkpoint/logs
371
+ logging_strategy:
372
+ desc: null
373
+ value: steps
374
+ logging_first_step:
375
+ desc: null
376
+ value: false
377
+ logging_steps:
378
+ desc: null
379
+ value: 500
380
+ logging_nan_inf_filter:
381
+ desc: null
382
+ value: true
383
+ save_strategy:
384
+ desc: null
385
+ value: steps
386
+ save_steps:
387
+ desc: null
388
+ value: 1500
389
+ save_total_limit:
390
+ desc: null
391
+ value: None
392
+ save_safetensors:
393
+ desc: null
394
+ value: false
395
+ save_on_each_node:
396
+ desc: null
397
+ value: false
398
+ no_cuda:
399
+ desc: null
400
+ value: false
401
+ use_mps_device:
402
+ desc: null
403
+ value: false
404
+ seed:
405
+ desc: null
406
+ value: 4222
407
+ data_seed:
408
+ desc: null
409
+ value: None
410
+ jit_mode_eval:
411
+ desc: null
412
+ value: false
413
+ use_ipex:
414
+ desc: null
415
+ value: false
416
+ bf16:
417
+ desc: null
418
+ value: false
419
+ fp16:
420
+ desc: null
421
+ value: false
422
+ fp16_opt_level:
423
+ desc: null
424
+ value: O1
425
+ half_precision_backend:
426
+ desc: null
427
+ value: auto
428
+ bf16_full_eval:
429
+ desc: null
430
+ value: false
431
+ fp16_full_eval:
432
+ desc: null
433
+ value: false
434
+ tf32:
435
+ desc: null
436
+ value: None
437
+ local_rank:
438
+ desc: null
439
+ value: -1
440
+ xpu_backend:
441
+ desc: null
442
+ value: None
443
+ tpu_num_cores:
444
+ desc: null
445
+ value: None
446
+ tpu_metrics_debug:
447
+ desc: null
448
+ value: false
449
+ debug:
450
+ desc: null
451
+ value: '[]'
452
+ dataloader_drop_last:
453
+ desc: null
454
+ value: false
455
+ eval_steps:
456
+ desc: null
457
+ value: None
458
+ dataloader_num_workers:
459
+ desc: null
460
+ value: 0
461
+ past_index:
462
+ desc: null
463
+ value: -1
464
+ run_name:
465
+ desc: null
466
+ value: electra
467
+ disable_tqdm:
468
+ desc: null
469
+ value: false
470
+ remove_unused_columns:
471
+ desc: null
472
+ value: true
473
+ label_names:
474
+ desc: null
475
+ value: None
476
+ load_best_model_at_end:
477
+ desc: null
478
+ value: false
479
+ metric_for_best_model:
480
+ desc: null
481
+ value: None
482
+ greater_is_better:
483
+ desc: null
484
+ value: None
485
+ ignore_data_skip:
486
+ desc: null
487
+ value: false
488
+ sharded_ddp:
489
+ desc: null
490
+ value: '[]'
491
+ fsdp:
492
+ desc: null
493
+ value: '[]'
494
+ fsdp_min_num_params:
495
+ desc: null
496
+ value: 0
497
+ fsdp_config:
498
+ desc: null
499
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
500
+ fsdp_transformer_layer_cls_to_wrap:
501
+ desc: null
502
+ value: None
503
+ deepspeed:
504
+ desc: null
505
+ value: None
506
+ label_smoothing_factor:
507
+ desc: null
508
+ value: 0.0
509
+ optim:
510
+ desc: null
511
+ value: adamw_hf
512
+ optim_args:
513
+ desc: null
514
+ value: None
515
+ adafactor:
516
+ desc: null
517
+ value: false
518
+ group_by_length:
519
+ desc: null
520
+ value: false
521
+ length_column_name:
522
+ desc: null
523
+ value: length
524
+ report_to:
525
+ desc: null
526
+ value: '[''wandb'']'
527
+ ddp_find_unused_parameters:
528
+ desc: null
529
+ value: None
530
+ ddp_bucket_cap_mb:
531
+ desc: null
532
+ value: None
533
+ dataloader_pin_memory:
534
+ desc: null
535
+ value: true
536
+ skip_memory_metrics:
537
+ desc: null
538
+ value: true
539
+ use_legacy_prediction_loop:
540
+ desc: null
541
+ value: false
542
+ push_to_hub:
543
+ desc: null
544
+ value: false
545
+ resume_from_checkpoint:
546
+ desc: null
547
+ value: None
548
+ hub_model_id:
549
+ desc: null
550
+ value: None
551
+ hub_strategy:
552
+ desc: null
553
+ value: every_save
554
+ hub_token:
555
+ desc: null
556
+ value: <HUB_TOKEN>
557
+ hub_private_repo:
558
+ desc: null
559
+ value: false
560
+ gradient_checkpointing:
561
+ desc: null
562
+ value: false
563
+ include_inputs_for_metrics:
564
+ desc: null
565
+ value: false
566
+ fp16_backend:
567
+ desc: null
568
+ value: auto
569
+ push_to_hub_model_id:
570
+ desc: null
571
+ value: None
572
+ push_to_hub_organization:
573
+ desc: null
574
+ value: None
575
+ push_to_hub_token:
576
+ desc: null
577
+ value: <PUSH_TO_HUB_TOKEN>
578
+ mp_parameters:
579
+ desc: null
580
+ value: ''
581
+ auto_find_batch_size:
582
+ desc: null
583
+ value: false
584
+ full_determinism:
585
+ desc: null
586
+ value: false
587
+ torchdynamo:
588
+ desc: null
589
+ value: None
590
+ ray_scope:
591
+ desc: null
592
+ value: last
593
+ ddp_timeout:
594
+ desc: null
595
+ value: 1800
596
+ torch_compile:
597
+ desc: null
598
+ value: false
599
+ torch_compile_backend:
600
+ desc: null
601
+ value: None
602
+ torch_compile_mode:
603
+ desc: null
604
+ value: None
605
+ train_batch_size:
606
+ desc: null
607
+ value: 6
608
+ eval_batch_size:
609
+ desc: null
610
+ value: 6