{
"_name_or_path": "Wonder-Griffin/Judge-GPT2",
"activation_function": "gelu_new",
"architectures": [
"GPT2LMHeadModel",
"JudgeModel",
"JudgeCasualLMHead",
"JudgeWithQA",
"JudgeClassifier"
],
"attn_pdrop": 0.1,
"batch_size": 32,
"bias": true,
"block_size": 512,
"bos_token_id": 50256,
"dim_feedforward": 3072,
"dropout": 0.1,
"embd_pdrop": 0.1,
"eos_token_id": 50256,
"ff_expansion_factor": 4,
"hidden_act": "gelu",
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1",
"2": "LABEL_2",
"3": "LABEL_3",
"4": "LABEL_4"
},
"inference_mode": true,
"initializer_range": 0.02,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1,
"LABEL_2": 2,
"LABEL_3": 3,
"LABEL_4": 4
},
"label_smoothing": 0.1,
"layer_norm_epsilon": 1e-05,
"learning_rate": 0.0003,
"log_interval": 100,
"max_grad_norm": 1.0,
"model_type": "gpt2",
"n_embd": 768,
"n_head": 12,
"n_inner": null,
"n_layer": 12,
"n_positions": 512,
"output_dir": "C:/Users/wonde/output",
"reorder_and_upcast_attn": false,
"resid_pdrop": 0.1,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_heads": {
"classifier_head": {
"params": {
"num_labels": 5
},
"type": "JudgeClassifier"
},
"lm_head": {
"params": {
"vocab_size": 50257
},
"type": "JudgeCasualLMHead"
},
"qa_head": {
"params": {
"num_labels": 2
},
"type": "JudgeWithQA"
}
},
"task_specific_params": {
"text-generation": {
"do_sample": true,
"max_length": 100
},
"question-answering": {
"max_answer_length": 100
},
"sequence-classification": {
"eval_steps": 500
}
},
"pretrained_weights": "Wonder-Griffin/Judge-GPT2",
"tokenizer": {
"type": "AutoTokenizer",
"params": {
"vocab_size": 50257
}
},
"torch_dtype": "float32",
"total_steps": 10000,
"transformers_version": "4.43.3",
"use_cache": true,
"vocab_size": 50257,
"warmup_steps": 1000,
"weight_decay": 0.01
}