File size: 2,258 Bytes
9a97de5
54933ab
1fdcd75
9a97de5
1fdcd75
73b145e
 
 
 
9a97de5
1fdcd75
9a97de5
 
 
1fdcd75
9a97de5
 
1fdcd75
 
9a97de5
 
 
 
 
 
 
 
 
1fdcd75
 
9a97de5
 
 
 
 
 
 
 
1fdcd75
9a97de5
 
 
 
 
 
1fdcd75
9a97de5
1fdcd75
9a97de5
1fdcd75
 
 
 
 
 
 
 
 
73b145e
1fdcd75
 
 
 
 
 
73b145e
 
 
1fdcd75
 
73b145e
 
 
 
1fdcd75
 
73b145e
 
 
9a97de5
 
 
b771bee
73b145e
 
1fdcd75
73b145e
 
 
9a97de5
1fdcd75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
{
  "_name_or_path_": "Wonder-Griffin/Judge-GPT2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel",
    "JudgeModel",
    "JudgeCasualLMHead",
    "JudgeWithQA",
    "JudgeClassifier"
  ],
  "attn_pdrop": 0.1,
  "batch_size": 32,
  "bias": true,
  "block_size": 512,
  "bos_token_id": 50256,
  "dim_feedforward": 3072,
  "dropout": 0.1,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "ff_expansion_factor": 4,
  "hidden_act": "gelu",
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "inference_mode": true,
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "label_smoothing": 0.1,
  "layer_norm_epsilon": 1e-05,
  "learning_rate": 0.0003,
  "log_interval": 100,
  "max_grad_norm": 1.0,
  "model_type": "gpt2",
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 512,
  "output_dir": "C:/Users/wonde/output",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_heads": {
    "classifier_head": {
      "params": {
        "num_labels": 5
      },
      "type": "JudgeClassifier"
    },
    "lm_head": {
      "params": {
        "vocab_size": 50257
      },
      "type": "JudgeCasualLMHead"
    },
    "qa_head": {
      "params": {
        "num_labels": 2
      },
      "type": "JudgeWithQA"
    }
  },

  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 100
    },
    "question-answering": {
      "max_answer_length": 100
    },
    "sequence-classification": {
      "eval_steps": 500
    }
  },

  "pretrained_weights": "Wonder-Griffin/Judge-GPT2",
  "tokenizer": {
    "type": "AutoTokenizer",
    "params": {
      "vocab_size": 50257
    }
  },

  "torch_dtype": "float32",
  "total_steps": 10000,
  "transformers_version": "4.43.3",
  "use_cache": true,
  "vocab_size": 50257,
  "warmup_steps": 1000,
  "weight_decay": 0.01
}