Wonder-Griffin committed (verified)
Commit 9a97de5 · 1 Parent(s): 1693c68

Update config.json

Files changed (1):
config.json +69 -43
config.json CHANGED
@@ -1,43 +1,69 @@
-{
-  "architectures": [
-    "JudgeModel"
-  ],
-  "batch_size": 32,
-  "bias": true,
-  "block_size": 512,
-  "dim_feedforward": 3072,
-  "dropout": 0.1,
-  "ff_expansion_factor": 4,
-  "hidden_act": "gelu",
-  "hidden_size": 768,
-  "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1",
-    "2": "LABEL_2",
-    "3": "LABEL_3",
-    "4": "LABEL_4"
-  },
-  "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1,
-    "LABEL_2": 2,
-    "LABEL_3": 3,
-    "LABEL_4": 4
-  },
-  "label_smoothing": 0.1,
-  "learning_rate": 0.0003,
-  "log_interval": 100,
-  "max_grad_norm": 1.0,
-  "max_position_embeddings": 512,
-  "model_type": "openai-gpt",
-  "n_embd": 768,
-  "n_head": 12,
-  "n_layer": 12,
-  "output_dir": "C:/Users/wonde/output",
-  "torch_dtype": "float32",
-  "total_steps": 10000,
-  "transformers_version": "4.43.3",
-  "vocab_size": 50257,
-  "warmup_steps": 1000,
-  "weight_decay": 0.01
-}
+{
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "inference_mode": true,
+  "batch_size": 32,
+  "bias": true,
+  "block_size": 512,
+  "dim_feedforward": 3072,
+  "dropout": 0.1,
+  "ff_expansion_factor": 4,
+  "hidden_act": "gelu",
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4
+  },
+  "label_smoothing": 0.1,
+  "learning_rate": 0.0003,
+  "log_interval": 100,
+  "max_grad_norm": 1.0,
+  "max_position_embeddings": 512,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_layer": 12,
+  "output_dir": "C:/Users/wonde/output",
+  "torch_dtype": "float32",
+  "total_steps": 10000,
+  "transformers_version": "4.43.3",
+  "vocab_size": 50257,
+  "warmup_steps": 1000,
+  "weight_decay": 0.01,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "c_fc",
+    "score"
+  ],
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}
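
Note on the merged keys: the updated config.json combines a standard GPT-2 model config (model_type, n_embd, n_head, n_layer, vocab_size, ...) with trainer-only hyperparameters (batch_size, learning_rate, total_steps, warmup_steps, output_dir, ...) and LoRA adapter fields (peft_type, r, target_modules, modules_to_save, ...) that peft normally keeps in a separate adapter_config.json. It also lists GPT2LMHeadModel under architectures while task_type is SEQ_CLS; the sketch below follows the classification fields. This is a minimal sketch of how these keys would typically be consumed, assuming the stock "gpt2" checkpoint as the base model and recent transformers/peft releases (both assumptions, not taken from this repo); transformers ignores the extra trainer-only keys when loading.

# A minimal sketch, assuming the stock "gpt2" checkpoint as the base model
# and recent transformers/peft releases; the checkpoint name is an
# assumption, not taken from this repo.
from peft import LoraConfig, get_peft_model
from transformers import AutoConfig, AutoModelForSequenceClassification

# The model-level keys (model_type, n_embd, n_head, n_layer, vocab_size, ...)
# are the ones transformers actually reads from config.json. num_labels=5
# matches the five id2label entries in the diff.
config = AutoConfig.from_pretrained("gpt2", num_labels=5)
config.pad_token_id = config.eos_token_id  # GPT-2 has no pad token; needed for batched classification

model = AutoModelForSequenceClassification.from_pretrained("gpt2", config=config)

# The PEFT keys in this diff (peft_type, r, target_modules, modules_to_save,
# task_type, use_dora, use_rslora) normally live in adapter_config.json;
# reconstructed here as a LoraConfig with the values from the update.
lora_config = LoraConfig(
    r=16,
    target_modules=["c_proj", "c_attn", "c_fc", "score"],
    modules_to_save=["classifier", "score"],
    task_type="SEQ_CLS",
    use_dora=False,
    use_rslora=False,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

The trainer-only keys (batch_size, learning_rate, label_smoothing, max_grad_norm, total_steps, warmup_steps, weight_decay, log_interval, output_dir) are carried along in config.json but ignored by from_pretrained; in a typical transformers setup they would belong to a TrainingArguments object rather than the model config.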