Commit 3c0a50b · verified · committed by shanghong
1 Parent(s): f69c342

Upload folder using huggingface_hub
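The commit message indicates the files were pushed with huggingface_hub. A minimal sketch of such an upload (the repo id below is a hypothetical placeholder; the folder path is the checkpoint directory recorded in config.json further down):

```python
from huggingface_hub import HfApi

api = HfApi()
# Push every file in the local checkpoint directory to the Hub repo.
# Large binaries (*.safetensors, tokenizer.model) are stored via Git LFS.
api.upload_folder(
    folder_path="/mnt/data/maojia/save_ckpt_single/checkpoint-80",
    repo_id="user/repo-name",  # hypothetical; the actual repo id is not shown in this diff
    commit_message="Upload folder using huggingface_hub",
)
```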
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "/mnt/data/maojia/save_ckpt_single/checkpoint-80",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 4096,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.42.3",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
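The config above describes a standard 7B-class Llama architecture (32 layers, hidden size 4096, 32 attention heads with full multi-head attention, 32,000-token vocabulary) stored in bfloat16. Once the repo is cloned or downloaded, it should load with stock transformers; a sketch, with "path/to/checkpoint" standing in for a local copy:

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# "path/to/checkpoint" is a placeholder for a local clone of this repo.
config = AutoConfig.from_pretrained("path/to/checkpoint")
assert config.model_type == "llama" and config.num_hidden_layers == 32

model = AutoModelForCausalLM.from_pretrained(
    "path/to/checkpoint",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
)
```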
generation_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "max_length": 4096,
+   "pad_token_id": 0,
+   "temperature": 0.6,
+   "top_p": 0.9,
+   "transformers_version": "4.42.3"
+ }
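These defaults enable nucleus sampling (temperature 0.6, top-p 0.9), with max_length matching the model's 4096 position limit. generate() picks them up automatically from generation_config.json; an explicit equivalent, assuming model and tokenizer have already been loaded as above:

```python
# Sampling settings equivalent to generation_config.json above.
inputs = tokenizer("Hello", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    max_length=4096,
    pad_token_id=0,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```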
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6892668dbd79cf181ca66a38aa0b3fbe650d8dfef81f58a8d49f8a728fdd236e
+ size 4938985352
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:479e7baa647bb335ac609f1e2c95357b97beeb83566b612338197633b9a607ff
+ size 4947390880
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5cecb679bbafec7a205776882639ad6be98018b2f4dc68cda01e95d4364814e5
+ size 3590488816
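The three entries above are Git LFS pointer files: the shard bytes themselves live in LFS storage, and each pointer records the SHA-256 digest and byte size of its blob. A small sketch for verifying a downloaded shard against its pointer:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards don't need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected digest taken from the model-00001 pointer above.
expected = "6892668dbd79cf181ca66a38aa0b3fbe650d8dfef81f58a8d49f8a728fdd236e"
assert sha256_of("model-00001-of-00003.safetensors") == expected
```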
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 13476831232
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
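The index maps every tensor name to the shard that contains it; total_size is the summed tensor size in bytes (~13.5 GB, in line with the three shard files above). A sketch of using the index to load a single tensor directly with safetensors, without instantiating the full model:

```python
import json
from safetensors import safe_open

# Find which shard holds a given weight, then load just that tensor.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.11.mlp.gate_proj.weight"  # layer 11 straddles shards 1 and 2
shard = index["weight_map"][name]              # -> "model-00001-of-00003.safetensors"
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
print(name, tuple(tensor.shape), tensor.dtype)
```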
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
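Note that pad_token reuses `<unk>` (token id 0) rather than a dedicated padding token, a common choice for Llama tokenizers that lack one. Loading the tokenizer picks these up automatically; a quick check, with "path/to/checkpoint" again a placeholder:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/checkpoint")
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token)  # <s> </s> <unk>
print(tokenizer.pad_token_id)  # 0, matching "pad_token_id" in generation_config.json
```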
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "max_length": 2048,
+   "model_max_length": 2048,
+   "pad_token": "<unk>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "stride": 0,
+   "tokenizer_class": "LlamaTokenizer",
+   "truncation_side": "left",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
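The chat_template embedded above is the Llama-2 chat format: an optional system message wrapped in `<<SYS>>` tags, user turns wrapped in `[INST] ... [/INST]`, and assistant turns closed with `</s>`. Also note model_max_length is 2048 here even though the model config allows 4096 positions. Applying the template, assuming the tokenizer has been loaded as above:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
]
# Render the prompt string; tokenize=False returns the raw templated text.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
# -> <s>[INST] <<SYS>>\nYou are a helpful assistant.\n<</SYS>>\n\nWhat is 2 + 2? [/INST]
```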
trainer_state.json ADDED
@@ -0,0 +1,1424 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.6,
+   "eval_steps": 5,
+   "global_step": 130,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.024615384615384615,
+       "grad_norm": 51.137206373851456,
+       "learning_rate": 5.88235294117647e-08,
+       "logits/chosen": -0.5265347361564636,
+       "logits/rejected": -0.5229614973068237,
+       "logps/chosen": -40.1543083190918,
+       "logps/rejected": -40.267845153808594,
+       "loss": 0.6931,
+       "rewards/accuracies": 0.0,
+       "rewards/chosen": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/rejected": 0.0,
+       "step": 2
+     },
+     {
+       "epoch": 0.04923076923076923,
+       "grad_norm": 53.426708668382766,
+       "learning_rate": 1.176470588235294e-07,
+       "logits/chosen": -0.5090410113334656,
+       "logits/rejected": -0.502047061920166,
+       "logps/chosen": -45.320037841796875,
+       "logps/rejected": -42.6551399230957,
+       "loss": 0.6918,
+       "rewards/accuracies": 0.453125,
+       "rewards/chosen": 0.014461740851402283,
+       "rewards/margins": 0.0072987377643585205,
+       "rewards/rejected": 0.007163003087043762,
+       "step": 4
+     },
+     {
+       "epoch": 0.06153846153846154,
+       "eval_logits/chosen": -0.49676236510276794,
+       "eval_logits/rejected": -0.4936961829662323,
+       "eval_logps/chosen": -45.20378875732422,
+       "eval_logps/rejected": -42.88367462158203,
+       "eval_loss": 0.6931462287902832,
+       "eval_rewards/accuracies": 0.4623287618160248,
+       "eval_rewards/chosen": 0.0008219562005251646,
+       "eval_rewards/margins": 0.0035426486283540726,
+       "eval_rewards/rejected": -0.002720692427828908,
+       "eval_runtime": 400.2137,
+       "eval_samples_per_second": 4.333,
+       "eval_steps_per_second": 0.182,
+       "step": 5
+     },
+     {
+       "epoch": 0.07384615384615385,
+       "grad_norm": 47.768106379014085,
+       "learning_rate": 1.764705882352941e-07,
+       "logits/chosen": -0.4896703064441681,
+       "logits/rejected": -0.48678484559059143,
+       "logps/chosen": -47.16059875488281,
+       "logps/rejected": -35.45697021484375,
+       "loss": 0.6958,
+       "rewards/accuracies": 0.484375,
+       "rewards/chosen": 0.007428005337715149,
+       "rewards/margins": 0.012338653206825256,
+       "rewards/rejected": -0.004910647869110107,
+       "step": 6
+     },
+     {
+       "epoch": 0.09846153846153846,
+       "grad_norm": 39.51012410260543,
+       "learning_rate": 2.352941176470588e-07,
+       "logits/chosen": -0.4996076822280884,
+       "logits/rejected": -0.49669766426086426,
+       "logps/chosen": -47.94187545776367,
+       "logps/rejected": -41.556766510009766,
+       "loss": 0.6914,
+       "rewards/accuracies": 0.5625,
+       "rewards/chosen": 0.018062740564346313,
+       "rewards/margins": 0.025705993175506592,
+       "rewards/rejected": -0.007643252611160278,
+       "step": 8
+     },
+     {
+       "epoch": 0.12307692307692308,
+       "grad_norm": 39.08254813329663,
+       "learning_rate": 2.941176470588235e-07,
+       "logits/chosen": -0.49092355370521545,
+       "logits/rejected": -0.48874035477638245,
+       "logps/chosen": -50.65516662597656,
+       "logps/rejected": -30.806886672973633,
+       "loss": 0.6842,
+       "rewards/accuracies": 0.515625,
+       "rewards/chosen": 0.016676604747772217,
+       "rewards/margins": 0.011630743741989136,
+       "rewards/rejected": 0.005045861005783081,
+       "step": 10
+     },
+     {
+       "epoch": 0.12307692307692308,
+       "eval_logits/chosen": -0.49970191717147827,
+       "eval_logits/rejected": -0.4963047206401825,
+       "eval_logps/chosen": -45.12245178222656,
+       "eval_logps/rejected": -42.913116455078125,
+       "eval_loss": 0.6740441918373108,
+       "eval_rewards/accuracies": 0.5410959124565125,
+       "eval_rewards/chosen": 0.041489191353321075,
+       "eval_rewards/margins": 0.058929312974214554,
+       "eval_rewards/rejected": -0.017440123483538628,
+       "eval_runtime": 398.3897,
+       "eval_samples_per_second": 4.353,
+       "eval_steps_per_second": 0.183,
+       "step": 10
+     },
+     {
+       "epoch": 0.1476923076923077,
+       "grad_norm": 38.09131172735928,
+       "learning_rate": 3.529411764705882e-07,
+       "logits/chosen": -0.48407822847366333,
+       "logits/rejected": -0.4791830778121948,
+       "logps/chosen": -45.49330139160156,
+       "logps/rejected": -42.8103141784668,
+       "loss": 0.6725,
+       "rewards/accuracies": 0.609375,
+       "rewards/chosen": 0.04486130177974701,
+       "rewards/margins": 0.06311255693435669,
+       "rewards/rejected": -0.01825125515460968,
+       "step": 12
+     },
+     {
+       "epoch": 0.1723076923076923,
+       "grad_norm": 35.41525379746354,
+       "learning_rate": 4.117647058823529e-07,
+       "logits/chosen": -0.5208038091659546,
+       "logits/rejected": -0.5175592303276062,
+       "logps/chosen": -40.418609619140625,
+       "logps/rejected": -34.53902816772461,
+       "loss": 0.6501,
+       "rewards/accuracies": 0.546875,
+       "rewards/chosen": 0.11583159863948822,
+       "rewards/margins": 0.07396885752677917,
+       "rewards/rejected": 0.041862741112709045,
+       "step": 14
+     },
+     {
+       "epoch": 0.18461538461538463,
+       "eval_logits/chosen": -0.4988827705383301,
+       "eval_logits/rejected": -0.4956616461277008,
+       "eval_logps/chosen": -44.772987365722656,
+       "eval_logps/rejected": -42.95915985107422,
+       "eval_loss": 0.6234598159790039,
+       "eval_rewards/accuracies": 0.6404109597206116,
+       "eval_rewards/chosen": 0.2162197083234787,
+       "eval_rewards/margins": 0.25668132305145264,
+       "eval_rewards/rejected": -0.040461596101522446,
+       "eval_runtime": 398.8168,
+       "eval_samples_per_second": 4.348,
+       "eval_steps_per_second": 0.183,
+       "step": 15
+     },
+     {
+       "epoch": 0.19692307692307692,
+       "grad_norm": 33.20243289262748,
+       "learning_rate": 4.705882352941176e-07,
+       "logits/chosen": -0.49464067816734314,
+       "logits/rejected": -0.4926251173019409,
+       "logps/chosen": -41.81456756591797,
+       "logps/rejected": -34.730552673339844,
+       "loss": 0.6139,
+       "rewards/accuracies": 0.734375,
+       "rewards/chosen": 0.22246630489826202,
+       "rewards/margins": 0.1789122372865677,
+       "rewards/rejected": 0.043554067611694336,
+       "step": 16
+     },
+     {
+       "epoch": 0.22153846153846155,
+       "grad_norm": 31.414401498072234,
+       "learning_rate": 4.99941324504621e-07,
+       "logits/chosen": -0.5083720684051514,
+       "logits/rejected": -0.5054486393928528,
+       "logps/chosen": -40.7899169921875,
+       "logps/rejected": -41.80813217163086,
+       "loss": 0.6038,
+       "rewards/accuracies": 0.71875,
+       "rewards/chosen": 0.22346775233745575,
+       "rewards/margins": 0.36676231026649475,
+       "rewards/rejected": -0.143294557929039,
+       "step": 18
+     },
+     {
+       "epoch": 0.24615384615384617,
+       "grad_norm": 26.94475877791274,
+       "learning_rate": 4.99472085783721e-07,
+       "logits/chosen": -0.5102497935295105,
+       "logits/rejected": -0.5054219961166382,
+       "logps/chosen": -38.036109924316406,
+       "logps/rejected": -41.01247787475586,
+       "loss": 0.5437,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": 0.5671411752700806,
+       "rewards/margins": 0.6098884344100952,
+       "rewards/rejected": -0.04274718463420868,
+       "step": 20
+     },
+     {
+       "epoch": 0.24615384615384617,
+       "eval_logits/chosen": -0.4962932765483856,
+       "eval_logits/rejected": -0.49319401383399963,
+       "eval_logps/chosen": -43.80841827392578,
+       "eval_logps/rejected": -42.96743392944336,
+       "eval_loss": 0.5607677698135376,
+       "eval_rewards/accuracies": 0.6952054500579834,
+       "eval_rewards/chosen": 0.6985027194023132,
+       "eval_rewards/margins": 0.7430992722511292,
+       "eval_rewards/rejected": -0.04459657892584801,
+       "eval_runtime": 398.3125,
+       "eval_samples_per_second": 4.353,
+       "eval_steps_per_second": 0.183,
+       "step": 20
+     },
+     {
+       "epoch": 0.27076923076923076,
+       "grad_norm": 24.12367725777412,
+       "learning_rate": 4.985344892885899e-07,
+       "logits/chosen": -0.5035347938537598,
+       "logits/rejected": -0.5017133951187134,
+       "logps/chosen": -42.291107177734375,
+       "logps/rejected": -43.623016357421875,
+       "loss": 0.5756,
+       "rewards/accuracies": 0.734375,
+       "rewards/chosen": 0.6317887306213379,
+       "rewards/margins": 0.7202781438827515,
+       "rewards/rejected": -0.08848930895328522,
+       "step": 22
+     },
+     {
+       "epoch": 0.2953846153846154,
+       "grad_norm": 24.794972937394068,
+       "learning_rate": 4.971302952586796e-07,
+       "logits/chosen": -0.5023949146270752,
+       "logits/rejected": -0.49643266201019287,
+       "logps/chosen": -39.85929489135742,
+       "logps/rejected": -46.9812126159668,
+       "loss": 0.5219,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": 0.8194392919540405,
+       "rewards/margins": 1.2944234609603882,
+       "rewards/rejected": -0.4749842882156372,
+       "step": 24
+     },
+     {
+       "epoch": 0.3076923076923077,
+       "eval_logits/chosen": -0.4914375841617584,
+       "eval_logits/rejected": -0.4883214831352234,
+       "eval_logps/chosen": -43.38737869262695,
+       "eval_logps/rejected": -43.16010284423828,
+       "eval_loss": 0.516257107257843,
+       "eval_rewards/accuracies": 0.7397260069847107,
+       "eval_rewards/chosen": 0.9090243577957153,
+       "eval_rewards/margins": 1.0499579906463623,
+       "eval_rewards/rejected": -0.14093351364135742,
+       "eval_runtime": 397.9655,
+       "eval_samples_per_second": 4.357,
+       "eval_steps_per_second": 0.183,
+       "step": 25
+     },
+     {
+       "epoch": 0.32,
+       "grad_norm": 25.854991033804026,
+       "learning_rate": 4.952621399215597e-07,
+       "logits/chosen": -0.49028468132019043,
+       "logits/rejected": -0.48677563667297363,
+       "logps/chosen": -39.38325500488281,
+       "logps/rejected": -30.141273498535156,
+       "loss": 0.5432,
+       "rewards/accuracies": 0.765625,
+       "rewards/chosen": 0.9173061847686768,
+       "rewards/margins": 0.6880545020103455,
+       "rewards/rejected": 0.2292517125606537,
+       "step": 26
+     },
+     {
+       "epoch": 0.3446153846153846,
+       "grad_norm": 22.546249636399228,
+       "learning_rate": 4.929335305436764e-07,
+       "logits/chosen": -0.4758816063404083,
+       "logits/rejected": -0.47193384170532227,
+       "logps/chosen": -37.941322326660156,
+       "logps/rejected": -36.9672966003418,
+       "loss": 0.4989,
+       "rewards/accuracies": 0.8125,
+       "rewards/chosen": 0.9205312132835388,
+       "rewards/margins": 0.96444171667099,
+       "rewards/rejected": -0.0439104288816452,
+       "step": 28
+     },
+     {
+       "epoch": 0.36923076923076925,
+       "grad_norm": 25.277496467379798,
+       "learning_rate": 4.901488388458247e-07,
+       "logits/chosen": -0.47848933935165405,
+       "logits/rejected": -0.4744369089603424,
+       "logps/chosen": -42.54041290283203,
+       "logps/rejected": -32.181236267089844,
+       "loss": 0.5002,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": 0.949561595916748,
+       "rewards/margins": 0.7569789886474609,
+       "rewards/rejected": 0.19258266687393188,
+       "step": 30
+     },
+     {
+       "epoch": 0.36923076923076925,
+       "eval_logits/chosen": -0.48240911960601807,
+       "eval_logits/rejected": -0.47941771149635315,
+       "eval_logps/chosen": -42.97114944458008,
+       "eval_logps/rejected": -43.50387191772461,
+       "eval_loss": 0.4467584788799286,
+       "eval_rewards/accuracies": 0.75,
+       "eval_rewards/chosen": 1.1171396970748901,
+       "eval_rewards/margins": 1.429957389831543,
+       "eval_rewards/rejected": -0.3128177523612976,
+       "eval_runtime": 399.6945,
+       "eval_samples_per_second": 4.338,
+       "eval_steps_per_second": 0.183,
+       "step": 30
+     },
+     {
+       "epoch": 0.39384615384615385,
+       "grad_norm": 21.934080715012698,
+       "learning_rate": 4.869132927957006e-07,
+       "logits/chosen": -0.47030550241470337,
+       "logits/rejected": -0.4669092893600464,
+       "logps/chosen": -39.564884185791016,
+       "logps/rejected": -46.268707275390625,
+       "loss": 0.4561,
+       "rewards/accuracies": 0.71875,
+       "rewards/chosen": 1.1370558738708496,
+       "rewards/margins": 1.5885730981826782,
+       "rewards/rejected": -0.45151710510253906,
+       "step": 32
+     },
+     {
+       "epoch": 0.41846153846153844,
+       "grad_norm": 17.9530062267105,
+       "learning_rate": 4.832329667929376e-07,
+       "logits/chosen": -0.48381492495536804,
+       "logits/rejected": -0.47740432620048523,
+       "logps/chosen": -37.79485321044922,
+       "logps/rejected": -41.405555725097656,
+       "loss": 0.3974,
+       "rewards/accuracies": 0.890625,
+       "rewards/chosen": 1.0800038576126099,
+       "rewards/margins": 1.8046053647994995,
+       "rewards/rejected": -0.7246013879776001,
+       "step": 34
+     },
+     {
+       "epoch": 0.4307692307692308,
+       "eval_logits/chosen": -0.4790378212928772,
+       "eval_logits/rejected": -0.4759667217731476,
+       "eval_logps/chosen": -42.852088928222656,
+       "eval_logps/rejected": -43.939449310302734,
+       "eval_loss": 0.3961867392063141,
+       "eval_rewards/accuracies": 0.7602739930152893,
+       "eval_rewards/chosen": 1.1766709089279175,
+       "eval_rewards/margins": 1.7072778940200806,
+       "eval_rewards/rejected": -0.5306068062782288,
+       "eval_runtime": 402.2584,
+       "eval_samples_per_second": 4.311,
+       "eval_steps_per_second": 0.181,
+       "step": 35
+     },
+     {
+       "epoch": 0.4430769230769231,
+       "grad_norm": 18.01450637894319,
+       "learning_rate": 4.791147702650565e-07,
+       "logits/chosen": -0.4862886071205139,
+       "logits/rejected": -0.48092207312583923,
+       "logps/chosen": -40.004295349121094,
+       "logps/rejected": -39.387386322021484,
+       "loss": 0.4067,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": 1.3134092092514038,
+       "rewards/margins": 1.8237063884735107,
+       "rewards/rejected": -0.5102972984313965,
+       "step": 36
+     },
+     {
+       "epoch": 0.4676923076923077,
+       "grad_norm": 18.54804560370934,
+       "learning_rate": 4.745664346957361e-07,
+       "logits/chosen": -0.46789872646331787,
+       "logits/rejected": -0.4679996967315674,
+       "logps/chosen": -42.09858322143555,
+       "logps/rejected": -31.129802703857422,
+       "loss": 0.3702,
+       "rewards/accuracies": 0.796875,
+       "rewards/chosen": 1.3026843070983887,
+       "rewards/margins": 1.3334376811981201,
+       "rewards/rejected": -0.030753374099731445,
+       "step": 38
+     },
+     {
+       "epoch": 0.49230769230769234,
+       "grad_norm": 13.439566896592863,
+       "learning_rate": 4.695964991097616e-07,
+       "logits/chosen": -0.449634313583374,
+       "logits/rejected": -0.4447551965713501,
+       "logps/chosen": -36.52521514892578,
+       "logps/rejected": -42.37124252319336,
+       "loss": 0.3514,
+       "rewards/accuracies": 0.859375,
+       "rewards/chosen": 1.1417609453201294,
+       "rewards/margins": 1.7916193008422852,
+       "rewards/rejected": -0.6498584747314453,
+       "step": 40
+     },
+     {
+       "epoch": 0.49230769230769234,
+       "eval_logits/chosen": -0.47975781559944153,
+       "eval_logits/rejected": -0.47623777389526367,
+       "eval_logps/chosen": -42.85760498046875,
+       "eval_logps/rejected": -44.426246643066406,
+       "eval_loss": 0.35519111156463623,
+       "eval_rewards/accuracies": 0.7876712083816528,
+       "eval_rewards/chosen": 1.1739110946655273,
+       "eval_rewards/margins": 1.9479175806045532,
+       "eval_rewards/rejected": -0.7740064859390259,
+       "eval_runtime": 398.0514,
+       "eval_samples_per_second": 4.356,
+       "eval_steps_per_second": 0.183,
+       "step": 40
+     },
+     {
+       "epoch": 0.5169230769230769,
+       "grad_norm": 14.688902217061264,
+       "learning_rate": 4.642142940418973e-07,
+       "logits/chosen": -0.5011200904846191,
+       "logits/rejected": -0.49806085228919983,
+       "logps/chosen": -37.53756332397461,
+       "logps/rejected": -43.31938171386719,
+       "loss": 0.3629,
+       "rewards/accuracies": 0.828125,
+       "rewards/chosen": 1.0989573001861572,
+       "rewards/margins": 1.9275836944580078,
+       "rewards/rejected": -0.8286263346672058,
+       "step": 42
+     },
+     {
+       "epoch": 0.5415384615384615,
+       "grad_norm": 14.193796961040897,
+       "learning_rate": 4.5842992401978256e-07,
+       "logits/chosen": -0.4911167323589325,
+       "logits/rejected": -0.4909789264202118,
+       "logps/chosen": -46.16688537597656,
+       "logps/rejected": -35.1140251159668,
+       "loss": 0.316,
+       "rewards/accuracies": 0.8125,
+       "rewards/chosen": 1.0063791275024414,
+       "rewards/margins": 1.4864487648010254,
+       "rewards/rejected": -0.48006969690322876,
+       "step": 44
+     },
+     {
+       "epoch": 0.5538461538461539,
+       "eval_logits/chosen": -0.472987562417984,
+       "eval_logits/rejected": -0.46926581859588623,
+       "eval_logps/chosen": -42.71507263183594,
+       "eval_logps/rejected": -45.037986755371094,
+       "eval_loss": 0.31506600975990295,
+       "eval_rewards/accuracies": 0.801369845867157,
+       "eval_rewards/chosen": 1.2451775074005127,
+       "eval_rewards/margins": 2.3250503540039062,
+       "eval_rewards/rejected": -1.0798726081848145,
+       "eval_runtime": 398.07,
+       "eval_samples_per_second": 4.356,
+       "eval_steps_per_second": 0.183,
+       "step": 45
+     },
+     {
+       "epoch": 0.5661538461538461,
+       "grad_norm": 11.94835713175047,
+       "learning_rate": 4.5225424859373684e-07,
+       "logits/chosen": -0.48655006289482117,
+       "logits/rejected": -0.4802987575531006,
+       "logps/chosen": -45.382694244384766,
+       "logps/rejected": -34.198219299316406,
+       "loss": 0.2964,
+       "rewards/accuracies": 0.828125,
+       "rewards/chosen": 1.3039376735687256,
+       "rewards/margins": 1.7199229001998901,
+       "rewards/rejected": -0.4159852862358093,
+       "step": 46
+     },
+     {
+       "epoch": 0.5907692307692308,
+       "grad_norm": 11.665118422994077,
+       "learning_rate": 4.456988619490889e-07,
+       "logits/chosen": -0.4525223672389984,
+       "logits/rejected": -0.4487767517566681,
+       "logps/chosen": -42.471214294433594,
+       "logps/rejected": -37.61225128173828,
+       "loss": 0.2878,
+       "rewards/accuracies": 0.828125,
+       "rewards/chosen": 1.1848539113998413,
+       "rewards/margins": 1.884321928024292,
+       "rewards/rejected": -0.6994677782058716,
+       "step": 48
+     },
+     {
+       "epoch": 0.6153846153846154,
+       "grad_norm": 11.208056217338184,
+       "learning_rate": 4.3877607113930516e-07,
+       "logits/chosen": -0.4693542420864105,
+       "logits/rejected": -0.46645450592041016,
+       "logps/chosen": -38.541446685791016,
+       "logps/rejected": -28.433433532714844,
+       "loss": 0.2786,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": 1.3120430707931519,
+       "rewards/margins": 1.601030945777893,
+       "rewards/rejected": -0.2889878749847412,
+       "step": 50
+     },
+     {
+       "epoch": 0.6153846153846154,
+       "eval_logits/chosen": -0.46347275376319885,
+       "eval_logits/rejected": -0.45981672406196594,
+       "eval_logps/chosen": -42.53354263305664,
+       "eval_logps/rejected": -45.56825256347656,
+       "eval_loss": 0.28788629174232483,
+       "eval_rewards/accuracies": 0.818493127822876,
+       "eval_rewards/chosen": 1.335942029953003,
+       "eval_rewards/margins": 2.6809515953063965,
+       "eval_rewards/rejected": -1.345009684562683,
+       "eval_runtime": 398.8881,
+       "eval_samples_per_second": 4.347,
+       "eval_steps_per_second": 0.183,
+       "step": 50
+     },
+     {
+       "epoch": 0.64,
+       "grad_norm": 12.527059053107184,
+       "learning_rate": 4.314988729807827e-07,
+       "logits/chosen": -0.46812835335731506,
+       "logits/rejected": -0.4670564532279968,
+       "logps/chosen": -39.73173904418945,
+       "logps/rejected": -31.038818359375,
+       "loss": 0.277,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": 1.1246474981307983,
+       "rewards/margins": 1.668458342552185,
+       "rewards/rejected": -0.5438107252120972,
+       "step": 52
+     },
+     {
+       "epoch": 0.6646153846153846,
+       "grad_norm": 8.553726428740159,
+       "learning_rate": 4.238809296526846e-07,
+       "logits/chosen": -0.4466942548751831,
+       "logits/rejected": -0.4432934820652008,
+       "logps/chosen": -40.79918670654297,
+       "logps/rejected": -43.690181732177734,
+       "loss": 0.2475,
+       "rewards/accuracies": 0.8125,
+       "rewards/chosen": 1.295045256614685,
+       "rewards/margins": 2.5195388793945312,
+       "rewards/rejected": -1.2244938611984253,
+       "step": 54
+     },
+     {
+       "epoch": 0.676923076923077,
+       "eval_logits/chosen": -0.4640098512172699,
+       "eval_logits/rejected": -0.4600638151168823,
+       "eval_logps/chosen": -42.25896453857422,
+       "eval_logps/rejected": -45.97068786621094,
+       "eval_loss": 0.26564928889274597,
+       "eval_rewards/accuracies": 0.818493127822876,
+       "eval_rewards/chosen": 1.47323477268219,
+       "eval_rewards/margins": 3.019458293914795,
+       "eval_rewards/rejected": -1.5462236404418945,
+       "eval_runtime": 397.9539,
+       "eval_samples_per_second": 4.357,
+       "eval_steps_per_second": 0.183,
+       "step": 55
+     },
+     {
+       "epoch": 0.6892307692307692,
+       "grad_norm": 10.956104922328452,
+       "learning_rate": 4.159365430476261e-07,
+       "logits/chosen": -0.45851805806159973,
+       "logits/rejected": -0.4554572105407715,
+       "logps/chosen": -39.361473083496094,
+       "logps/rejected": -40.478187561035156,
+       "loss": 0.2571,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": 1.2780680656433105,
+       "rewards/margins": 2.710892915725708,
+       "rewards/rejected": -1.4328248500823975,
+       "step": 56
+     },
+     {
+       "epoch": 0.7138461538461538,
+       "grad_norm": 8.998450490495424,
+       "learning_rate": 4.076806279213655e-07,
+       "logits/chosen": -0.44970858097076416,
+       "logits/rejected": -0.4487365484237671,
+       "logps/chosen": -39.6558723449707,
+       "logps/rejected": -30.797224044799805,
+       "loss": 0.2377,
+       "rewards/accuracies": 0.859375,
+       "rewards/chosen": 1.423360824584961,
+       "rewards/margins": 2.117220163345337,
+       "rewards/rejected": -0.6938591003417969,
+       "step": 58
+     },
+     {
+       "epoch": 0.7384615384615385,
+       "grad_norm": 7.997921172873259,
+       "learning_rate": 3.991286838919086e-07,
+       "logits/chosen": -0.45197734236717224,
+       "logits/rejected": -0.4494423270225525,
+       "logps/chosen": -39.821556091308594,
+       "logps/rejected": -30.172922134399414,
+       "loss": 0.2308,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": 1.3537344932556152,
+       "rewards/margins": 2.035463571548462,
+       "rewards/rejected": -0.6817290186882019,
+       "step": 60
+     },
+     {
+       "epoch": 0.7384615384615385,
+       "eval_logits/chosen": -0.4599209725856781,
+       "eval_logits/rejected": -0.45611828565597534,
+       "eval_logps/chosen": -41.97780990600586,
+       "eval_logps/rejected": -46.275150299072266,
+       "eval_loss": 0.25006040930747986,
+       "eval_rewards/accuracies": 0.8219178318977356,
+       "eval_rewards/chosen": 1.6138089895248413,
+       "eval_rewards/margins": 3.3122646808624268,
+       "eval_rewards/rejected": -1.698456048965454,
+       "eval_runtime": 400.364,
+       "eval_samples_per_second": 4.331,
+       "eval_steps_per_second": 0.182,
+       "step": 60
+     },
+     {
+       "epoch": 0.7630769230769231,
+       "grad_norm": 8.82980837299946,
+       "learning_rate": 3.902967663405956e-07,
+       "logits/chosen": -0.4859851002693176,
+       "logits/rejected": -0.48067888617515564,
+       "logps/chosen": -41.49687194824219,
+       "logps/rejected": -52.49277114868164,
+       "loss": 0.2392,
+       "rewards/accuracies": 0.84375,
+       "rewards/chosen": 1.802024483680725,
+       "rewards/margins": 3.978426456451416,
+       "rewards/rejected": -2.1764023303985596,
+       "step": 62
+     },
+     {
+       "epoch": 0.7876923076923077,
+       "grad_norm": 9.426495089553443,
+       "learning_rate": 3.8120145626980015e-07,
+       "logits/chosen": -0.4271140694618225,
+       "logits/rejected": -0.42757901549339294,
+       "logps/chosen": -42.58659744262695,
+       "logps/rejected": -34.0533561706543,
+       "loss": 0.2168,
+       "rewards/accuracies": 0.828125,
+       "rewards/chosen": 1.4099901914596558,
+       "rewards/margins": 2.444286346435547,
+       "rewards/rejected": -1.0342963933944702,
+       "step": 64
+     },
+     {
+       "epoch": 0.8,
+       "eval_logits/chosen": -0.45980003476142883,
+       "eval_logits/rejected": -0.4559204876422882,
+       "eval_logps/chosen": -41.840850830078125,
+       "eval_logps/rejected": -46.55259323120117,
+       "eval_loss": 0.24060946702957153,
+       "eval_rewards/accuracies": 0.8287671208381653,
+       "eval_rewards/chosen": 1.6822888851165771,
+       "eval_rewards/margins": 3.519465208053589,
+       "eval_rewards/rejected": -1.8371765613555908,
+       "eval_runtime": 400.9472,
+       "eval_samples_per_second": 4.325,
+       "eval_steps_per_second": 0.182,
+       "step": 65
+     },
+     {
+       "epoch": 0.8123076923076923,
+       "grad_norm": 7.070039356412563,
+       "learning_rate": 3.718598291738298e-07,
+       "logits/chosen": -0.44447261095046997,
+       "logits/rejected": -0.44296732544898987,
+       "logps/chosen": -37.90412902832031,
+       "logps/rejected": -36.56926727294922,
+       "loss": 0.2129,
+       "rewards/accuracies": 0.828125,
+       "rewards/chosen": 1.5699365139007568,
+       "rewards/margins": 2.4697201251983643,
+       "rewards/rejected": -0.899783730506897,
+       "step": 66
+     },
+     {
+       "epoch": 0.8369230769230769,
+       "grad_norm": 7.560682128141536,
+       "learning_rate": 3.622894229814698e-07,
+       "logits/chosen": -0.4488967955112457,
+       "logits/rejected": -0.44460177421569824,
+       "logps/chosen": -38.6946907043457,
+       "logps/rejected": -46.51150894165039,
+       "loss": 0.2148,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": 1.4161229133605957,
+       "rewards/margins": 3.171121835708618,
+       "rewards/rejected": -1.754999041557312,
+       "step": 68
+     },
+     {
+       "epoch": 0.8615384615384616,
+       "grad_norm": 8.217469156292612,
+       "learning_rate": 3.52508205130354e-07,
+       "logits/chosen": -0.45258235931396484,
+       "logits/rejected": -0.44472819566726685,
+       "logps/chosen": -41.79392623901367,
+       "logps/rejected": -49.990150451660156,
+       "loss": 0.2015,
+       "rewards/accuracies": 0.90625,
+       "rewards/chosen": 1.6353414058685303,
+       "rewards/margins": 4.414577960968018,
+       "rewards/rejected": -2.779236316680908,
+       "step": 70
+     },
+     {
+       "epoch": 0.8615384615384616,
+       "eval_logits/chosen": -0.45598042011260986,
+       "eval_logits/rejected": -0.4519526958465576,
+       "eval_logps/chosen": -41.811824798583984,
+       "eval_logps/rejected": -46.88448715209961,
+       "eval_loss": 0.23113983869552612,
+       "eval_rewards/accuracies": 0.835616409778595,
+       "eval_rewards/chosen": 1.696802020072937,
+       "eval_rewards/margins": 3.699925661087036,
+       "eval_rewards/rejected": -2.0031237602233887,
+       "eval_runtime": 397.814,
+       "eval_samples_per_second": 4.359,
+       "eval_steps_per_second": 0.184,
+       "step": 70
+     },
+     {
+       "epoch": 0.8861538461538462,
+       "grad_norm": 9.912800042727127,
+       "learning_rate": 3.4253453883497864e-07,
+       "logits/chosen": -0.46372586488723755,
+       "logits/rejected": -0.45979729294776917,
+       "logps/chosen": -38.839900970458984,
+       "logps/rejected": -39.225364685058594,
+       "loss": 0.1988,
769
+ "rewards/accuracies": 0.890625,
770
+ "rewards/chosen": 1.1858875751495361,
771
+ "rewards/margins": 3.046271562576294,
772
+ "rewards/rejected": -1.8603839874267578,
773
+ "step": 72
774
+ },
775
+ {
776
+ "epoch": 0.9107692307692308,
777
+ "grad_norm": 8.369939820458308,
778
+ "learning_rate": 3.323871486116851e-07,
779
+ "logits/chosen": -0.4712873697280884,
780
+ "logits/rejected": -0.4673464894294739,
781
+ "logps/chosen": -38.399444580078125,
782
+ "logps/rejected": -31.575439453125,
783
+ "loss": 0.2334,
784
+ "rewards/accuracies": 0.859375,
785
+ "rewards/chosen": 1.5223883390426636,
786
+ "rewards/margins": 2.529682159423828,
787
+ "rewards/rejected": -1.0072938203811646,
788
+ "step": 74
789
+ },
790
+ {
791
+ "epoch": 0.9230769230769231,
792
+ "eval_logits/chosen": -0.4527950584888458,
793
+ "eval_logits/rejected": -0.44881972670555115,
794
+ "eval_logps/chosen": -41.899085998535156,
795
+ "eval_logps/rejected": -47.179649353027344,
796
+ "eval_loss": 0.2244771271944046,
797
+ "eval_rewards/accuracies": 0.8424657583236694,
798
+ "eval_rewards/chosen": 1.6531713008880615,
799
+ "eval_rewards/margins": 3.803877830505371,
800
+ "eval_rewards/rejected": -2.1507065296173096,
801
+ "eval_runtime": 398.9461,
802
+ "eval_samples_per_second": 4.346,
803
+ "eval_steps_per_second": 0.183,
804
+ "step": 75
805
+ },
806
+ {
807
+ "epoch": 0.9353846153846154,
808
+ "grad_norm": 9.041284945416068,
809
+ "learning_rate": 3.220850851253377e-07,
810
+ "logits/chosen": -0.48120999336242676,
811
+ "logits/rejected": -0.476688027381897,
812
+ "logps/chosen": -41.46022415161133,
813
+ "logps/rejected": -46.435829162597656,
814
+ "loss": 0.232,
815
+ "rewards/accuracies": 0.875,
816
+ "rewards/chosen": 1.5242944955825806,
817
+ "rewards/margins": 3.8246002197265625,
818
+ "rewards/rejected": -2.3003056049346924,
819
+ "step": 76
820
+ },
821
+ {
822
+ "epoch": 0.96,
823
+ "grad_norm": 5.93400233545759,
824
+ "learning_rate": 3.1164768942369053e-07,
825
+ "logits/chosen": -0.4919479787349701,
826
+ "logits/rejected": -0.4866538941860199,
827
+ "logps/chosen": -35.453460693359375,
828
+ "logps/rejected": -63.363746643066406,
829
+ "loss": 0.2211,
830
+ "rewards/accuracies": 0.84375,
831
+ "rewards/chosen": 1.4180715084075928,
832
+ "rewards/margins": 4.756035804748535,
833
+ "rewards/rejected": -3.3379647731781006,
834
+ "step": 78
835
+ },
836
+ {
837
+ "epoch": 0.9846153846153847,
838
+ "grad_norm": 6.808355501392862,
839
+ "learning_rate": 3.010945566265912e-07,
840
+ "logits/chosen": -0.4714447855949402,
841
+ "logits/rejected": -0.46636244654655457,
842
+ "logps/chosen": -44.541507720947266,
843
+ "logps/rejected": -33.366180419921875,
844
+ "loss": 0.2075,
845
+ "rewards/accuracies": 0.875,
846
+ "rewards/chosen": 1.5286719799041748,
847
+ "rewards/margins": 2.761920928955078,
848
+ "rewards/rejected": -1.233249306678772,
849
+ "step": 80
850
+ },
851
+ {
852
+ "epoch": 0.9846153846153847,
853
+ "eval_logits/chosen": -0.45233336091041565,
854
+ "eval_logits/rejected": -0.44827964901924133,
855
+ "eval_logps/chosen": -41.91486740112305,
856
+ "eval_logps/rejected": -47.406375885009766,
857
+ "eval_loss": 0.2178829461336136,
858
+ "eval_rewards/accuracies": 0.8458904027938843,
859
+ "eval_rewards/chosen": 1.645279884338379,
860
+ "eval_rewards/margins": 3.909348964691162,
861
+ "eval_rewards/rejected": -2.2640695571899414,
862
+ "eval_runtime": 398.4802,
863
+ "eval_samples_per_second": 4.352,
864
+ "eval_steps_per_second": 0.183,
865
+ "step": 80
866
+ },
867
+ {
868
+ "epoch": 1.0092307692307692,
869
+ "grad_norm": 7.772486195689839,
870
+ "learning_rate": 2.9044549913819124e-07,
871
+ "logits/chosen": -0.4541618227958679,
872
+ "logits/rejected": -0.4516042470932007,
873
+ "logps/chosen": -42.46653747558594,
874
+ "logps/rejected": -42.07926940917969,
875
+ "loss": 0.1903,
876
+ "rewards/accuracies": 0.890625,
877
+ "rewards/chosen": 1.5551599264144897,
878
+ "rewards/margins": 3.8793962001800537,
879
+ "rewards/rejected": -2.3242363929748535,
880
+ "step": 82
881
+ },
882
+ {
883
+ "epoch": 1.0338461538461539,
884
+ "grad_norm": 5.782441199522606,
885
+ "learning_rate": 2.797205094512266e-07,
886
+ "logits/chosen": -0.43424034118652344,
887
+ "logits/rejected": -0.42985162138938904,
888
+ "logps/chosen": -41.127593994140625,
889
+ "logps/rejected": -45.52932357788086,
890
+ "loss": 0.1885,
891
+ "rewards/accuracies": 0.890625,
892
+ "rewards/chosen": 1.399495005607605,
893
+ "rewards/margins": 3.8588943481445312,
894
+ "rewards/rejected": -2.4593992233276367,
895
+ "step": 84
896
+ },
897
+ {
898
+ "epoch": 1.0461538461538462,
899
+ "eval_logits/chosen": -0.45614686608314514,
900
+ "eval_logits/rejected": -0.45181941986083984,
901
+ "eval_logps/chosen": -41.860015869140625,
902
+ "eval_logps/rejected": -47.55991744995117,
903
+ "eval_loss": 0.21262364089488983,
904
+ "eval_rewards/accuracies": 0.8493150472640991,
905
+ "eval_rewards/chosen": 1.6727051734924316,
906
+ "eval_rewards/margins": 4.013547897338867,
907
+ "eval_rewards/rejected": -2.3408427238464355,
908
+ "eval_runtime": 401.4695,
909
+ "eval_samples_per_second": 4.319,
910
+ "eval_steps_per_second": 0.182,
911
+ "step": 85
912
+ },
913
+ {
914
+ "epoch": 1.0584615384615386,
915
+ "grad_norm": 4.97735971019092,
916
+ "learning_rate": 2.6893972261320264e-07,
917
+ "logits/chosen": -0.42422690987586975,
918
+ "logits/rejected": -0.4213142693042755,
919
+ "logps/chosen": -39.9366340637207,
920
+ "logps/rejected": -40.46057891845703,
921
+ "loss": 0.1749,
922
+ "rewards/accuracies": 0.953125,
923
+ "rewards/chosen": 1.7523412704467773,
924
+ "rewards/margins": 3.447618007659912,
925
+ "rewards/rejected": -1.6952769756317139,
926
+ "step": 86
927
+ },
928
+ {
929
+ "epoch": 1.083076923076923,
930
+ "grad_norm": 5.4006356704238465,
931
+ "learning_rate": 2.5812337842494516e-07,
932
+ "logits/chosen": -0.4536130726337433,
933
+ "logits/rejected": -0.45070070028305054,
934
+ "logps/chosen": -41.13835906982422,
935
+ "logps/rejected": -36.39931106567383,
936
+ "loss": 0.1732,
937
+ "rewards/accuracies": 0.875,
938
+ "rewards/chosen": 1.473576545715332,
939
+ "rewards/margins": 3.01824688911438,
940
+ "rewards/rejected": -1.5446703433990479,
941
+ "step": 88
942
+ },
943
+ {
944
+ "epoch": 1.1076923076923078,
945
+ "grad_norm": 5.007052371554692,
946
+ "learning_rate": 2.4729178344249006e-07,
947
+ "logits/chosen": -0.45735257863998413,
948
+ "logits/rejected": -0.45181047916412354,
949
+ "logps/chosen": -39.739837646484375,
950
+ "logps/rejected": -40.88286590576172,
951
+ "loss": 0.1838,
952
+ "rewards/accuracies": 0.859375,
953
+ "rewards/chosen": 1.5848816633224487,
954
+ "rewards/margins": 3.2226974964141846,
955
+ "rewards/rejected": -1.6378157138824463,
956
+ "step": 90
957
+ },
958
+ {
959
+ "epoch": 1.1076923076923078,
960
+ "eval_logits/chosen": -0.44746431708335876,
961
+ "eval_logits/rejected": -0.44344425201416016,
962
+ "eval_logps/chosen": -41.768062591552734,
963
+ "eval_logps/rejected": -47.707096099853516,
964
+ "eval_loss": 0.20802097022533417,
965
+ "eval_rewards/accuracies": 0.8527397513389587,
966
+ "eval_rewards/chosen": 1.7186800241470337,
967
+ "eval_rewards/margins": 4.133108615875244,
968
+ "eval_rewards/rejected": -2.414428472518921,
969
+ "eval_runtime": 398.368,
970
+ "eval_samples_per_second": 4.353,
971
+ "eval_steps_per_second": 0.183,
972
+ "step": 90
973
+ },
974
+ {
975
+ "epoch": 1.1323076923076922,
976
+ "grad_norm": 5.422550680740592,
977
+ "learning_rate": 2.3646527285364563e-07,
978
+ "logits/chosen": -0.45245465636253357,
979
+ "logits/rejected": -0.4461210072040558,
980
+ "logps/chosen": -40.16954040527344,
981
+ "logps/rejected": -40.06535339355469,
982
+ "loss": 0.1908,
983
+ "rewards/accuracies": 0.875,
984
+ "rewards/chosen": 1.9867323637008667,
985
+ "rewards/margins": 3.7591819763183594,
986
+ "rewards/rejected": -1.7724494934082031,
987
+ "step": 92
988
+ },
989
+ {
990
+ "epoch": 1.156923076923077,
991
+ "grad_norm": 5.242542300523203,
992
+ "learning_rate": 2.256641723008026e-07,
993
+ "logits/chosen": -0.4320615530014038,
994
+ "logits/rejected": -0.4263082444667816,
995
+ "logps/chosen": -37.380733489990234,
996
+ "logps/rejected": -34.89535903930664,
997
+ "loss": 0.2062,
998
+ "rewards/accuracies": 0.859375,
999
+ "rewards/chosen": 1.6694892644882202,
1000
+ "rewards/margins": 3.161728858947754,
1001
+ "rewards/rejected": -1.4922394752502441,
1002
+ "step": 94
1003
+ },
1004
+ {
1005
+ "epoch": 1.1692307692307693,
1006
+ "eval_logits/chosen": -0.4461810886859894,
1007
+ "eval_logits/rejected": -0.44209808111190796,
1008
+ "eval_logps/chosen": -41.719635009765625,
1009
+ "eval_logps/rejected": -47.83554458618164,
1010
+ "eval_loss": 0.20408523082733154,
1011
+ "eval_rewards/accuracies": 0.8595890402793884,
1012
+ "eval_rewards/chosen": 1.7428948879241943,
1013
+ "eval_rewards/margins": 4.221549987792969,
1014
+ "eval_rewards/rejected": -2.4786548614501953,
1015
+ "eval_runtime": 399.3763,
1016
+ "eval_samples_per_second": 4.342,
1017
+ "eval_steps_per_second": 0.183,
1018
+ "step": 95
1019
+ },
1020
+ {
1021
+ "epoch": 1.1815384615384614,
1022
+ "grad_norm": 5.9451928875167805,
1023
+ "learning_rate": 2.1490875972166393e-07,
1024
+ "logits/chosen": -0.471175879240036,
1025
+ "logits/rejected": -0.46583351492881775,
1026
+ "logps/chosen": -34.772342681884766,
1027
+ "logps/rejected": -39.63105392456055,
1028
+ "loss": 0.1776,
1029
+ "rewards/accuracies": 0.84375,
1030
+ "rewards/chosen": 1.5616707801818848,
1031
+ "rewards/margins": 3.4220144748687744,
1032
+ "rewards/rejected": -1.8603436946868896,
1033
+ "step": 96
1034
+ },
1035
+ {
1036
+ "epoch": 1.2061538461538461,
1037
+ "grad_norm": 5.4858414240825075,
1038
+ "learning_rate": 2.0421922727953595e-07,
1039
+ "logits/chosen": -0.4286724925041199,
1040
+ "logits/rejected": -0.4263175427913666,
1041
+ "logps/chosen": -36.57586669921875,
1042
+ "logps/rejected": -31.930763244628906,
1043
+ "loss": 0.1655,
1044
+ "rewards/accuracies": 0.84375,
1045
+ "rewards/chosen": 1.753572940826416,
1046
+ "rewards/margins": 2.9586939811706543,
1047
+ "rewards/rejected": -1.2051211595535278,
1048
+ "step": 98
1049
+ },
1050
+ {
1051
+ "epoch": 1.2307692307692308,
1052
+ "grad_norm": 5.720592020809117,
1053
+ "learning_rate": 1.9361564345465145e-07,
1054
+ "logits/chosen": -0.4527323842048645,
1055
+ "logits/rejected": -0.44913169741630554,
1056
+ "logps/chosen": -34.954219818115234,
1057
+ "logps/rejected": -37.62247085571289,
1058
+ "loss": 0.1794,
1059
+ "rewards/accuracies": 0.84375,
1060
+ "rewards/chosen": 1.6976581811904907,
1061
+ "rewards/margins": 3.37191104888916,
1062
+ "rewards/rejected": -1.674253225326538,
1063
+ "step": 100
1064
+ },
1065
+ {
1066
+ "epoch": 1.2307692307692308,
1067
+ "eval_logits/chosen": -0.446783185005188,
1068
+ "eval_logits/rejected": -0.44245585799217224,
1069
+ "eval_logps/chosen": -41.71480178833008,
1070
+ "eval_logps/rejected": -47.994571685791016,
1071
+ "eval_loss": 0.20066353678703308,
1072
+ "eval_rewards/accuracies": 0.8561643958091736,
1073
+ "eval_rewards/chosen": 1.745313048362732,
1074
+ "eval_rewards/margins": 4.30348014831543,
1075
+ "eval_rewards/rejected": -2.558166742324829,
1076
+ "eval_runtime": 400.8371,
1077
+ "eval_samples_per_second": 4.326,
1078
+ "eval_steps_per_second": 0.182,
1079
+ "step": 100
1080
+ },
1081
+ {
1082
+ "epoch": 1.2553846153846153,
1083
+ "grad_norm": 5.495318529151337,
1084
+ "learning_rate": 1.8311791536769483e-07,
1085
+ "logits/chosen": -0.4154907763004303,
1086
+ "logits/rejected": -0.41147375106811523,
1087
+ "logps/chosen": -37.79768753051758,
1088
+ "logps/rejected": -48.32915496826172,
1089
+ "loss": 0.1734,
1090
+ "rewards/accuracies": 0.90625,
1091
+ "rewards/chosen": 1.4379969835281372,
1092
+ "rewards/margins": 4.379321098327637,
1093
+ "rewards/rejected": -2.94132399559021,
1094
+ "step": 102
1095
+ },
1096
+ {
1097
+ "epoch": 1.28,
1098
+ "grad_norm": 5.371136709550833,
1099
+ "learning_rate": 1.7274575140626315e-07,
1100
+ "logits/chosen": -0.44362062215805054,
1101
+ "logits/rejected": -0.4401937425136566,
1102
+ "logps/chosen": -38.68498992919922,
1103
+ "logps/rejected": -41.304710388183594,
1104
+ "loss": 0.1857,
1105
+ "rewards/accuracies": 0.875,
1106
+ "rewards/chosen": 1.9046180248260498,
1107
+ "rewards/margins": 3.721491813659668,
1108
+ "rewards/rejected": -1.8168736696243286,
1109
+ "step": 104
1110
+ },
1111
+ {
1112
+ "epoch": 1.2923076923076924,
1113
+ "eval_logits/chosen": -0.44231978058815,
1114
+ "eval_logits/rejected": -0.43791159987449646,
1115
+ "eval_logps/chosen": -41.76345443725586,
1116
+ "eval_logps/rejected": -48.174468994140625,
1117
+ "eval_loss": 0.1971779763698578,
1118
+ "eval_rewards/accuracies": 0.8595890402793884,
1119
+ "eval_rewards/chosen": 1.7209877967834473,
1120
+ "eval_rewards/margins": 4.369105339050293,
1121
+ "eval_rewards/rejected": -2.6481170654296875,
1122
+ "eval_runtime": 400.535,
1123
+ "eval_samples_per_second": 4.329,
1124
+ "eval_steps_per_second": 0.182,
1125
+ "step": 105
1126
+ },
1127
+ {
1128
+ "epoch": 1.3046153846153845,
1129
+ "grad_norm": 5.888788259237849,
1130
+ "learning_rate": 1.6251862422442788e-07,
1131
+ "logits/chosen": -0.4513101577758789,
1132
+ "logits/rejected": -0.4436649978160858,
1133
+ "logps/chosen": -40.830169677734375,
1134
+ "logps/rejected": -56.550262451171875,
1135
+ "loss": 0.1686,
1136
+ "rewards/accuracies": 0.890625,
1137
+ "rewards/chosen": 1.6578915119171143,
1138
+ "rewards/margins": 4.946527481079102,
1139
+ "rewards/rejected": -3.2886364459991455,
1140
+ "step": 106
1141
+ },
1142
+ {
1143
+ "epoch": 1.3292307692307692,
1144
+ "grad_norm": 6.612486006770989,
1145
+ "learning_rate": 1.5245573418486135e-07,
1146
+ "logits/chosen": -0.44994574785232544,
1147
+ "logits/rejected": -0.4477950930595398,
1148
+ "logps/chosen": -38.80488586425781,
1149
+ "logps/rejected": -41.393211364746094,
1150
+ "loss": 0.1875,
1151
+ "rewards/accuracies": 0.84375,
1152
+ "rewards/chosen": 1.4876614809036255,
1153
+ "rewards/margins": 3.6893627643585205,
1154
+ "rewards/rejected": -2.2017014026641846,
1155
+ "step": 108
1156
+ },
1157
+ {
1158
+ "epoch": 1.353846153846154,
1159
+ "grad_norm": 4.656660129291058,
1160
+ "learning_rate": 1.4257597331216208e-07,
1161
+ "logits/chosen": -0.4406875669956207,
1162
+ "logits/rejected": -0.43555599451065063,
1163
+ "logps/chosen": -42.75905990600586,
1164
+ "logps/rejected": -54.37929153442383,
1165
+ "loss": 0.1647,
1166
+ "rewards/accuracies": 0.9375,
1167
+ "rewards/chosen": 1.7431519031524658,
1168
+ "rewards/margins": 5.2682318687438965,
1169
+ "rewards/rejected": -3.5250799655914307,
1170
+ "step": 110
1171
+ },
1172
+ {
1173
+ "epoch": 1.353846153846154,
1174
+ "eval_logits/chosen": -0.44183987379074097,
1175
+ "eval_logits/rejected": -0.4375387728214264,
1176
+ "eval_logps/chosen": -41.78536605834961,
1177
+ "eval_logps/rejected": -48.30013656616211,
1178
+ "eval_loss": 0.19461464881896973,
1179
+ "eval_rewards/accuracies": 0.8595890402793884,
1180
+ "eval_rewards/chosen": 1.710031509399414,
1181
+ "eval_rewards/margins": 4.420979976654053,
1182
+ "eval_rewards/rejected": -2.7109484672546387,
1183
+ "eval_runtime": 398.8533,
1184
+ "eval_samples_per_second": 4.347,
1185
+ "eval_steps_per_second": 0.183,
1186
+ "step": 110
1187
+ },
1188
+ {
1189
+ "epoch": 1.3784615384615384,
1190
+ "grad_norm": 4.429475200922326,
1191
+ "learning_rate": 1.328978898250525e-07,
1192
+ "logits/chosen": -0.45591893792152405,
1193
+ "logits/rejected": -0.45284152030944824,
1194
+ "logps/chosen": -37.928653717041016,
1195
+ "logps/rejected": -38.99214172363281,
1196
+ "loss": 0.1596,
1197
+ "rewards/accuracies": 0.953125,
1198
+ "rewards/chosen": 1.702134609222412,
1199
+ "rewards/margins": 3.827345371246338,
1200
+ "rewards/rejected": -2.125210762023926,
1201
+ "step": 112
1202
+ },
1203
+ {
1204
+ "epoch": 1.403076923076923,
1205
+ "grad_norm": 4.278591337865169,
1206
+ "learning_rate": 1.234396533140365e-07,
1207
+ "logits/chosen": -0.42062410712242126,
1208
+ "logits/rejected": -0.41781002283096313,
1209
+ "logps/chosen": -41.77610778808594,
1210
+ "logps/rejected": -48.5487060546875,
1211
+ "loss": 0.1524,
1212
+ "rewards/accuracies": 0.890625,
1213
+ "rewards/chosen": 1.599095106124878,
1214
+ "rewards/margins": 4.177494049072266,
1215
+ "rewards/rejected": -2.578399181365967,
1216
+ "step": 114
1217
+ },
1218
+ {
1219
+ "epoch": 1.4153846153846155,
1220
+ "eval_logits/chosen": -0.44146084785461426,
1221
+ "eval_logits/rejected": -0.437191367149353,
1222
+ "eval_logps/chosen": -41.83814239501953,
1223
+ "eval_logps/rejected": -48.426334381103516,
1224
+ "eval_loss": 0.1936241090297699,
1225
+ "eval_rewards/accuracies": 0.8561643958091736,
1226
+ "eval_rewards/chosen": 1.6836414337158203,
1227
+ "eval_rewards/margins": 4.457688331604004,
1228
+ "eval_rewards/rejected": -2.7740468978881836,
1229
+ "eval_runtime": 398.1677,
1230
+ "eval_samples_per_second": 4.355,
1231
+ "eval_steps_per_second": 0.183,
1232
+ "step": 115
1233
+ },
1234
+ {
1235
+ "epoch": 1.4276923076923076,
1236
+ "grad_norm": 4.8647599997437805,
1237
+ "learning_rate": 1.1421902062989178e-07,
1238
+ "logits/chosen": -0.44749611616134644,
1239
+ "logits/rejected": -0.4411754608154297,
1240
+ "logps/chosen": -43.03426742553711,
1241
+ "logps/rejected": -60.566200256347656,
1242
+ "loss": 0.1551,
1243
+ "rewards/accuracies": 0.90625,
1244
+ "rewards/chosen": 1.691102147102356,
1245
+ "rewards/margins": 5.743535041809082,
1246
+ "rewards/rejected": -4.052433013916016,
1247
+ "step": 116
1248
+ },
1249
+ {
1250
+ "epoch": 1.4523076923076923,
1251
+ "grad_norm": 4.32184296596162,
1252
+ "learning_rate": 1.0525330254703788e-07,
1253
+ "logits/chosen": -0.4085264205932617,
1254
+ "logits/rejected": -0.4047048091888428,
1255
+ "logps/chosen": -40.241764068603516,
1256
+ "logps/rejected": -48.12974166870117,
1257
+ "loss": 0.1681,
1258
+ "rewards/accuracies": 0.796875,
1259
+ "rewards/chosen": 1.730395793914795,
1260
+ "rewards/margins": 4.292797088623047,
1261
+ "rewards/rejected": -2.5624003410339355,
1262
+ "step": 118
1263
+ },
1264
+ {
1265
+ "epoch": 1.476923076923077,
1266
+ "grad_norm": 6.544542585214649,
1267
+ "learning_rate": 9.655933126436563e-08,
1268
+ "logits/chosen": -0.4505607783794403,
1269
+ "logits/rejected": -0.44797009229660034,
1270
+ "logps/chosen": -42.54741668701172,
1271
+ "logps/rejected": -34.47932052612305,
1272
+ "loss": 0.1668,
1273
+ "rewards/accuracies": 0.90625,
1274
+ "rewards/chosen": 1.5434067249298096,
1275
+ "rewards/margins": 3.265448570251465,
1276
+ "rewards/rejected": -1.7220419645309448,
1277
+ "step": 120
1278
+ },
1279
+ {
1280
+ "epoch": 1.476923076923077,
1281
+ "eval_logits/chosen": -0.4385281801223755,
1282
+ "eval_logits/rejected": -0.43417835235595703,
1283
+ "eval_logps/chosen": -41.83414077758789,
1284
+ "eval_logps/rejected": -48.51993179321289,
1285
+ "eval_loss": 0.19258952140808105,
1286
+ "eval_rewards/accuracies": 0.8527397513389587,
1287
+ "eval_rewards/chosen": 1.6856427192687988,
1288
+ "eval_rewards/margins": 4.5064897537231445,
1289
+ "eval_rewards/rejected": -2.8208470344543457,
1290
+ "eval_runtime": 398.4093,
1291
+ "eval_samples_per_second": 4.352,
1292
+ "eval_steps_per_second": 0.183,
1293
+ "step": 120
1294
+ },
1295
+ {
1296
+ "epoch": 1.5015384615384615,
1297
+ "grad_norm": 3.8817405874572923,
1298
+ "learning_rate": 8.81534288045431e-08,
1299
+ "logits/chosen": -0.4345233738422394,
1300
+ "logits/rejected": -0.432235985994339,
1301
+ "logps/chosen": -38.9439697265625,
1302
+ "logps/rejected": -40.60162353515625,
1303
+ "loss": 0.155,
1304
+ "rewards/accuracies": 0.875,
1305
+ "rewards/chosen": 1.4539457559585571,
1306
+ "rewards/margins": 3.9496073722839355,
1307
+ "rewards/rejected": -2.4956612586975098,
1308
+ "step": 122
1309
+ },
1310
+ {
1311
+ "epoch": 1.5261538461538462,
1312
+ "grad_norm": 4.9773505616027425,
1313
+ "learning_rate": 8.005137637112302e-08,
1314
+ "logits/chosen": -0.42935946583747864,
1315
+ "logits/rejected": -0.4225008487701416,
1316
+ "logps/chosen": -40.33964157104492,
1317
+ "logps/rejected": -37.91059875488281,
1318
+ "loss": 0.1685,
1319
+ "rewards/accuracies": 0.90625,
1320
+ "rewards/chosen": 1.3645392656326294,
1321
+ "rewards/margins": 3.7918734550476074,
1322
+ "rewards/rejected": -2.4273343086242676,
1323
+ "step": 124
1324
+ },
1325
+ {
1326
+ "epoch": 1.5384615384615383,
1327
+ "eval_logits/chosen": -0.4404720067977905,
1328
+ "eval_logits/rejected": -0.4361659288406372,
1329
+ "eval_logps/chosen": -41.75288391113281,
1330
+ "eval_logps/rejected": -48.5331916809082,
1331
+ "eval_loss": 0.19096876680850983,
1332
+ "eval_rewards/accuracies": 0.8561643958091736,
1333
+ "eval_rewards/chosen": 1.7262725830078125,
1334
+ "eval_rewards/margins": 4.553750991821289,
1335
+ "eval_rewards/rejected": -2.8274781703948975,
1336
+ "eval_runtime": 397.8296,
1337
+ "eval_samples_per_second": 4.359,
1338
+ "eval_steps_per_second": 0.183,
1339
+ "step": 125
1340
+ },
1341
+ {
1342
+ "epoch": 1.5507692307692307,
1343
+ "grad_norm": 3.816285562997914,
1344
+ "learning_rate": 7.226838472098237e-08,
1345
+ "logits/chosen": -0.4305260181427002,
1346
+ "logits/rejected": -0.427184522151947,
1347
+ "logps/chosen": -38.50707244873047,
1348
+ "logps/rejected": -31.813642501831055,
1349
+ "loss": 0.1785,
1350
+ "rewards/accuracies": 0.84375,
1351
+ "rewards/chosen": 1.653688669204712,
1352
+ "rewards/margins": 3.0635831356048584,
1353
+ "rewards/rejected": -1.4098942279815674,
1354
+ "step": 126
1355
+ },
1356
+ {
1357
+ "epoch": 1.5753846153846154,
1358
+ "grad_norm": 5.070564484576485,
1359
+ "learning_rate": 6.481906560771524e-08,
1360
+ "logits/chosen": -0.45029404759407043,
1361
+ "logits/rejected": -0.44603830575942993,
1362
+ "logps/chosen": -40.4008674621582,
1363
+ "logps/rejected": -34.62392044067383,
1364
+ "loss": 0.162,
1365
+ "rewards/accuracies": 0.890625,
1366
+ "rewards/chosen": 1.847829818725586,
1367
+ "rewards/margins": 3.5869626998901367,
1368
+ "rewards/rejected": -1.7391327619552612,
1369
+ "step": 128
1370
+ },
1371
+ {
1372
+ "epoch": 1.6,
1373
+ "grad_norm": 4.291727231427144,
1374
+ "learning_rate": 5.771740434959277e-08,
1375
+ "logits/chosen": -0.43359375,
1376
+ "logits/rejected": -0.4286433160305023,
1377
+ "logps/chosen": -38.0141716003418,
1378
+ "logps/rejected": -41.48249053955078,
1379
+ "loss": 0.1637,
1380
+ "rewards/accuracies": 0.828125,
1381
+ "rewards/chosen": 1.5690267086029053,
1382
+ "rewards/margins": 3.8614985942840576,
1383
+ "rewards/rejected": -2.2924716472625732,
1384
+ "step": 130
1385
+ },
1386
+ {
1387
+ "epoch": 1.6,
1388
+ "eval_logits/chosen": -0.43712207674980164,
1389
+ "eval_logits/rejected": -0.43287885189056396,
1390
+ "eval_logps/chosen": -41.706233978271484,
1391
+ "eval_logps/rejected": -48.57079315185547,
1392
+ "eval_loss": 0.18982651829719543,
1393
+ "eval_rewards/accuracies": 0.8595890402793884,
1394
+ "eval_rewards/chosen": 1.7495962381362915,
1395
+ "eval_rewards/margins": 4.5958757400512695,
1396
+ "eval_rewards/rejected": -2.8462791442871094,
1397
+ "eval_runtime": 399.3255,
1398
+ "eval_samples_per_second": 4.342,
1399
+ "eval_steps_per_second": 0.183,
1400
+ "step": 130
1401
+ }
1402
+ ],
1403
+ "logging_steps": 2,
1404
+ "max_steps": 162,
1405
+ "num_input_tokens_seen": 0,
1406
+ "num_train_epochs": 2,
1407
+ "save_steps": 10,
1408
+ "stateful_callbacks": {
1409
+ "TrainerControl": {
1410
+ "args": {
1411
+ "should_epoch_stop": false,
1412
+ "should_evaluate": false,
1413
+ "should_log": false,
1414
+ "should_save": true,
1415
+ "should_training_stop": false
1416
+ },
1417
+ "attributes": {}
1418
+ }
1419
+ },
1420
+ "total_flos": 0.0,
1421
+ "train_batch_size": 4,
1422
+ "trial_name": null,
1423
+ "trial_params": null
1424
+ }
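
This closes `trainer_state.json`, a standard Hugging Face `TrainerState` dump: the array above is its `log_history`, which alternates training entries (logged every `logging_steps: 2` optimizer steps, with `loss`, `grad_norm`, `learning_rate`, and batch reward statistics) and evaluation entries (the `eval_*` keys, every 5 steps in this run). One invariant worth auditing is that `rewards/margins` equals `rewards/chosen - rewards/rejected`; at step 54, for instance, 1.2950 - (-1.2245) ≈ 2.5195 matches the logged value. Below is a minimal sketch for loading the file and extracting the curves; the local path is an assumption, not part of this commit.

```python
import json

# Load the serialized TrainerState (hypothetical local path).
with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

# Sanity check: the logged margin should equal chosen minus rejected
# rewards up to float32 rounding.
for e in train_logs:
    recomputed = e["rewards/chosen"] - e["rewards/rejected"]
    assert abs(recomputed - e["rewards/margins"]) < 1e-4

# Eval curves: in this log, eval_loss falls monotonically from 0.2656
# (step 55) to 0.1898 (step 130) while the eval reward margin widens
# from 3.02 to 4.60.
for e in eval_logs:
    print(e["step"], e["eval_loss"], e["eval_rewards/margins"])
```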
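
The metric names (`logps/chosen`, `rewards/rejected`, `eval_rewards/accuracies`, and so on) match the logging convention of TRL's `DPOTrainer`; that this run used TRL is an inference from the keys, not something the commit states. Under that convention the logged rewards are the implicit DPO rewards, `beta * (policy_logp - reference_logp)`, and the training loss is the per-example mean of `-log(sigmoid(margin))`. A sketch of that computation, with `beta = 0.1` assumed (TRL's default):

```python
import torch
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps: torch.Tensor,
             policy_rejected_logps: torch.Tensor,
             ref_chosen_logps: torch.Tensor,
             ref_rejected_logps: torch.Tensor,
             beta: float = 0.1):
    # Implicit rewards, logged above as rewards/chosen and rewards/rejected.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    margins = chosen_rewards - rejected_rewards
    # Sigmoid DPO loss, averaged per example.
    loss = -F.logsigmoid(margins).mean()
    return loss, chosen_rewards.mean(), rejected_rewards.mean()
```

Because `-logsigmoid` is convex and averaged per example rather than applied to the mean margin, a logged `loss` of 0.2571 alongside a mean margin of 2.71 (step 56) is consistent: spread across examples raises the average above `-logsigmoid(2.71) ≈ 0.064`.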
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:549c2b42eab9866ab808832ed774dd82cd93df9a7fbebb35ae01abdb0fb127c3
3
+ size 7352
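
Unlike the JSON files in this commit, `training_args.bin` is a pickled `TrainingArguments` object, so it is read back with `torch.load` in an environment where `transformers` is importable, ideally the same version that produced it. A hedged sketch follows; note that pickle deserialization can execute arbitrary code, so only load files you trust, and that recent PyTorch releases default to `weights_only=True`, which must be disabled for pickled dataclasses.

```python
import torch

# training_args.bin holds a pickled TrainingArguments dataclass, not tensors,
# so weights_only=False is required on recent PyTorch (trusted file assumed).
args = torch.load("training_args.bin", weights_only=False)

# Fields that should line up with trainer_state.json above:
# num_train_epochs -> 2, save_steps -> 10, logging_steps -> 2.
print(args.num_train_epochs, args.save_steps, args.logging_steps)
```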