diyali95916 committed on
Commit
2567823
·
verified ·
1 Parent(s): 0bcab86

Model save

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: mit
3
- base_model: HuggingFaceH4/mistral-7b-sft-beta
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # zephyr-7b-dpo-lora
15
 
16
- This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6704
19
- - Rewards/chosen: 0.0154
20
- - Rewards/rejected: -0.0291
21
- - Rewards/accuracies: 0.5898
22
- - Rewards/margins: 0.0445
23
- - Logps/rejected: -263.0990
24
- - Logps/chosen: -268.9026
25
- - Logits/rejected: -2.9178
26
- - Logits/chosen: -2.8992
27
 
28
  ## Model description
29
 
@@ -54,15 +54,16 @@ The following hyperparameters were used during training:
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_ratio: 0.1
57
- - num_epochs: 3
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6836 | 0.99 | 120 | 0.6827 | 0.0088 | -0.0081 | 0.5742 | 0.0169 | -262.8899 | -268.9695 | -2.9192 | -2.9002 |
64
- | 0.6739 | 2.0 | 241 | 0.6739 | 0.0186 | -0.0224 | 0.6367 | 0.0410 | -263.0327 | -268.8711 | -2.9181 | -2.8995 |
65
- | 0.6692 | 2.98 | 360 | 0.6704 | 0.0154 | -0.0291 | 0.5898 | 0.0445 | -263.0990 | -268.9026 | -2.9178 | -2.8992 |
 
66
 
67
 
68
  ### Framework versions
 
1
  ---
2
  license: mit
3
+ base_model: HuggingFaceH4/zephyr-7b-beta
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # zephyr-7b-dpo-lora
15
 
16
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.6874
19
+ - Rewards/chosen: 0.0803
20
+ - Rewards/rejected: 0.0298
21
+ - Rewards/accuracies: 1.0
22
+ - Rewards/margins: 0.0505
23
+ - Logps/rejected: -101.0604
24
+ - Logps/chosen: -102.9630
25
+ - Logits/rejected: -2.2160
26
+ - Logits/chosen: -2.1724
27
 
28
  ## Model description
29
 
 
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 5
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6931 | 0.8 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -101.3584 | -103.7664 | -2.2157 | -2.1717 |
64
+ | 0.6931 | 1.6 | 2 | 0.6948 | 0.0296 | 0.0079 | 0.5 | 0.0217 | -101.2790 | -103.4700 | -2.2147 | -2.1715 |
65
+ | 0.6931 | 2.4 | 3 | 0.6913 | 0.0277 | 0.0090 | 0.75 | 0.0188 | -101.2689 | -103.4891 | -2.2153 | -2.1709 |
66
+ | 0.6931 | 4.0 | 5 | 0.6874 | 0.0803 | 0.0298 | 1.0 | 0.0505 | -101.0604 | -102.9630 | -2.2160 | -2.1724 |
67
 
68
 
69
  ### Framework versions
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "HuggingFaceH4/mistral-7b-sft-beta",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v_proj",
20
- "q_proj",
21
  "k_proj",
22
- "o_proj"
 
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceH4/zephyr-7b-beta",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "k_proj",
20
+ "q_proj",
21
+ "o_proj",
22
+ "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a470dc3d526d820ea9f9f2492aae51122fc16615cdd94c4e50f0ceffd25ce6f
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85a9785f9eb434dc9c53abbc8850cebbf70813cbc5cdaaee5b484364b861dc6
3
  size 109086672
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 2.98,
3
- "eval_logits/chosen": -2.8992161750793457,
4
- "eval_logits/rejected": -2.917783260345459,
5
- "eval_logps/chosen": -268.90264892578125,
6
- "eval_logps/rejected": -263.0989685058594,
7
- "eval_loss": 0.6703784465789795,
8
- "eval_rewards/accuracies": 0.58984375,
9
- "eval_rewards/chosen": 0.015448413789272308,
10
- "eval_rewards/margins": 0.04450809210538864,
11
- "eval_rewards/rejected": -0.029059680178761482,
12
- "eval_runtime": 181.4906,
13
- "eval_samples": 2030,
14
- "eval_samples_per_second": 11.185,
15
- "eval_steps_per_second": 0.353,
16
- "train_loss": 0.6791177903612454,
17
- "train_runtime": 30079.4021,
18
- "train_samples": 61761,
19
- "train_samples_per_second": 6.16,
20
- "train_steps_per_second": 0.012
21
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_logits/chosen": -2.1724095344543457,
4
+ "eval_logits/rejected": -2.2160496711730957,
5
+ "eval_logps/chosen": -102.96302795410156,
6
+ "eval_logps/rejected": -101.06044006347656,
7
+ "eval_loss": 0.6873850226402283,
8
+ "eval_rewards/accuracies": 1.0,
9
+ "eval_rewards/chosen": 0.08033924549818039,
10
+ "eval_rewards/margins": 0.05054035410284996,
11
+ "eval_rewards/rejected": 0.02979888767004013,
12
+ "eval_runtime": 6.0716,
13
+ "eval_samples": 30,
14
+ "eval_samples_per_second": 4.941,
15
+ "eval_steps_per_second": 0.165,
16
+ "train_loss": 0.6922631859779358,
17
+ "train_runtime": 883.8621,
18
+ "train_samples": 626,
19
+ "train_samples_per_second": 3.541,
20
+ "train_steps_per_second": 0.006
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 2.98,
3
- "eval_logits/chosen": -2.8992161750793457,
4
- "eval_logits/rejected": -2.917783260345459,
5
- "eval_logps/chosen": -268.90264892578125,
6
- "eval_logps/rejected": -263.0989685058594,
7
- "eval_loss": 0.6703784465789795,
8
- "eval_rewards/accuracies": 0.58984375,
9
- "eval_rewards/chosen": 0.015448413789272308,
10
- "eval_rewards/margins": 0.04450809210538864,
11
- "eval_rewards/rejected": -0.029059680178761482,
12
- "eval_runtime": 181.4906,
13
- "eval_samples": 2030,
14
- "eval_samples_per_second": 11.185,
15
- "eval_steps_per_second": 0.353
16
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_logits/chosen": -2.1724095344543457,
4
+ "eval_logits/rejected": -2.2160496711730957,
5
+ "eval_logps/chosen": -102.96302795410156,
6
+ "eval_logps/rejected": -101.06044006347656,
7
+ "eval_loss": 0.6873850226402283,
8
+ "eval_rewards/accuracies": 1.0,
9
+ "eval_rewards/chosen": 0.08033924549818039,
10
+ "eval_rewards/margins": 0.05054035410284996,
11
+ "eval_rewards/rejected": 0.02979888767004013,
12
+ "eval_runtime": 6.0716,
13
+ "eval_samples": 30,
14
+ "eval_samples_per_second": 4.941,
15
+ "eval_steps_per_second": 0.165
16
  }
runs/Jan25_18-44-21_jupyter-dli/events.out.tfevents.1706208337.jupyter-dli.239378.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a19afef61062f8d1d2d156e1ea2b84f47e3b6dd69219235caa1e0dcd6503d50
3
+ size 8264
runs/Jan25_18-44-21_jupyter-dli/events.out.tfevents.1706209227.jupyter-dli.239378.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d267448f7319c8c27c184a48758ea3f229604df5725408467935f659db63d9
3
+ size 815
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.98,
3
- "train_loss": 0.6791177903612454,
4
- "train_runtime": 30079.4021,
5
- "train_samples": 61761,
6
- "train_samples_per_second": 6.16,
7
- "train_steps_per_second": 0.012
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.6922631859779358,
4
+ "train_runtime": 883.8621,
5
+ "train_samples": 626,
6
+ "train_samples_per_second": 3.541,
7
+ "train_steps_per_second": 0.006
8
  }
trainer_state.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.983682983682984,
5
  "eval_steps": 100,
6
- "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "learning_rate": 1.3888888888888887e-08,
14
- "logits/chosen": -2.951993465423584,
15
- "logits/rejected": -2.9205567836761475,
16
- "logps/chosen": -276.23077392578125,
17
- "logps/rejected": -244.7322998046875,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
@@ -23,570 +23,82 @@
23
  "step": 1
24
  },
25
  {
26
- "epoch": 0.08,
27
- "learning_rate": 1.3888888888888888e-07,
28
- "logits/chosen": -2.930194854736328,
29
- "logits/rejected": -2.9322521686553955,
30
- "logps/chosen": -258.4957275390625,
31
- "logps/rejected": -252.5615234375,
32
- "loss": 0.6937,
33
- "rewards/accuracies": 0.4392361044883728,
34
- "rewards/chosen": -0.0008609014330431819,
35
- "rewards/margins": -0.0007996508502401412,
36
- "rewards/rejected": -6.125055369921029e-05,
37
- "step": 10
38
- },
39
- {
40
- "epoch": 0.17,
41
- "learning_rate": 2.7777777777777776e-07,
42
- "logits/chosen": -2.935396432876587,
43
- "logits/rejected": -2.9414877891540527,
44
- "logps/chosen": -269.8578796386719,
45
- "logps/rejected": -250.003662109375,
46
- "loss": 0.6931,
47
- "rewards/accuracies": 0.512499988079071,
48
- "rewards/chosen": -0.0003611525462474674,
49
- "rewards/margins": 0.002654502633959055,
50
- "rewards/rejected": -0.0030156546272337437,
51
- "step": 20
52
- },
53
- {
54
- "epoch": 0.25,
55
- "learning_rate": 4.1666666666666667e-07,
56
- "logits/chosen": -2.922468423843384,
57
- "logits/rejected": -2.924722671508789,
58
- "logps/chosen": -283.06756591796875,
59
- "logps/rejected": -263.92694091796875,
60
- "loss": 0.6926,
61
- "rewards/accuracies": 0.503125011920929,
62
- "rewards/chosen": 0.0008422272512689233,
63
- "rewards/margins": 0.0005572644877247512,
64
- "rewards/rejected": 0.00028496290906332433,
65
- "step": 30
66
- },
67
- {
68
- "epoch": 0.33,
69
- "learning_rate": 4.938271604938271e-07,
70
- "logits/chosen": -2.917189598083496,
71
- "logits/rejected": -2.939058780670166,
72
- "logps/chosen": -280.8387145996094,
73
- "logps/rejected": -257.81353759765625,
74
- "loss": 0.6915,
75
- "rewards/accuracies": 0.5249999761581421,
76
- "rewards/chosen": 0.0037843205500394106,
77
- "rewards/margins": 0.006630954332649708,
78
- "rewards/rejected": -0.002846634481102228,
79
- "step": 40
80
- },
81
- {
82
- "epoch": 0.41,
83
- "learning_rate": 4.783950617283951e-07,
84
- "logits/chosen": -2.9359450340270996,
85
- "logits/rejected": -2.93603777885437,
86
- "logps/chosen": -268.5244445800781,
87
- "logps/rejected": -253.9750518798828,
88
- "loss": 0.6907,
89
- "rewards/accuracies": 0.5453125238418579,
90
- "rewards/chosen": 0.00316084292717278,
91
- "rewards/margins": 0.005354513414204121,
92
- "rewards/rejected": -0.0021936697885394096,
93
- "step": 50
94
- },
95
- {
96
- "epoch": 0.5,
97
- "learning_rate": 4.6296296296296297e-07,
98
- "logits/chosen": -2.942714214324951,
99
- "logits/rejected": -2.9400081634521484,
100
- "logps/chosen": -280.0882263183594,
101
- "logps/rejected": -257.28839111328125,
102
- "loss": 0.6892,
103
- "rewards/accuracies": 0.512499988079071,
104
- "rewards/chosen": 0.0040632085874676704,
105
- "rewards/margins": 0.004900970496237278,
106
- "rewards/rejected": -0.0008377617341466248,
107
- "step": 60
108
- },
109
- {
110
- "epoch": 0.58,
111
- "learning_rate": 4.4753086419753083e-07,
112
- "logits/chosen": -2.9372458457946777,
113
- "logits/rejected": -2.9435629844665527,
114
- "logps/chosen": -273.5423889160156,
115
- "logps/rejected": -253.7714080810547,
116
- "loss": 0.6882,
117
- "rewards/accuracies": 0.565625011920929,
118
- "rewards/chosen": 0.0066502392292022705,
119
- "rewards/margins": 0.010684076696634293,
120
- "rewards/rejected": -0.004033837933093309,
121
- "step": 70
122
- },
123
- {
124
- "epoch": 0.66,
125
- "learning_rate": 4.320987654320987e-07,
126
- "logits/chosen": -2.896692991256714,
127
- "logits/rejected": -2.875420093536377,
128
- "logps/chosen": -274.2783508300781,
129
- "logps/rejected": -255.4978790283203,
130
- "loss": 0.6883,
131
- "rewards/accuracies": 0.5687500238418579,
132
- "rewards/chosen": 0.007802584674209356,
133
- "rewards/margins": 0.010292423889040947,
134
- "rewards/rejected": -0.0024898392148315907,
135
- "step": 80
136
- },
137
- {
138
- "epoch": 0.75,
139
- "learning_rate": 4.1666666666666667e-07,
140
- "logits/chosen": -2.923689603805542,
141
- "logits/rejected": -2.935713291168213,
142
- "logps/chosen": -277.1148986816406,
143
- "logps/rejected": -247.6132354736328,
144
- "loss": 0.6867,
145
- "rewards/accuracies": 0.5562499761581421,
146
- "rewards/chosen": 0.007110255304723978,
147
- "rewards/margins": 0.012844784185290337,
148
- "rewards/rejected": -0.005734528414905071,
149
- "step": 90
150
- },
151
- {
152
- "epoch": 0.83,
153
- "learning_rate": 4.0123456790123453e-07,
154
- "logits/chosen": -2.9391238689422607,
155
- "logits/rejected": -2.931495189666748,
156
- "logps/chosen": -273.63140869140625,
157
- "logps/rejected": -253.19534301757812,
158
- "loss": 0.6857,
159
- "rewards/accuracies": 0.5703125,
160
- "rewards/chosen": 0.0076993731781840324,
161
- "rewards/margins": 0.014409579336643219,
162
- "rewards/rejected": -0.0067102061584591866,
163
- "step": 100
164
- },
165
- {
166
- "epoch": 0.91,
167
- "learning_rate": 3.8580246913580245e-07,
168
- "logits/chosen": -2.9282491207122803,
169
- "logits/rejected": -2.924396276473999,
170
- "logps/chosen": -264.43011474609375,
171
- "logps/rejected": -243.5092315673828,
172
- "loss": 0.6851,
173
- "rewards/accuracies": 0.5796874761581421,
174
- "rewards/chosen": 0.007344271056354046,
175
- "rewards/margins": 0.016184944659471512,
176
- "rewards/rejected": -0.008840671740472317,
177
- "step": 110
178
- },
179
- {
180
- "epoch": 0.99,
181
- "learning_rate": 3.703703703703703e-07,
182
- "logits/chosen": -2.9296412467956543,
183
- "logits/rejected": -2.9385550022125244,
184
- "logps/chosen": -280.14300537109375,
185
- "logps/rejected": -244.4756317138672,
186
- "loss": 0.6836,
187
- "rewards/accuracies": 0.6234375238418579,
188
- "rewards/chosen": 0.011619888246059418,
189
- "rewards/margins": 0.023034537211060524,
190
- "rewards/rejected": -0.011414647102355957,
191
- "step": 120
192
- },
193
- {
194
- "epoch": 0.99,
195
- "eval_logits/chosen": -2.900207042694092,
196
- "eval_logits/rejected": -2.9191529750823975,
197
- "eval_logps/chosen": -268.9694519042969,
198
- "eval_logps/rejected": -262.8898620605469,
199
- "eval_loss": 0.6826778650283813,
200
- "eval_rewards/accuracies": 0.57421875,
201
- "eval_rewards/chosen": 0.008766621351242065,
202
- "eval_rewards/margins": 0.016915924847126007,
203
- "eval_rewards/rejected": -0.008149303495883942,
204
- "eval_runtime": 182.2275,
205
- "eval_samples_per_second": 11.14,
206
- "eval_steps_per_second": 0.351,
207
- "step": 120
208
- },
209
- {
210
- "epoch": 1.08,
211
- "learning_rate": 3.549382716049383e-07,
212
- "logits/chosen": -2.9227781295776367,
213
- "logits/rejected": -2.901576519012451,
214
- "logps/chosen": -259.58624267578125,
215
- "logps/rejected": -248.39675903320312,
216
- "loss": 0.6816,
217
- "rewards/accuracies": 0.6156250238418579,
218
- "rewards/chosen": 0.010982013307511806,
219
- "rewards/margins": 0.02251209318637848,
220
- "rewards/rejected": -0.011530080810189247,
221
- "step": 130
222
- },
223
- {
224
- "epoch": 1.16,
225
- "learning_rate": 3.3950617283950614e-07,
226
- "logits/chosen": -2.9140427112579346,
227
- "logits/rejected": -2.903498649597168,
228
- "logps/chosen": -267.62469482421875,
229
- "logps/rejected": -242.287109375,
230
- "loss": 0.6816,
231
- "rewards/accuracies": 0.598437488079071,
232
- "rewards/chosen": 0.013705916702747345,
233
- "rewards/margins": 0.021453356370329857,
234
- "rewards/rejected": -0.007747439201921225,
235
- "step": 140
236
- },
237
- {
238
- "epoch": 1.24,
239
- "learning_rate": 3.2407407407407406e-07,
240
- "logits/chosen": -2.928597927093506,
241
- "logits/rejected": -2.9360718727111816,
242
- "logps/chosen": -283.0503845214844,
243
- "logps/rejected": -263.2129821777344,
244
- "loss": 0.6812,
245
- "rewards/accuracies": 0.612500011920929,
246
- "rewards/chosen": 0.016042985022068024,
247
- "rewards/margins": 0.02658357098698616,
248
- "rewards/rejected": -0.010540584102272987,
249
- "step": 150
250
- },
251
- {
252
- "epoch": 1.33,
253
- "learning_rate": 3.086419753086419e-07,
254
- "logits/chosen": -2.9245991706848145,
255
- "logits/rejected": -2.9161458015441895,
256
- "logps/chosen": -283.2376403808594,
257
- "logps/rejected": -249.5203399658203,
258
- "loss": 0.6786,
259
- "rewards/accuracies": 0.620312511920929,
260
- "rewards/chosen": 0.017403725534677505,
261
- "rewards/margins": 0.030968856066465378,
262
- "rewards/rejected": -0.013565132394433022,
263
- "step": 160
264
- },
265
- {
266
- "epoch": 1.41,
267
- "learning_rate": 2.932098765432099e-07,
268
- "logits/chosen": -2.9159512519836426,
269
- "logits/rejected": -2.9223380088806152,
270
- "logps/chosen": -272.6524353027344,
271
- "logps/rejected": -249.50711059570312,
272
- "loss": 0.6799,
273
- "rewards/accuracies": 0.6234375238418579,
274
- "rewards/chosen": 0.01738204061985016,
275
- "rewards/margins": 0.02959570661187172,
276
- "rewards/rejected": -0.01221366599202156,
277
- "step": 170
278
- },
279
- {
280
- "epoch": 1.49,
281
- "learning_rate": 2.7777777777777776e-07,
282
- "logits/chosen": -2.9411699771881104,
283
- "logits/rejected": -2.936310291290283,
284
- "logps/chosen": -259.71112060546875,
285
- "logps/rejected": -245.5165252685547,
286
- "loss": 0.6786,
287
- "rewards/accuracies": 0.609375,
288
- "rewards/chosen": 0.01514382939785719,
289
- "rewards/margins": 0.02933506667613983,
290
- "rewards/rejected": -0.014191237278282642,
291
- "step": 180
292
- },
293
- {
294
- "epoch": 1.57,
295
- "learning_rate": 2.623456790123457e-07,
296
- "logits/chosen": -2.932964563369751,
297
- "logits/rejected": -2.9182915687561035,
298
- "logps/chosen": -267.070556640625,
299
- "logps/rejected": -242.4732208251953,
300
- "loss": 0.6763,
301
- "rewards/accuracies": 0.6187499761581421,
302
- "rewards/chosen": 0.010820230469107628,
303
- "rewards/margins": 0.028901537880301476,
304
- "rewards/rejected": -0.018081307411193848,
305
- "step": 190
306
- },
307
- {
308
- "epoch": 1.66,
309
- "learning_rate": 2.4691358024691354e-07,
310
- "logits/chosen": -2.9549834728240967,
311
- "logits/rejected": -2.9499382972717285,
312
- "logps/chosen": -274.26885986328125,
313
- "logps/rejected": -251.1107940673828,
314
- "loss": 0.6763,
315
- "rewards/accuracies": 0.6234375238418579,
316
- "rewards/chosen": 0.016665898263454437,
317
- "rewards/margins": 0.03335059434175491,
318
- "rewards/rejected": -0.016684692353010178,
319
- "step": 200
320
- },
321
- {
322
- "epoch": 1.74,
323
- "learning_rate": 2.3148148148148148e-07,
324
- "logits/chosen": -2.922349214553833,
325
- "logits/rejected": -2.9319396018981934,
326
- "logps/chosen": -277.7991027832031,
327
- "logps/rejected": -265.2469482421875,
328
- "loss": 0.6756,
329
- "rewards/accuracies": 0.620312511920929,
330
- "rewards/chosen": 0.017026837915182114,
331
- "rewards/margins": 0.03320584446191788,
332
- "rewards/rejected": -0.016179006546735764,
333
- "step": 210
334
- },
335
- {
336
- "epoch": 1.82,
337
- "learning_rate": 2.1604938271604935e-07,
338
- "logits/chosen": -2.921238899230957,
339
- "logits/rejected": -2.91098952293396,
340
- "logps/chosen": -265.0746154785156,
341
- "logps/rejected": -245.3384246826172,
342
- "loss": 0.6745,
343
- "rewards/accuracies": 0.660937488079071,
344
- "rewards/chosen": 0.01762019656598568,
345
- "rewards/margins": 0.04026446118950844,
346
- "rewards/rejected": -0.02264426089823246,
347
- "step": 220
348
- },
349
- {
350
- "epoch": 1.91,
351
- "learning_rate": 2.0061728395061726e-07,
352
- "logits/chosen": -2.957491636276245,
353
- "logits/rejected": -2.9489035606384277,
354
- "logps/chosen": -269.3310546875,
355
- "logps/rejected": -258.45452880859375,
356
- "loss": 0.6727,
357
- "rewards/accuracies": 0.6484375,
358
- "rewards/chosen": 0.02087206393480301,
359
- "rewards/margins": 0.04010782390832901,
360
- "rewards/rejected": -0.01923576183617115,
361
- "step": 230
362
- },
363
- {
364
- "epoch": 1.99,
365
- "learning_rate": 1.8518518518518516e-07,
366
- "logits/chosen": -2.912625789642334,
367
- "logits/rejected": -2.8954434394836426,
368
- "logps/chosen": -278.33441162109375,
369
- "logps/rejected": -257.1600341796875,
370
- "loss": 0.6739,
371
- "rewards/accuracies": 0.6484375,
372
- "rewards/chosen": 0.019938599318265915,
373
- "rewards/margins": 0.043915756046772,
374
- "rewards/rejected": -0.023977158591151237,
375
- "step": 240
376
- },
377
- {
378
- "epoch": 2.0,
379
- "eval_logits/chosen": -2.8995361328125,
380
- "eval_logits/rejected": -2.918098211288452,
381
- "eval_logps/chosen": -268.8711242675781,
382
- "eval_logps/rejected": -263.03265380859375,
383
- "eval_loss": 0.6739373207092285,
384
- "eval_rewards/accuracies": 0.63671875,
385
- "eval_rewards/chosen": 0.01860005408525467,
386
- "eval_rewards/margins": 0.04102998971939087,
387
- "eval_rewards/rejected": -0.02242993377149105,
388
- "eval_runtime": 181.6795,
389
- "eval_samples_per_second": 11.174,
390
- "eval_steps_per_second": 0.352,
391
- "step": 241
392
- },
393
- {
394
- "epoch": 2.07,
395
- "learning_rate": 1.6975308641975307e-07,
396
- "logits/chosen": -2.9307079315185547,
397
- "logits/rejected": -2.936490535736084,
398
- "logps/chosen": -279.3055725097656,
399
- "logps/rejected": -257.22015380859375,
400
- "loss": 0.6722,
401
- "rewards/accuracies": 0.6031249761581421,
402
- "rewards/chosen": 0.01767881028354168,
403
- "rewards/margins": 0.041353292763233185,
404
- "rewards/rejected": -0.023674478754401207,
405
- "step": 250
406
- },
407
- {
408
- "epoch": 2.15,
409
- "learning_rate": 1.5432098765432096e-07,
410
- "logits/chosen": -2.879582405090332,
411
- "logits/rejected": -2.8889400959014893,
412
- "logps/chosen": -257.8949279785156,
413
- "logps/rejected": -241.9752197265625,
414
- "loss": 0.6723,
415
- "rewards/accuracies": 0.625,
416
- "rewards/chosen": 0.019200006499886513,
417
- "rewards/margins": 0.0419248566031456,
418
- "rewards/rejected": -0.022724846377968788,
419
- "step": 260
420
- },
421
- {
422
- "epoch": 2.24,
423
- "learning_rate": 1.3888888888888888e-07,
424
- "logits/chosen": -2.9449806213378906,
425
- "logits/rejected": -2.932685136795044,
426
- "logps/chosen": -267.568359375,
427
- "logps/rejected": -242.255859375,
428
- "loss": 0.6739,
429
- "rewards/accuracies": 0.676562488079071,
430
- "rewards/chosen": 0.023365100845694542,
431
- "rewards/margins": 0.046661119908094406,
432
- "rewards/rejected": -0.023296022787690163,
433
- "step": 270
434
  },
435
  {
436
- "epoch": 2.32,
437
- "learning_rate": 1.2345679012345677e-07,
438
- "logits/chosen": -2.9022796154022217,
439
- "logits/rejected": -2.9109320640563965,
440
- "logps/chosen": -265.966064453125,
441
- "logps/rejected": -239.6620330810547,
442
- "loss": 0.6707,
443
- "rewards/accuracies": 0.676562488079071,
444
- "rewards/chosen": 0.026586908847093582,
445
- "rewards/margins": 0.05420393496751785,
446
- "rewards/rejected": -0.02761702612042427,
447
- "step": 280
 
 
448
  },
449
  {
450
  "epoch": 2.4,
451
- "learning_rate": 1.0802469135802467e-07,
452
- "logits/chosen": -2.9430556297302246,
453
- "logits/rejected": -2.946108341217041,
454
- "logps/chosen": -278.1526794433594,
455
- "logps/rejected": -263.77178955078125,
456
- "loss": 0.6717,
457
- "rewards/accuracies": 0.6328125,
458
- "rewards/chosen": 0.019661400467157364,
459
- "rewards/margins": 0.047070957720279694,
460
- "rewards/rejected": -0.02740955725312233,
461
- "step": 290
462
- },
463
- {
464
- "epoch": 2.49,
465
- "learning_rate": 9.259259259259258e-08,
466
- "logits/chosen": -2.9402213096618652,
467
- "logits/rejected": -2.9220597743988037,
468
- "logps/chosen": -260.75665283203125,
469
- "logps/rejected": -263.7254943847656,
470
- "loss": 0.6701,
471
- "rewards/accuracies": 0.667187511920929,
472
- "rewards/chosen": 0.017683709040284157,
473
- "rewards/margins": 0.04577410966157913,
474
- "rewards/rejected": -0.028090402483940125,
475
- "step": 300
476
- },
477
- {
478
- "epoch": 2.57,
479
- "learning_rate": 7.716049382716048e-08,
480
- "logits/chosen": -2.9286868572235107,
481
- "logits/rejected": -2.9299063682556152,
482
- "logps/chosen": -264.2181701660156,
483
- "logps/rejected": -261.91363525390625,
484
- "loss": 0.6704,
485
- "rewards/accuracies": 0.6312500238418579,
486
- "rewards/chosen": 0.01738332025706768,
487
- "rewards/margins": 0.0443243645131588,
488
- "rewards/rejected": -0.02694104239344597,
489
- "step": 310
490
- },
491
- {
492
- "epoch": 2.65,
493
- "learning_rate": 6.172839506172839e-08,
494
- "logits/chosen": -2.9626357555389404,
495
- "logits/rejected": -2.951683521270752,
496
- "logps/chosen": -283.7442626953125,
497
- "logps/rejected": -256.7106018066406,
498
- "loss": 0.6709,
499
- "rewards/accuracies": 0.6546875238418579,
500
- "rewards/chosen": 0.024839671328663826,
501
- "rewards/margins": 0.05458993837237358,
502
- "rewards/rejected": -0.029750261455774307,
503
- "step": 320
504
- },
505
- {
506
- "epoch": 2.74,
507
- "learning_rate": 4.629629629629629e-08,
508
- "logits/chosen": -2.9203758239746094,
509
- "logits/rejected": -2.898723840713501,
510
- "logps/chosen": -275.98834228515625,
511
- "logps/rejected": -259.5394592285156,
512
- "loss": 0.67,
513
- "rewards/accuracies": 0.6265624761581421,
514
- "rewards/chosen": 0.021389978006482124,
515
- "rewards/margins": 0.0422237329185009,
516
- "rewards/rejected": -0.020833751186728477,
517
- "step": 330
518
- },
519
- {
520
- "epoch": 2.82,
521
- "learning_rate": 3.086419753086419e-08,
522
- "logits/chosen": -2.9191930294036865,
523
- "logits/rejected": -2.918905735015869,
524
- "logps/chosen": -282.91143798828125,
525
- "logps/rejected": -272.44012451171875,
526
- "loss": 0.6683,
527
- "rewards/accuracies": 0.640625,
528
- "rewards/chosen": 0.026901666074991226,
529
- "rewards/margins": 0.055442653596401215,
530
- "rewards/rejected": -0.028540989384055138,
531
- "step": 340
532
- },
533
- {
534
- "epoch": 2.9,
535
- "learning_rate": 1.5432098765432096e-08,
536
- "logits/chosen": -2.9336206912994385,
537
- "logits/rejected": -2.929389715194702,
538
- "logps/chosen": -292.12884521484375,
539
- "logps/rejected": -262.97174072265625,
540
- "loss": 0.6696,
541
- "rewards/accuracies": 0.6937500238418579,
542
- "rewards/chosen": 0.03554671257734299,
543
- "rewards/margins": 0.07022932916879654,
544
- "rewards/rejected": -0.034682609140872955,
545
- "step": 350
546
- },
547
- {
548
- "epoch": 2.98,
549
- "learning_rate": 0.0,
550
- "logits/chosen": -2.918612003326416,
551
- "logits/rejected": -2.92228627204895,
552
- "logps/chosen": -272.35125732421875,
553
- "logps/rejected": -258.17169189453125,
554
- "loss": 0.6692,
555
- "rewards/accuracies": 0.6625000238418579,
556
- "rewards/chosen": 0.030398529022932053,
557
- "rewards/margins": 0.05524751543998718,
558
- "rewards/rejected": -0.02484898641705513,
559
- "step": 360
560
- },
561
- {
562
- "epoch": 2.98,
563
- "eval_logits/chosen": -2.8992161750793457,
564
- "eval_logits/rejected": -2.917783260345459,
565
- "eval_logps/chosen": -268.90264892578125,
566
- "eval_logps/rejected": -263.0989685058594,
567
- "eval_loss": 0.6703784465789795,
568
- "eval_rewards/accuracies": 0.58984375,
569
- "eval_rewards/chosen": 0.015448413789272308,
570
- "eval_rewards/margins": 0.04450809210538864,
571
- "eval_rewards/rejected": -0.029059680178761482,
572
- "eval_runtime": 181.8484,
573
- "eval_samples_per_second": 11.163,
574
- "eval_steps_per_second": 0.352,
575
- "step": 360
576
- },
577
- {
578
- "epoch": 2.98,
579
- "step": 360,
580
  "total_flos": 0.0,
581
- "train_loss": 0.6791177903612454,
582
- "train_runtime": 30079.4021,
583
- "train_samples_per_second": 6.16,
584
- "train_steps_per_second": 0.012
585
  }
586
  ],
587
  "logging_steps": 10,
588
- "max_steps": 360,
589
- "num_train_epochs": 3,
590
  "save_steps": 500,
591
  "total_flos": 0.0,
592
  "trial_name": null,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 100,
6
+ "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.8,
13
+ "learning_rate": 5e-07,
14
+ "logits/chosen": -2.3972699642181396,
15
+ "logits/rejected": -2.39332914352417,
16
+ "logps/chosen": -153.26783752441406,
17
+ "logps/rejected": -146.77935791015625,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
 
23
  "step": 1
24
  },
25
  {
26
+ "epoch": 0.8,
27
+ "eval_logits/chosen": -2.1716835498809814,
28
+ "eval_logits/rejected": -2.2157046794891357,
29
+ "eval_logps/chosen": -103.76641845703125,
30
+ "eval_logps/rejected": -101.35842895507812,
31
+ "eval_loss": 0.6931473016738892,
32
+ "eval_rewards/accuracies": 0.0,
33
+ "eval_rewards/chosen": 0.0,
34
+ "eval_rewards/margins": 0.0,
35
+ "eval_rewards/rejected": 0.0,
36
+ "eval_runtime": 6.5465,
37
+ "eval_samples_per_second": 4.583,
38
+ "eval_steps_per_second": 0.153,
39
+ "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  },
41
  {
42
+ "epoch": 1.6,
43
+ "eval_logits/chosen": -2.1715452671051025,
44
+ "eval_logits/rejected": -2.2146592140197754,
45
+ "eval_logps/chosen": -103.4699935913086,
46
+ "eval_logps/rejected": -101.27898406982422,
47
+ "eval_loss": 0.694814920425415,
48
+ "eval_rewards/accuracies": 0.5,
49
+ "eval_rewards/chosen": 0.02964324876666069,
50
+ "eval_rewards/margins": 0.021698763594031334,
51
+ "eval_rewards/rejected": 0.007944487035274506,
52
+ "eval_runtime": 6.0506,
53
+ "eval_samples_per_second": 4.958,
54
+ "eval_steps_per_second": 0.165,
55
+ "step": 2
56
  },
57
  {
58
  "epoch": 2.4,
59
+ "eval_logits/chosen": -2.170850992202759,
60
+ "eval_logits/rejected": -2.215348482131958,
61
+ "eval_logps/chosen": -103.48912811279297,
62
+ "eval_logps/rejected": -101.26887512207031,
63
+ "eval_loss": 0.6913403868675232,
64
+ "eval_rewards/accuracies": 0.75,
65
+ "eval_rewards/chosen": 0.027730178087949753,
66
+ "eval_rewards/margins": 0.018774602562189102,
67
+ "eval_rewards/rejected": 0.00895557552576065,
68
+ "eval_runtime": 6.0481,
69
+ "eval_samples_per_second": 4.96,
70
+ "eval_steps_per_second": 0.165,
71
+ "step": 3
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_logits/chosen": -2.1724095344543457,
76
+ "eval_logits/rejected": -2.2160496711730957,
77
+ "eval_logps/chosen": -102.96302795410156,
78
+ "eval_logps/rejected": -101.06044006347656,
79
+ "eval_loss": 0.6873850226402283,
80
+ "eval_rewards/accuracies": 1.0,
81
+ "eval_rewards/chosen": 0.08033924549818039,
82
+ "eval_rewards/margins": 0.05054035410284996,
83
+ "eval_rewards/rejected": 0.02979888767004013,
84
+ "eval_runtime": 6.0751,
85
+ "eval_samples_per_second": 4.938,
86
+ "eval_steps_per_second": 0.165,
87
+ "step": 5
88
+ },
89
+ {
90
+ "epoch": 4.0,
91
+ "step": 5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "total_flos": 0.0,
93
+ "train_loss": 0.6922631859779358,
94
+ "train_runtime": 883.8621,
95
+ "train_samples_per_second": 3.541,
96
+ "train_steps_per_second": 0.006
97
  }
98
  ],
99
  "logging_steps": 10,
100
+ "max_steps": 5,
101
+ "num_train_epochs": 5,
102
  "save_steps": 500,
103
  "total_flos": 0.0,
104
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c8227384e82904ba8d02ed7dd3cf55663930c5840142dfed7da06bbbc5bda5e
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1679b26c1e5cc6b5a159fba09aee53f44a4515086460ae0d7e21b31df550a03
3
  size 5752