ZHLiu627 committed
Commit 251b0a9 · 1 Parent(s): 675cd0b
README.md ADDED
@@ -0,0 +1,76 @@
+ ---
+ license: other
+ base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
+ tags:
+ - alignment-handbook
+ - generated_from_trainer
+ datasets:
+ - argilla/dpo-mix-7k
+ model-index:
+ - name: gemma_rdpo_eta0.005_no_decay
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # gemma_rdpo_eta0.005_no_decay
+
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the argilla/dpo-mix-7k dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.5899
+ - Rewards/chosen: -0.7561
+ - Rewards/rejected: -2.1676
+ - Rewards/accuracies: 0.7234
+ - Rewards/margins: 1.4115
+ - Logps/rejected: -464.4193
+ - Logps/chosen: -408.9966
+ - Logits/rejected: 161.6372
+ - Logits/chosen: 163.3166
+ - Eta: 0.0050
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 1
+ - eval_batch_size: 1
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 2
+
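A quick sanity check on the totals above (illustrative arithmetic only; the variable names are not from the training script):

```python
# Effective batch size: per-device batch * GPUs * gradient accumulation.
train_batch_size = 1
num_devices = 8
gradient_accumulation_steps = 16
total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps  # 128

# Warmup length: 10% of the 104 optimizer steps recorded in trainer_state.json.
max_steps = 104
warmup_steps = round(0.1 * max_steps)  # ~10 steps before the cosine decay begins

print(total_train_batch_size, warmup_steps)  # 128 10
```
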
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Eta |
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:------:|
+ | 0.5915 | 0.9479 | 50 | 0.5745 | -0.7212 | -1.8546 | 0.7021 | 1.1334 | -458.1599 | -408.3000 | 171.6370 | 174.0368 | 0.0050 |
+ | 0.2599 | 1.8957 | 100 | 0.5899 | -0.7561 | -2.1676 | 0.7234 | 1.4115 | -464.4193 | -408.9966 | 161.6372 | 163.3166 | 0.0050 |
+
+
+ ### Framework versions
+
+ - Transformers 4.40.2
+ - Pytorch 2.3.0+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
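For reference, a minimal inference sketch. The repository id below is an assumption (the card only states the run name gemma_rdpo_eta0.005_no_decay); the rest follows the standard `transformers` API and the ChatML-style template shipped in tokenizer_config.json.

```python
# Minimal usage sketch -- the repo id is assumed; point it at the actual Hub path.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "ZHLiu627/gemma_rdpo_eta0.005_no_decay"  # assumption, adjust as needed

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Summarize direct preference optimization in two sentences."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=256, do_sample=False)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```
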
all_results.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "epoch": 1.971563981042654,
+ "eval_eta": 0.004999999888241291,
+ "eval_logits/chosen": 163.31393432617188,
+ "eval_logits/rejected": 161.63084411621094,
+ "eval_logps/chosen": -408.5458679199219,
+ "eval_logps/rejected": -463.5101013183594,
+ "eval_loss": 0.5857101082801819,
+ "eval_rewards/accuracies": 0.7021276354789734,
+ "eval_rewards/chosen": -0.733529269695282,
+ "eval_rewards/margins": 1.3885893821716309,
+ "eval_rewards/rejected": -2.1221187114715576,
+ "eval_runtime": 443.0016,
+ "eval_samples": 750,
+ "eval_samples_per_second": 1.693,
+ "eval_steps_per_second": 0.212,
+ "total_flos": 0.0,
+ "train_loss": 0.48392796287169826,
+ "train_runtime": 15946.9173,
+ "train_samples": 6750,
+ "train_samples_per_second": 0.847,
+ "train_steps_per_second": 0.007
+ }
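The throughput fields above are internally consistent; a small sketch that re-derives them from the other entries (assuming the file is read from the repository root):

```python
import json

with open("all_results.json") as f:
    results = json.load(f)

# 750 eval samples / 443.0 s of eval runtime ≈ 1.693 samples/s.
print(results["eval_samples"] / results["eval_runtime"])

# 6750 train samples * 2 scheduled epochs / 15946.9 s of train runtime ≈ 0.847 samples/s.
print(results["train_samples"] * 2 / results["train_runtime"])
```
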
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "_name_or_path": "HuggingFaceH4/zephyr-7b-gemma-sft-v0.1",
+ "architectures": [
+ "GemmaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "eos_token_id": 1,
+ "head_dim": 256,
+ "hidden_act": "gelu",
+ "hidden_activation": null,
+ "hidden_size": 3072,
+ "initializer_range": 0.02,
+ "intermediate_size": 24576,
+ "max_position_embeddings": 8192,
+ "model_type": "gemma",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 16,
+ "pad_token_id": 0,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.40.2",
+ "use_cache": true,
+ "vocab_size": 256000
+ }
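A back-of-the-envelope parameter count from these config values lands at roughly 8.5B, i.e. the usual Gemma-7B size. The sketch below assumes tied input/output embeddings and ignores the small RMSNorm weights:

```python
# Approximate parameter count implied by config.json.
hidden, intermediate, layers = 3072, 24576, 28
heads, head_dim, vocab = 16, 256, 256000

attn_params = 4 * hidden * heads * head_dim   # q, k, v and o projections per layer
mlp_params = 3 * hidden * intermediate        # gate, up and down projections per layer
embed_params = vocab * hidden                 # embedding matrix, tied with the LM head

total = layers * (attn_params + mlp_params) + embed_params
print(f"~{total / 1e9:.2f}B parameters")      # ~8.54B
```
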
eval_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "epoch": 1.971563981042654,
+ "eval_eta": 0.004999999888241291,
+ "eval_logits/chosen": 163.31393432617188,
+ "eval_logits/rejected": 161.63084411621094,
+ "eval_logps/chosen": -408.5458679199219,
+ "eval_logps/rejected": -463.5101013183594,
+ "eval_loss": 0.5857101082801819,
+ "eval_rewards/accuracies": 0.7021276354789734,
+ "eval_rewards/chosen": -0.733529269695282,
+ "eval_rewards/margins": 1.3885893821716309,
+ "eval_rewards/rejected": -2.1221187114715576,
+ "eval_runtime": 443.0016,
+ "eval_samples": 750,
+ "eval_samples_per_second": 1.693,
+ "eval_steps_per_second": 0.212
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 2,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.40.2"
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
+ {
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "106": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "107": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": "<bos>",
+ "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<eos>",
+ "legacy": null,
+ "model_max_length": 2048,
+ "pad_token": "<pad>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "GemmaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
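The `chat_template` above is ChatML-style: it prepends `<bos>`, wraps every turn in `<|im_start|>`/`<|im_end|>`, and appends an assistant header when a generation prompt is requested. A minimal sketch of what it renders, assuming the tokenizer is loaded from this repository:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # assumption: local checkout of this repo

messages = [{"role": "user", "content": "Hello!"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Expected output, per the template string above:
# <bos><|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```
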
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 1.971563981042654,
+ "total_flos": 0.0,
+ "train_loss": 0.48392796287169826,
+ "train_runtime": 15946.9173,
+ "train_samples": 6750,
+ "train_samples_per_second": 0.847,
+ "train_steps_per_second": 0.007
+ }
trainer_state.json ADDED
@@ -0,0 +1,240 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.971563981042654,
+ "eval_steps": 50,
+ "global_step": 104,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.018957345971563982,
+ "eta": 0.004999999422580004,
+ "grad_norm": 188.9079473976178,
+ "learning_rate": 4.545454545454545e-08,
+ "logits/chosen": 204.35707092285156,
+ "logits/rejected": 182.54800415039062,
+ "logps/chosen": -443.4819030761719,
+ "logps/rejected": -434.7948303222656,
+ "loss": 0.786,
+ "rewards/accuracies": 0.0,
+ "rewards/chosen": 0.0,
+ "rewards/margins": 0.0,
+ "rewards/rejected": 0.0,
+ "step": 1
+ },
+ {
+ "epoch": 0.1895734597156398,
+ "eta": 0.004999998956918716,
+ "grad_norm": 122.11894542304172,
+ "learning_rate": 4.545454545454545e-07,
+ "logits/chosen": 175.2365264892578,
+ "logits/rejected": 185.066162109375,
+ "logps/chosen": -384.705078125,
+ "logps/rejected": -446.1976013183594,
+ "loss": 0.8045,
+ "rewards/accuracies": 0.4166666567325592,
+ "rewards/chosen": 0.06007244065403938,
+ "rewards/margins": -0.035397082567214966,
+ "rewards/rejected": 0.09546952694654465,
+ "step": 10
+ },
+ {
+ "epoch": 0.3791469194312796,
+ "eta": 0.004999999888241291,
+ "grad_norm": 139.6567004586221,
+ "learning_rate": 4.885348141000122e-07,
+ "logits/chosen": 176.22576904296875,
+ "logits/rejected": 177.17520141601562,
+ "logps/chosen": -368.12298583984375,
+ "logps/rejected": -404.42626953125,
+ "loss": 0.7339,
+ "rewards/accuracies": 0.7124999761581421,
+ "rewards/chosen": 0.8393497467041016,
+ "rewards/margins": 0.46265918016433716,
+ "rewards/rejected": 0.376690536737442,
+ "step": 20
+ },
+ {
+ "epoch": 0.5687203791469194,
+ "eta": 0.004999999888241291,
+ "grad_norm": 117.96859521631961,
+ "learning_rate": 4.5025027361734613e-07,
+ "logits/chosen": 169.5486602783203,
+ "logits/rejected": 178.0144500732422,
+ "logps/chosen": -358.923095703125,
+ "logps/rejected": -427.748291015625,
+ "loss": 0.6881,
+ "rewards/accuracies": 0.71875,
+ "rewards/chosen": -0.8779987096786499,
+ "rewards/margins": 0.9514387249946594,
+ "rewards/rejected": -1.829437494277954,
+ "step": 30
+ },
+ {
+ "epoch": 0.7582938388625592,
+ "eta": 0.004999999888241291,
+ "grad_norm": 93.43413782788734,
+ "learning_rate": 3.893311157806091e-07,
+ "logits/chosen": 173.6021728515625,
+ "logits/rejected": 164.51864624023438,
+ "logps/chosen": -390.3794860839844,
+ "logps/rejected": -413.7120666503906,
+ "loss": 0.6527,
+ "rewards/accuracies": 0.6937500238418579,
+ "rewards/chosen": -0.9942947626113892,
+ "rewards/margins": 0.9089801907539368,
+ "rewards/rejected": -1.9032748937606812,
+ "step": 40
+ },
+ {
+ "epoch": 0.9478672985781991,
+ "eta": 0.004999999888241291,
+ "grad_norm": 100.08364463289377,
+ "learning_rate": 3.126631330646801e-07,
+ "logits/chosen": 178.54013061523438,
+ "logits/rejected": 175.74232482910156,
+ "logps/chosen": -410.1822814941406,
+ "logps/rejected": -440.4054260253906,
+ "loss": 0.5915,
+ "rewards/accuracies": 0.737500011920929,
+ "rewards/chosen": -0.44992417097091675,
+ "rewards/margins": 1.3113642930984497,
+ "rewards/rejected": -1.7612884044647217,
+ "step": 50
+ },
+ {
+ "epoch": 0.9478672985781991,
+ "eval_eta": 0.004999999888241291,
+ "eval_logits/chosen": 174.03684997558594,
+ "eval_logits/rejected": 171.63697814941406,
+ "eval_logps/chosen": -408.2999572753906,
+ "eval_logps/rejected": -458.15985107421875,
+ "eval_loss": 0.5744712352752686,
+ "eval_rewards/accuracies": 0.7021276354789734,
+ "eval_rewards/chosen": -0.721230685710907,
+ "eval_rewards/margins": 1.1333802938461304,
+ "eval_rewards/rejected": -1.8546110391616821,
+ "eval_runtime": 444.1021,
+ "eval_samples_per_second": 1.689,
+ "eval_steps_per_second": 0.212,
+ "step": 50
+ },
+ {
+ "epoch": 1.1374407582938388,
+ "eta": 0.004999999888241291,
+ "grad_norm": 45.26799050209758,
+ "learning_rate": 2.2891223348923882e-07,
+ "logits/chosen": 171.12948608398438,
+ "logits/rejected": 174.81674194335938,
+ "logps/chosen": -353.70318603515625,
+ "logps/rejected": -442.4922790527344,
+ "loss": 0.4059,
+ "rewards/accuracies": 0.887499988079071,
+ "rewards/chosen": -0.18334674835205078,
+ "rewards/margins": 2.376243829727173,
+ "rewards/rejected": -2.5595908164978027,
+ "step": 60
+ },
+ {
+ "epoch": 1.3270142180094786,
+ "eta": 0.004999999888241291,
+ "grad_norm": 47.78511870505548,
+ "learning_rate": 1.4754491880085317e-07,
+ "logits/chosen": 162.48245239257812,
+ "logits/rejected": 170.5321044921875,
+ "logps/chosen": -348.3051452636719,
+ "logps/rejected": -411.1819763183594,
+ "loss": 0.2932,
+ "rewards/accuracies": 0.96875,
+ "rewards/chosen": 0.4100046753883362,
+ "rewards/margins": 2.892571449279785,
+ "rewards/rejected": -2.4825665950775146,
+ "step": 70
+ },
+ {
+ "epoch": 1.5165876777251186,
+ "eta": 0.004999999888241291,
+ "grad_norm": 49.92244645169992,
+ "learning_rate": 7.775827023107834e-08,
+ "logits/chosen": 158.89816284179688,
+ "logits/rejected": 175.38070678710938,
+ "logps/chosen": -390.493408203125,
+ "logps/rejected": -479.05596923828125,
+ "loss": 0.2611,
+ "rewards/accuracies": 0.9312499761581421,
+ "rewards/chosen": 0.07505069673061371,
+ "rewards/margins": 3.036956310272217,
+ "rewards/rejected": -2.9619057178497314,
+ "step": 80
+ },
+ {
+ "epoch": 1.7061611374407581,
+ "eta": 0.004999999888241291,
+ "grad_norm": 48.71805868934349,
+ "learning_rate": 2.7440387297912122e-08,
+ "logits/chosen": 162.8424530029297,
+ "logits/rejected": 171.51806640625,
+ "logps/chosen": -368.1020812988281,
+ "logps/rejected": -489.2860412597656,
+ "loss": 0.2467,
+ "rewards/accuracies": 0.956250011920929,
+ "rewards/chosen": 0.014399850741028786,
+ "rewards/margins": 3.4118752479553223,
+ "rewards/rejected": -3.397475481033325,
+ "step": 90
+ },
+ {
+ "epoch": 1.8957345971563981,
+ "eta": 0.004999999888241291,
+ "grad_norm": 36.80728075073422,
+ "learning_rate": 2.27878296044029e-09,
+ "logits/chosen": 163.50262451171875,
+ "logits/rejected": 165.15457153320312,
+ "logps/chosen": -376.7969970703125,
+ "logps/rejected": -453.5335998535156,
+ "loss": 0.2599,
+ "rewards/accuracies": 0.949999988079071,
+ "rewards/chosen": 0.01423326600342989,
+ "rewards/margins": 3.457508087158203,
+ "rewards/rejected": -3.443274974822998,
+ "step": 100
+ },
+ {
+ "epoch": 1.8957345971563981,
+ "eval_eta": 0.004999999888241291,
+ "eval_logits/chosen": 163.3166046142578,
+ "eval_logits/rejected": 161.63723754882812,
+ "eval_logps/chosen": -408.9965515136719,
+ "eval_logps/rejected": -464.4193420410156,
+ "eval_loss": 0.5899427533149719,
+ "eval_rewards/accuracies": 0.7234042286872864,
+ "eval_rewards/chosen": -0.7560604810714722,
+ "eval_rewards/margins": 1.4115227460861206,
+ "eval_rewards/rejected": -2.167583465576172,
+ "eval_runtime": 445.1494,
+ "eval_samples_per_second": 1.685,
+ "eval_steps_per_second": 0.211,
+ "step": 100
+ },
+ {
+ "epoch": 1.971563981042654,
+ "step": 104,
+ "total_flos": 0.0,
+ "train_loss": 0.48392796287169826,
+ "train_runtime": 15946.9173,
+ "train_samples_per_second": 0.847,
+ "train_steps_per_second": 0.007
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 104,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 45,
+ "total_flos": 0.0,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
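`log_history` above interleaves training-step entries (loss, grad_norm, rewards/*) with evaluation entries (eval_* keys); a small sketch for pulling the reward-margin curve out of it:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training-step entries carry "rewards/margins"; eval entries use "eval_rewards/margins".
for entry in state["log_history"]:
    if "rewards/margins" in entry:
        print(entry["step"], round(entry["rewards/margins"], 3))
```
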
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64f58b0f7d2bccada0a60385781cd6898d02ffb0ae78178987d2a917f514ea3e
+ size 6456