diyali95916 committed (verified)
Commit 2546ed9 · Parent(s): 33b11c9

Model save
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6941
- - Rewards/chosen: -0.0008
- - Rewards/rejected: 0.0011
- - Rewards/accuracies: 0.4870
- - Rewards/margins: -0.0019
- - Logps/rejected: -256.6144
- - Logps/chosen: -273.1258
- - Logits/rejected: -2.8923
- - Logits/chosen: -2.8913
+ - Loss: 0.6925
+ - Rewards/chosen: -0.0045
+ - Rewards/rejected: 0.0164
+ - Rewards/accuracies: 0.25
+ - Rewards/margins: -0.0209
+ - Logps/rejected: -35.6209
+ - Logps/chosen: -119.3746
+ - Logits/rejected: -2.6567
+ - Logits/chosen: -2.7227
 
  ## Model description
 
@@ -47,10 +47,10 @@ The following hyperparameters were used during training:
  - eval_batch_size: 4
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 2
+ - num_devices: 8
  - gradient_accumulation_steps: 32
- - total_train_batch_size: 128
- - total_eval_batch_size: 8
+ - total_train_batch_size: 512
+ - total_eval_batch_size: 32
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_ratio: 0.1
@@ -60,9 +60,9 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6931 | 0.84 | 4 | 0.6944 | -0.0020 | 0.0023 | 0.4680 | -0.0042 | -256.6025 | -273.1375 | -2.8922 | -2.8913 |
- | 0.6931 | 1.88 | 9 | 0.6937 | -0.0006 | 0.0011 | 0.4930 | -0.0016 | -256.6148 | -273.1236 | -2.8922 | -2.8914 |
- | 0.6934 | 2.51 | 12 | 0.6941 | -0.0008 | 0.0011 | 0.4870 | -0.0019 | -256.6144 | -273.1258 | -2.8923 | -2.8913 |
+ | 0.6931 | 0.8 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -35.7851 | -119.3297 | -2.6563 | -2.7224 |
+ | 0.6931 | 1.6 | 2 | 0.6903 | -0.0086 | 0.0118 | 0.0 | -0.0204 | -35.6673 | -119.4162 | -2.6570 | -2.7221 |
+ | 0.6931 | 2.4 | 3 | 0.6925 | -0.0045 | 0.0164 | 0.25 | -0.0209 | -35.6209 | -119.3746 | -2.6567 | -2.7227 |
 
 
  ### Framework versions
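The hyperparameter changes above (num_devices 2 → 8, total_train_batch_size 128 → 512, total_eval_batch_size 8 → 32) are consistent with the usual effective-batch-size arithmetic. A minimal sketch of that check, assuming the per-device train batch size of 2 that these totals imply (the per-device value itself is not shown in this hunk):

```python
# Sanity check for the effective batch sizes reported in the README diff.
per_device_train_batch_size = 2   # assumed: inferred as 512 / (8 * 32), not shown in the hunk
per_device_eval_batch_size = 4    # "eval_batch_size: 4" from the README
num_devices = 8
gradient_accumulation_steps = 32

total_train_batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
total_eval_batch_size = per_device_eval_batch_size * num_devices

assert total_train_batch_size == 512  # matches "total_train_batch_size: 512"
assert total_eval_batch_size == 32    # matches "total_eval_batch_size: 32"
```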
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "v_proj",
- "k_proj",
  "q_proj",
- "o_proj"
+ "v_proj",
+ "o_proj",
+ "k_proj"
  ],
  "task_type": "CAUSAL_LM"
  }
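This change only reorders the target_modules list; the set of attention projections carrying LoRA adapters (q/k/v/o) is unchanged. For reference, a minimal PEFT sketch of a configuration with these modules; the r and lora_alpha values here are placeholders, not taken from this diff:

```python
from peft import LoraConfig

# Sketch only: target_modules and task_type come from adapter_config.json above;
# r and lora_alpha are illustrative values, not part of this repository's diff.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "o_proj", "k_proj"],
    task_type="CAUSAL_LM",
)
```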
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b9aa4b56ad055076743bb6f6bf61d2cfad2be482bbca7691c5ffb99fcd147dca
+ oid sha256:98a53cb969618e88f0b672364127ce1906c69e16656f648c1eaccf6e536d36ca
  size 109086672
all_results.json CHANGED
@@ -1,21 +1,21 @@
  {
- "epoch": 2.51,
- "eval_logits/chosen": -2.891324996948242,
- "eval_logits/rejected": -2.892286777496338,
- "eval_logps/chosen": -273.12579345703125,
- "eval_logps/rejected": -256.6144104003906,
- "eval_loss": 0.6940562725067139,
- "eval_rewards/accuracies": 0.4869999885559082,
- "eval_rewards/chosen": -0.0007941981311887503,
- "eval_rewards/margins": -0.0018857381073758006,
- "eval_rewards/rejected": 0.0010915396269410849,
- "eval_runtime": 605.1943,
- "eval_samples": 2000,
- "eval_samples_per_second": 3.305,
- "eval_steps_per_second": 0.413,
- "train_loss": 0.6932857781648636,
- "train_runtime": 2636.7323,
- "train_samples": 611,
- "train_samples_per_second": 0.695,
- "train_steps_per_second": 0.005
+ "epoch": 2.4,
+ "eval_logits/chosen": -2.7227089405059814,
+ "eval_logits/rejected": -2.6567294597625732,
+ "eval_logps/chosen": -119.37458038330078,
+ "eval_logps/rejected": -35.62090301513672,
+ "eval_loss": 0.6925258040428162,
+ "eval_rewards/accuracies": 0.25,
+ "eval_rewards/chosen": -0.004486369900405407,
+ "eval_rewards/margins": -0.020908452570438385,
+ "eval_rewards/rejected": 0.016422081738710403,
+ "eval_runtime": 2.6911,
+ "eval_samples": 30,
+ "eval_samples_per_second": 11.148,
+ "eval_steps_per_second": 0.372,
+ "train_loss": 0.6927651365598043,
+ "train_runtime": 245.4034,
+ "train_samples": 626,
+ "train_samples_per_second": 7.653,
+ "train_steps_per_second": 0.012
  }
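The rewards/* metrics in these results files follow the convention of preference-optimization trainers such as TRL's DPOTrainer (an assumption about the training setup, not stated in this commit): the margin is the chosen reward minus the rejected reward, and the accuracy is the fraction of evaluation pairs whose chosen reward exceeds the rejected one. A minimal check against the values above:

```python
# Relationship between the reported reward metrics (values from all_results.json).
eval_rewards_chosen = -0.004486369900405407
eval_rewards_rejected = 0.016422081738710403

eval_rewards_margins = eval_rewards_chosen - eval_rewards_rejected
print(round(eval_rewards_margins, 6))  # -0.020908, matching eval_rewards/margins

# eval_rewards/accuracies (0.25) is the per-pair mean of (chosen_reward > rejected_reward);
# it cannot be recomputed from these aggregate values alone.
```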
eval_results.json CHANGED
@@ -1,16 +1,16 @@
  {
- "epoch": 2.51,
- "eval_logits/chosen": -2.891324996948242,
- "eval_logits/rejected": -2.892286777496338,
- "eval_logps/chosen": -273.12579345703125,
- "eval_logps/rejected": -256.6144104003906,
- "eval_loss": 0.6940562725067139,
- "eval_rewards/accuracies": 0.4869999885559082,
- "eval_rewards/chosen": -0.0007941981311887503,
- "eval_rewards/margins": -0.0018857381073758006,
- "eval_rewards/rejected": 0.0010915396269410849,
- "eval_runtime": 605.1943,
- "eval_samples": 2000,
- "eval_samples_per_second": 3.305,
- "eval_steps_per_second": 0.413
+ "epoch": 2.4,
+ "eval_logits/chosen": -2.7227089405059814,
+ "eval_logits/rejected": -2.6567294597625732,
+ "eval_logps/chosen": -119.37458038330078,
+ "eval_logps/rejected": -35.62090301513672,
+ "eval_loss": 0.6925258040428162,
+ "eval_rewards/accuracies": 0.25,
+ "eval_rewards/chosen": -0.004486369900405407,
+ "eval_rewards/margins": -0.020908452570438385,
+ "eval_rewards/rejected": 0.016422081738710403,
+ "eval_runtime": 2.6911,
+ "eval_samples": 30,
+ "eval_samples_per_second": 11.148,
+ "eval_steps_per_second": 0.372
  }
runs/Jan25_03-04-25_jupyter-dli/events.out.tfevents.1706151962.jupyter-dli.54020.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:342c7b14c459551375d416fb868b77b7a0941e1d8c22d3c24eacc792f56b1067
+ size 4991
runs/Jan25_03-26-17_jupyter-dli/events.out.tfevents.1706153211.jupyter-dli.63500.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba9daeacc69a50eb4f81cc2618ecfed56e9e9e0516febce196499eefb7e8e49
+ size 7520
runs/Jan25_03-26-17_jupyter-dli/events.out.tfevents.1706153459.jupyter-dli.63500.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1352996015b9bdb9b7f09f5d0228224b5f6d7426df19a9faf500a9ea4b4665ec
+ size 815
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 2.51,
- "train_loss": 0.6932857781648636,
- "train_runtime": 2636.7323,
- "train_samples": 611,
- "train_samples_per_second": 0.695,
- "train_steps_per_second": 0.005
+ "epoch": 2.4,
+ "train_loss": 0.6927651365598043,
+ "train_runtime": 245.4034,
+ "train_samples": 626,
+ "train_samples_per_second": 7.653,
+ "train_steps_per_second": 0.012
  }
trainer_state.json CHANGED
@@ -1,20 +1,20 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.5098039215686274,
+ "epoch": 2.4,
  "eval_steps": 100,
- "global_step": 12,
+ "global_step": 3,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.21,
- "learning_rate": 2.5e-07,
- "logits/chosen": -2.9307734966278076,
- "logits/rejected": -2.9072206020355225,
- "logps/chosen": -257.9442138671875,
- "logps/rejected": -223.16062927246094,
+ "epoch": 0.8,
+ "learning_rate": 5e-07,
+ "logits/chosen": -2.7074732780456543,
+ "logits/rejected": -2.714259147644043,
+ "logps/chosen": -177.91046142578125,
+ "logps/rejected": -169.544921875,
  "loss": 0.6931,
  "rewards/accuracies": 0.0,
  "rewards/chosen": 0.0,
@@ -23,79 +23,65 @@
  "step": 1
  },
  {
- "epoch": 0.84,
- "eval_logits/chosen": -2.8913073539733887,
- "eval_logits/rejected": -2.892223834991455,
- "eval_logps/chosen": -273.1374816894531,
- "eval_logps/rejected": -256.60247802734375,
- "eval_loss": 0.6944335699081421,
- "eval_rewards/accuracies": 0.46799999475479126,
- "eval_rewards/chosen": -0.0019581823144108057,
- "eval_rewards/margins": -0.004248426295816898,
- "eval_rewards/rejected": 0.0022902432829141617,
- "eval_runtime": 604.0376,
- "eval_samples_per_second": 3.311,
- "eval_steps_per_second": 0.414,
- "step": 4
- },
- {
- "epoch": 1.88,
- "eval_logits/chosen": -2.891376495361328,
- "eval_logits/rejected": -2.8922488689422607,
- "eval_logps/chosen": -273.12359619140625,
- "eval_logps/rejected": -256.61480712890625,
- "eval_loss": 0.6936609148979187,
- "eval_rewards/accuracies": 0.49300000071525574,
- "eval_rewards/chosen": -0.0005687248194590211,
- "eval_rewards/margins": -0.0016233286587521434,
- "eval_rewards/rejected": 0.0010546041885390878,
- "eval_runtime": 607.6521,
- "eval_samples_per_second": 3.291,
- "eval_steps_per_second": 0.411,
- "step": 9
+ "epoch": 0.8,
+ "eval_logits/chosen": -2.7223572731018066,
+ "eval_logits/rejected": -2.6562907695770264,
+ "eval_logps/chosen": -119.3297119140625,
+ "eval_logps/rejected": -35.78512191772461,
+ "eval_loss": 0.6931473016738892,
+ "eval_rewards/accuracies": 0.0,
+ "eval_rewards/chosen": 0.0,
+ "eval_rewards/margins": 0.0,
+ "eval_rewards/rejected": 0.0,
+ "eval_runtime": 2.722,
+ "eval_samples_per_second": 11.021,
+ "eval_steps_per_second": 0.367,
+ "step": 1
  },
  {
- "epoch": 2.09,
- "learning_rate": 1e-07,
- "logits/chosen": -2.9432342052459717,
- "logits/rejected": -2.949681520462036,
- "logps/chosen": -269.6990051269531,
- "logps/rejected": -258.2781982421875,
- "loss": 0.6934,
- "rewards/accuracies": 0.4340277910232544,
- "rewards/chosen": 0.0009870771318674088,
- "rewards/margins": 0.0012862730072811246,
- "rewards/rejected": -0.0002991959627252072,
- "step": 10
+ "epoch": 1.6,
+ "eval_logits/chosen": -2.722149133682251,
+ "eval_logits/rejected": -2.656991481781006,
+ "eval_logps/chosen": -119.41619873046875,
+ "eval_logps/rejected": -35.667266845703125,
+ "eval_loss": 0.690349817276001,
+ "eval_rewards/accuracies": 0.0,
+ "eval_rewards/chosen": -0.008648109622299671,
+ "eval_rewards/margins": -0.020433522760868073,
+ "eval_rewards/rejected": 0.011785412207245827,
+ "eval_runtime": 2.7089,
+ "eval_samples_per_second": 11.075,
+ "eval_steps_per_second": 0.369,
+ "step": 2
  },
  {
- "epoch": 2.51,
- "eval_logits/chosen": -2.891324996948242,
- "eval_logits/rejected": -2.892286777496338,
- "eval_logps/chosen": -273.12579345703125,
- "eval_logps/rejected": -256.6144104003906,
- "eval_loss": 0.6940562725067139,
- "eval_rewards/accuracies": 0.4869999885559082,
- "eval_rewards/chosen": -0.0007941981311887503,
- "eval_rewards/margins": -0.0018857381073758006,
- "eval_rewards/rejected": 0.0010915396269410849,
- "eval_runtime": 604.8757,
- "eval_samples_per_second": 3.306,
- "eval_steps_per_second": 0.413,
- "step": 12
+ "epoch": 2.4,
+ "eval_logits/chosen": -2.7227089405059814,
+ "eval_logits/rejected": -2.6567294597625732,
+ "eval_logps/chosen": -119.37458038330078,
+ "eval_logps/rejected": -35.62090301513672,
+ "eval_loss": 0.6925258040428162,
+ "eval_rewards/accuracies": 0.25,
+ "eval_rewards/chosen": -0.004486369900405407,
+ "eval_rewards/margins": -0.020908452570438385,
+ "eval_rewards/rejected": 0.016422081738710403,
+ "eval_runtime": 2.6892,
+ "eval_samples_per_second": 11.156,
+ "eval_steps_per_second": 0.372,
+ "step": 3
  },
  {
- "epoch": 2.51,
- "step": 12,
+ "epoch": 2.4,
+ "step": 3,
  "total_flos": 0.0,
- "train_loss": 0.6932857781648636,
- "train_runtime": 2636.7323,
- "train_samples_per_second": 0.695,
- "train_steps_per_second": 0.005
+ "train_loss": 0.6927651365598043,
+ "train_runtime": 245.4034,
+ "train_samples_per_second": 7.653,
+ "train_steps_per_second": 0.012
  }
  ],
  "logging_steps": 10,
- "max_steps": 12,
+ "max_steps": 3,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 0.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c6e0a992426eb70b08db763df2e71fa42f4afe271e674e50c228443e1dc5bb93
+ oid sha256:f53e20161db1b1d41d7c2044c14f2ecec03643d3dd24b7f13d44742d874a8247
  size 5752