Jerry46 committed on
Commit ad61df1 · 1 Parent(s): b2cdfca

Model save

Files changed (5)
  1. README.md +13 -15
  2. all_results.json +16 -16
  3. eval_results.json +12 -12
  4. train_results.json +5 -5
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  license: apache-2.0
- base_model: alignment-handbook/zephyr-7b-sft-full
+ base_model: mistralai/Mistral-7B-v0.1
  tags:
  - generated_from_trainer
  model-index:
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
  
  # zephyr-7b-dpo-lora
  
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: -0.2038
- - Rewards/chosen: -1.1628
- - Rewards/rejected: -2.4457
- - Rewards/accuracies: 0.6840
- - Rewards/margins: 1.2829
- - Logps/rejected: -252.9479
- - Logps/chosen: -282.7848
- - Logits/rejected: -2.9400
- - Logits/chosen: -2.9655
+ - Loss: 0.6642
+ - Rewards/chosen: 0.1042
+ - Rewards/rejected: 0.0401
+ - Rewards/accuracies: 0.6480
+ - Rewards/margins: 0.0641
+ - Logps/rejected: -230.4560
+ - Logps/chosen: -278.6917
+ - Logits/rejected: -2.3987
+ - Logits/chosen: -2.4597
  
  ## Model description
  
@@ -54,15 +54,13 @@ The following hyperparameters were used during training:
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_ratio: 0.1
- - num_epochs: 3
+ - num_epochs: 1
  
  ### Training results
  
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6137 | 1.0 | 968 | 0.6277 | -0.0287 | -0.4191 | 0.7040 | 0.3905 | -232.6823 | -271.4433 | -2.9989 | -3.0154 |
- | 0.0705 | 2.0 | 1937 | 0.0570 | -0.6708 | -1.6676 | 0.6960 | 0.9968 | -245.1669 | -277.8647 | -2.9609 | -2.9830 |
- | -0.2602 | 3.0 | 2904 | -0.2038 | -1.1628 | -2.4457 | 0.6840 | 1.2829 | -252.9479 | -282.7848 | -2.9400 | -2.9655 |
+ | 0.661 | 1.0 | 968 | 0.6642 | 0.1042 | 0.0401 | 0.6480 | 0.0641 | -230.4560 | -278.6917 | -2.3987 | -2.4597 |
  
  
  ### Framework versions
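For orientation, the hyperparameters listed in the card map onto transformers' `TrainingArguments` roughly as sketched below. This is a minimal sketch: only the values visible in this diff are filled in, `output_dir` is hypothetical, and the learning rate, batch sizes, and LoRA/DPO settings are not part of the visible hunks, so they are omitted.

```python
from transformers import TrainingArguments

# Minimal sketch: only hyperparameters visible in the README diff are set.
# output_dir is hypothetical; learning rate, batch sizes, and the LoRA/DPO
# settings are not shown in the hunks above and are therefore left out.
training_args = TrainingArguments(
    output_dir="zephyr-7b-dpo-lora",  # hypothetical path
    num_train_epochs=1,               # changed from 3 in this commit
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    adam_beta1=0.9,                   # Adam with betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-8,                # and epsilon=1e-08
)
```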
all_results.json CHANGED
@@ -1,21 +1,21 @@
  {
- "epoch": 3.0,
- "eval_logits/chosen": -2.965512990951538,
- "eval_logits/rejected": -2.9399757385253906,
- "eval_logps/chosen": -282.7847900390625,
- "eval_logps/rejected": -252.9479217529297,
- "eval_loss": -0.203842431306839,
- "eval_rewards/accuracies": 0.6840000152587891,
- "eval_rewards/chosen": -1.1628247499465942,
- "eval_rewards/margins": 1.2828813791275024,
- "eval_rewards/rejected": -2.4457061290740967,
- "eval_runtime": 444.1107,
+ "epoch": 1.0,
+ "eval_logits/chosen": -2.4597132205963135,
+ "eval_logits/rejected": -2.398695468902588,
+ "eval_logps/chosen": -278.69171142578125,
+ "eval_logps/rejected": -230.4560089111328,
+ "eval_loss": 0.6642152070999146,
+ "eval_rewards/accuracies": 0.6480000019073486,
+ "eval_rewards/chosen": 0.10415761172771454,
+ "eval_rewards/margins": 0.06405296921730042,
+ "eval_rewards/rejected": 0.04010463133454323,
+ "eval_runtime": 444.8959,
  "eval_samples": 2000,
- "eval_samples_per_second": 4.503,
+ "eval_samples_per_second": 4.495,
  "eval_steps_per_second": 0.281,
- "train_loss": 0.36701411283100355,
- "train_runtime": 84636.1866,
+ "train_loss": 0.6728762634529555,
+ "train_runtime": 27528.1814,
  "train_samples": 61966,
- "train_samples_per_second": 2.196,
- "train_steps_per_second": 0.034
+ "train_samples_per_second": 2.251,
+ "train_steps_per_second": 0.035
  }
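The `eval_rewards/*` fields follow the standard DPO convention: a completion's implicit reward is β times the gap between the policy and reference log-probabilities of that completion, and `eval_logps/*` are the policy's summed per-sequence log-probabilities. A minimal sketch of how these metrics are derived, assuming that convention; the β here is illustrative, since its value for this run is not recorded in the diff.

```python
import torch

def dpo_reward_metrics(policy_chosen_logps, policy_rejected_logps,
                       ref_chosen_logps, ref_rejected_logps, beta=0.1):
    """Recompute the rewards/* metrics from summed per-sequence log-probs.

    beta is illustrative; the value used for this run is not in the diff.
    """
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    margins = chosen_rewards - rejected_rewards
    accuracies = (chosen_rewards > rejected_rewards).float()
    return {
        "rewards/chosen": chosen_rewards.mean().item(),
        "rewards/rejected": rejected_rewards.mean().item(),
        "rewards/margins": margins.mean().item(),
        "rewards/accuracies": accuracies.mean().item(),
    }
```

Consistent with this, the reported margin is just chosen minus rejected: 0.1042 − 0.0401 = 0.0641.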
eval_results.json CHANGED
@@ -1,16 +1,16 @@
  {
- "epoch": 3.0,
- "eval_logits/chosen": -2.965512990951538,
- "eval_logits/rejected": -2.9399757385253906,
- "eval_logps/chosen": -282.7847900390625,
- "eval_logps/rejected": -252.9479217529297,
- "eval_loss": -0.203842431306839,
- "eval_rewards/accuracies": 0.6840000152587891,
- "eval_rewards/chosen": -1.1628247499465942,
- "eval_rewards/margins": 1.2828813791275024,
- "eval_rewards/rejected": -2.4457061290740967,
- "eval_runtime": 444.1107,
+ "epoch": 1.0,
+ "eval_logits/chosen": -2.4597132205963135,
+ "eval_logits/rejected": -2.398695468902588,
+ "eval_logps/chosen": -278.69171142578125,
+ "eval_logps/rejected": -230.4560089111328,
+ "eval_loss": 0.6642152070999146,
+ "eval_rewards/accuracies": 0.6480000019073486,
+ "eval_rewards/chosen": 0.10415761172771454,
+ "eval_rewards/margins": 0.06405296921730042,
+ "eval_rewards/rejected": 0.04010463133454323,
+ "eval_runtime": 444.8959,
  "eval_samples": 2000,
- "eval_samples_per_second": 4.503,
+ "eval_samples_per_second": 4.495,
  "eval_steps_per_second": 0.281
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 3.0,
- "train_loss": 0.36701411283100355,
- "train_runtime": 84636.1866,
+ "epoch": 1.0,
+ "train_loss": 0.6728762634529555,
+ "train_runtime": 27528.1814,
  "train_samples": 61966,
- "train_samples_per_second": 2.196,
- "train_steps_per_second": 0.034
+ "train_samples_per_second": 2.251,
+ "train_steps_per_second": 0.035
  }
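The throughput fields are mutually consistent and can be re-derived from the sample counts, step count, and runtimes committed here. A quick check; the implied batch sizes at the end are inferences from the ratios, not values stated anywhere in the diff.

```python
# Figures taken from train_results.json / eval_results.json in this commit.
train_samples, train_runtime = 61966, 27528.1814
train_steps = 968                      # final step at epoch 1.0 (README table)
eval_samples, eval_runtime = 2000, 444.8959

print(train_samples / train_runtime)   # ~2.251  -> train_samples_per_second
print(train_steps / train_runtime)     # ~0.0352 -> train_steps_per_second (0.035)
print(eval_samples / eval_runtime)     # ~4.495  -> eval_samples_per_second

# Inferred, not stated in the diff: effective batch sizes implied by the ratios.
print(train_samples / train_steps)     # ~64 samples per optimizer step
print(4.495 / 0.281)                   # ~16 samples per eval step
```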
trainer_state.json CHANGED
The diff for this file is too large to render.