jainamit committed on
Commit
b35b6d9
·
verified ·
1 Parent(s): 86ce8c4

Model save

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +4 -4
  3. config.json +1 -1
  4. train_results.json +4 -4
  5. trainer_state.json +14 -8
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/6ccmz8b9)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/i5y5acqm)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00016750305739037685,
4
- "train_runtime": 21555.541,
5
  "train_samples": 45000,
6
- "train_samples_per_second": 0.501,
7
- "train_steps_per_second": 0.021
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 5.154237393785408e-07,
4
+ "train_runtime": 77.0261,
5
  "train_samples": 45000,
6
+ "train_samples_per_second": 140.212,
7
+ "train_steps_per_second": 5.842
8
  }
config.json CHANGED
@@ -23,7 +23,7 @@
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.48.1",
26
- "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
29
  }
 
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.48.1",
26
+ "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
29
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00016750305739037685,
4
- "train_runtime": 21555.541,
5
  "train_samples": 45000,
6
- "train_samples_per_second": 0.501,
7
- "train_steps_per_second": 0.021
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 5.154237393785408e-07,
4
+ "train_runtime": 77.0261,
5
  "train_samples": 45000,
6
+ "train_samples_per_second": 140.212,
7
+ "train_steps_per_second": 5.842
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24,
5
  "eval_steps": 500,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2934,13 +2934,19 @@
2934
  "step": 450
2935
  },
2936
  {
2937
- "epoch": 0.24,
2938
- "step": 450,
 
 
 
 
 
 
2939
  "total_flos": 0.0,
2940
- "train_loss": 0.00016750305739037685,
2941
- "train_runtime": 21555.541,
2942
- "train_samples_per_second": 0.501,
2943
- "train_steps_per_second": 0.021
2944
  }
2945
  ],
2946
  "logging_steps": 2,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.24053333333333332,
5
  "eval_steps": 500,
6
+ "global_step": 451,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2934
  "step": 450
2935
  },
2936
  {
2937
+ "completion_length": 381.8854293823242,
2938
+ "epoch": 0.24053333333333332,
2939
+ "kl": 0.2254638671875,
2940
+ "reward": 1.348958358168602,
2941
+ "reward_std": 0.22792584728449583,
2942
+ "rewards/equation_reward_func": 0.4270833469927311,
2943
+ "rewards/format_reward_func": 0.9218750149011612,
2944
+ "step": 451,
2945
  "total_flos": 0.0,
2946
+ "train_loss": 5.154237393785408e-07,
2947
+ "train_runtime": 77.0261,
2948
+ "train_samples_per_second": 140.212,
2949
+ "train_steps_per_second": 5.842
2950
  }
2951
  ],
2952
  "logging_steps": 2,