jainamit committed · Commit 27d6bad · verified · 1 Parent(s): b35b6d9

Model save

README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/i5y5acqm)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/lquicf5c)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
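
For context on the README snippet that the hunk header references (`print(output["generated_text"])`): it follows the standard `transformers` text-generation pipeline pattern. Below is a minimal sketch; the repository id is a hypothetical placeholder, not taken from this commit.

```python
from transformers import pipeline

# Hypothetical repo id -- substitute the actual model repository this commit belongs to.
model_id = "jainamit/<model-name>"

generator = pipeline("text-generation", model=model_id)

# The pipeline returns a list of dicts; the README snippet prints the
# "generated_text" field of one such dict.
output = generator("What is 2 + 2?", max_new_tokens=64, return_full_text=False)[0]
print(output["generated_text"])
```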
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.154237393785408e-07,
-    "train_runtime": 77.0261,
-    "train_samples": 45000,
-    "train_samples_per_second": 140.212,
-    "train_steps_per_second": 5.842
+    "train_loss": 0.0,
+    "train_runtime": 1.3116,
+    "train_samples": 3,
+    "train_samples_per_second": 8234.047,
+    "train_steps_per_second": 343.085
 }
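
As a quick sanity check on the new summary above, the throughput fields are mutually consistent: `train_steps_per_second * train_runtime` recovers the trainer's step count (343.085 × 1.3116 ≈ 450, matching `max_steps` in trainer_state.json below). A minimal sketch, assuming the file is read from the repository root:

```python
import json

# Read the training summary written by the Hugging Face Trainer.
with open("all_results.json") as f:
    results = json.load(f)

# 343.085 steps/s * 1.3116 s ≈ 450 optimizer steps, matching "max_steps".
steps = results["train_steps_per_second"] * results["train_runtime"]
print(f"approx. optimizer steps: {steps:.0f}")
```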
tensorboard-logs/events.out.tfevents.1738864133.node2501.3854422.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a985cc8accaabd0127d1e71223620ebb6850ff351e652799381f0db291c62373
+size 5834
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
-size 11422063
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.154237393785408e-07,
-    "train_runtime": 77.0261,
-    "train_samples": 45000,
-    "train_samples_per_second": 140.212,
-    "train_steps_per_second": 5.842
+    "train_loss": 0.0,
+    "train_runtime": 1.3116,
+    "train_samples": 3,
+    "train_samples_per_second": 8234.047,
+    "train_steps_per_second": 343.085
 }
trainer_state.json CHANGED
@@ -2934,25 +2934,19 @@
       "step": 450
     },
     {
-      "completion_length": 381.8854293823242,
       "epoch": 0.24053333333333332,
-      "kl": 0.2254638671875,
-      "reward": 1.348958358168602,
-      "reward_std": 0.22792584728449583,
-      "rewards/equation_reward_func": 0.4270833469927311,
-      "rewards/format_reward_func": 0.9218750149011612,
       "step": 451,
       "total_flos": 0.0,
-      "train_loss": 5.154237393785408e-07,
-      "train_runtime": 77.0261,
-      "train_samples_per_second": 140.212,
-      "train_steps_per_second": 5.842
+      "train_loss": 0.0,
+      "train_runtime": 1.3116,
+      "train_samples_per_second": 8234.047,
+      "train_steps_per_second": 343.085
     }
   ],
   "logging_steps": 2,
   "max_steps": 450,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
+  "num_train_epochs": 450,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {