jainamit committed · Commit 27d6bad · verified · 1 Parent(s): b35b6d9

Model save

README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/i5y5acqm)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jainamitnew-penn-state/huggingface/runs/lquicf5c)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
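
For context on the README snippet that the hunk header references (`print(output["generated_text"])`): it follows the standard `transformers` text-generation pipeline pattern. Below is a minimal sketch; the repository id is a hypothetical placeholder, not taken from this commit.

```python
from transformers import pipeline

# Hypothetical repo id -- substitute the actual model repository this commit belongs to.
model_id = "jainamit/<model-name>"

generator = pipeline("text-generation", model=model_id)

# The pipeline returns a list of dicts; the README snippet prints the
# "generated_text" field of one such dict.
output = generator("What is 2 + 2?", max_new_tokens=64, return_full_text=False)[0]
print(output["generated_text"])
```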
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.154237393785408e-07,
-    "train_runtime": 77.0261,
-    "train_samples": 45000,
-    "train_samples_per_second": 140.212,
-    "train_steps_per_second": 5.842
+    "train_loss": 0.0,
+    "train_runtime": 1.3116,
+    "train_samples": 3,
+    "train_samples_per_second": 8234.047,
+    "train_steps_per_second": 343.085
 }
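
As a quick sanity check on the new summary above, the throughput fields are mutually consistent: `train_steps_per_second * train_runtime` recovers the trainer's step count (343.085 × 1.3116 ≈ 450, matching `max_steps` in trainer_state.json below). A minimal sketch, assuming the file is read from the repository root:

```python
import json

# Read the training summary written by the Hugging Face Trainer.
with open("all_results.json") as f:
    results = json.load(f)

# 343.085 steps/s * 1.3116 s ≈ 450 optimizer steps, matching "max_steps".
steps = results["train_steps_per_second"] * results["train_runtime"]
print(f"approx. optimizer steps: {steps:.0f}")
```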
tensorboard-logs/events.out.tfevents.1738864133.node2501.3854422.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a985cc8accaabd0127d1e71223620ebb6850ff351e652799381f0db291c62373
+size 5834
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
-size 11422063
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.154237393785408e-07,
-    "train_runtime": 77.0261,
-    "train_samples": 45000,
-    "train_samples_per_second": 140.212,
-    "train_steps_per_second": 5.842
+    "train_loss": 0.0,
+    "train_runtime": 1.3116,
+    "train_samples": 3,
+    "train_samples_per_second": 8234.047,
+    "train_steps_per_second": 343.085
 }
trainer_state.json CHANGED
@@ -2934,25 +2934,19 @@
       "step": 450
     },
     {
-      "completion_length": 381.8854293823242,
       "epoch": 0.24053333333333332,
-      "kl": 0.2254638671875,
-      "reward": 1.348958358168602,
-      "reward_std": 0.22792584728449583,
-      "rewards/equation_reward_func": 0.4270833469927311,
-      "rewards/format_reward_func": 0.9218750149011612,
       "step": 451,
       "total_flos": 0.0,
-      "train_loss": 5.154237393785408e-07,
-      "train_runtime": 77.0261,
-      "train_samples_per_second": 140.212,
-      "train_steps_per_second": 5.842
+      "train_loss": 0.0,
+      "train_runtime": 1.3116,
+      "train_samples_per_second": 8234.047,
+      "train_steps_per_second": 343.085
     }
   ],
   "logging_steps": 2,
   "max_steps": 450,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
+  "num_train_epochs": 450,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {