smirki commited on
Commit
6846c2e
·
verified ·
1 Parent(s): a9da5b4

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf8a4840744445d7b5c7b194ce939998d6352bc50adf06a5d078481ee8297373
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1501691ebe2cf9397cb54c3eeb500f35a68345eee09e02b5b4f2c21ce641dc93
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b9dc156f7ab8d8c101ab11f89633c51a3e1ef772e6f7e5bdbdb528550c82290
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d048cbd8874c66c3f4744500a21e9f43f21818be56aa86e8f173d24b0cf6c6
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f8e4772e690bdadd5a1f02432bda81d45b48b1b69475cf388770c2e827ad5db
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c283c17e83831f8f940a4594b9e35974034ae78d97c9584daa26ac8cfdfe19
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bf8199a4f0174f0fea821cfd5e9428ebb987d6fd85e497fec1ce048c10e54b7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da024a2e17d577be47c292f1def586de73347617fba9e5b52c7197d45c2dfdc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.01891510899084907,
5
  "eval_steps": 500,
6
- "global_step": 2375,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2851,6 +2851,42 @@
2851
  "reward_std": 0.6412660963833332,
2852
  "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.284224995970726,
2853
  "step": 2370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2854
  }
2855
  ],
2856
  "logging_steps": 10,
@@ -2865,7 +2901,7 @@
2865
  "should_evaluate": false,
2866
  "should_log": false,
2867
  "should_save": true,
2868
- "should_training_stop": false
2869
  },
2870
  "attributes": {}
2871
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.01911421540127906,
5
  "eval_steps": 500,
6
+ "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2851
  "reward_std": 0.6412660963833332,
2852
  "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.284224995970726,
2853
  "step": 2370
2854
+ },
2855
+ {
2856
+ "completion_length": 788.575,
2857
+ "epoch": 0.01895493027293507,
2858
+ "grad_norm": 4.536673069000244,
2859
+ "kl": 2.9731951540336015,
2860
+ "learning_rate": 1.0576247944985018e-09,
2861
+ "loss": 0.1189,
2862
+ "reward": 0.850387492030859,
2863
+ "reward_std": 0.4183545672596665,
2864
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.850387492030859,
2865
+ "step": 2380
2866
+ },
2867
+ {
2868
+ "completion_length": 720.825,
2869
+ "epoch": 0.019034572837107064,
2870
+ "grad_norm": 0.19723933935165405,
2871
+ "kl": 1.869138080254197,
2872
+ "learning_rate": 2.6442018223132857e-10,
2873
+ "loss": 0.0748,
2874
+ "reward": 1.344549997150898,
2875
+ "reward_std": 0.4945951491594315,
2876
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.344549997150898,
2877
+ "step": 2390
2878
+ },
2879
+ {
2880
+ "completion_length": 815.04375,
2881
+ "epoch": 0.01911421540127906,
2882
+ "grad_norm": 2.070497989654541,
2883
+ "kl": 3.4263820610009135,
2884
+ "learning_rate": 0.0,
2885
+ "loss": 0.1371,
2886
+ "reward": 0.9367499954998493,
2887
+ "reward_std": 0.5499310284852982,
2888
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.9367499954998493,
2889
+ "step": 2400
2890
  }
2891
  ],
2892
  "logging_steps": 10,
 
2901
  "should_evaluate": false,
2902
  "should_log": false,
2903
  "should_save": true,
2904
+ "should_training_stop": true
2905
  },
2906
  "attributes": {}
2907
  }