alchemist69 commited on
Commit
a65842a
·
verified ·
1 Parent(s): 908c730

Training in progress, step 425, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134499e57d45a7fd5ce0fc745d89b7d5235a5c637b45907532bfc81ea64c4fb3
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ece51e98aa5e08bb825978d3489e0352183f84e442b36189d9c7c900ccfa7b
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07ca44ce0f454b04e9a008cc43f4a349cc7cacf9880fc83effba51973d8c4e74
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:307dcb2034acb404762de1201cda3ef739d641f9fded961a58856b8c457cf964
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859771627dc023f016f01621bc5c786af762f9d6b951abdbadba930557ca3e1a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f714b289dd09c25736d2eb5fc6375b2e8df7d87d36c1242bd537b49bf96a11fa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca2768ac4ca9856b6b7269d283904106ae00a0ce974111c991dbbe02a93bf930
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0fa4917cbab6353dc9863ee5bc208db299bf865d0e1def1c60860a29287ca8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.053612470626831,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.9422850412249706,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2847,6 +2847,181 @@
2847
  "eval_samples_per_second": 9.993,
2848
  "eval_steps_per_second": 2.502,
2849
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2850
  }
2851
  ],
2852
  "logging_steps": 1,
@@ -2870,12 +3045,12 @@
2870
  "should_evaluate": false,
2871
  "should_log": false,
2872
  "should_save": true,
2873
- "should_training_stop": false
2874
  },
2875
  "attributes": {}
2876
  }
2877
  },
2878
- "total_flos": 5.884979311961702e+17,
2879
  "train_batch_size": 8,
2880
  "trial_name": null,
2881
  "trial_params": null
 
1
  {
2
  "best_metric": 1.053612470626831,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
+ "epoch": 1.0011778563015312,
5
  "eval_steps": 100,
6
+ "global_step": 425,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2847
  "eval_samples_per_second": 9.993,
2848
  "eval_steps_per_second": 2.502,
2849
  "step": 400
2850
+ },
2851
+ {
2852
+ "epoch": 0.944640753828033,
2853
+ "grad_norm": 2.428661346435547,
2854
+ "learning_rate": 8.639676646793382e-07,
2855
+ "loss": 2.9634,
2856
+ "step": 401
2857
+ },
2858
+ {
2859
+ "epoch": 0.9469964664310954,
2860
+ "grad_norm": 2.951463460922241,
2861
+ "learning_rate": 7.936574269178377e-07,
2862
+ "loss": 3.2676,
2863
+ "step": 402
2864
+ },
2865
+ {
2866
+ "epoch": 0.9493521790341578,
2867
+ "grad_norm": 2.77046537399292,
2868
+ "learning_rate": 7.263079859864297e-07,
2869
+ "loss": 3.3325,
2870
+ "step": 403
2871
+ },
2872
+ {
2873
+ "epoch": 0.9517078916372202,
2874
+ "grad_norm": 2.883030652999878,
2875
+ "learning_rate": 6.61923394371039e-07,
2876
+ "loss": 3.2712,
2877
+ "step": 404
2878
+ },
2879
+ {
2880
+ "epoch": 0.9540636042402827,
2881
+ "grad_norm": 2.923128843307495,
2882
+ "learning_rate": 6.005075261595494e-07,
2883
+ "loss": 3.4014,
2884
+ "step": 405
2885
+ },
2886
+ {
2887
+ "epoch": 0.9564193168433451,
2888
+ "grad_norm": 2.9878504276275635,
2889
+ "learning_rate": 5.42064076808646e-07,
2890
+ "loss": 3.7119,
2891
+ "step": 406
2892
+ },
2893
+ {
2894
+ "epoch": 0.9587750294464076,
2895
+ "grad_norm": 3.2617733478546143,
2896
+ "learning_rate": 4.865965629214819e-07,
2897
+ "loss": 3.5583,
2898
+ "step": 407
2899
+ },
2900
+ {
2901
+ "epoch": 0.9611307420494699,
2902
+ "grad_norm": 3.0995283126831055,
2903
+ "learning_rate": 4.341083220360864e-07,
2904
+ "loss": 3.511,
2905
+ "step": 408
2906
+ },
2907
+ {
2908
+ "epoch": 0.9634864546525324,
2909
+ "grad_norm": 3.3939716815948486,
2910
+ "learning_rate": 3.846025124245145e-07,
2911
+ "loss": 3.6265,
2912
+ "step": 409
2913
+ },
2914
+ {
2915
+ "epoch": 0.9658421672555948,
2916
+ "grad_norm": 3.365983486175537,
2917
+ "learning_rate": 3.380821129028489e-07,
2918
+ "loss": 3.8739,
2919
+ "step": 410
2920
+ },
2921
+ {
2922
+ "epoch": 0.9681978798586572,
2923
+ "grad_norm": 3.7531888484954834,
2924
+ "learning_rate": 2.945499226519322e-07,
2925
+ "loss": 3.5749,
2926
+ "step": 411
2927
+ },
2928
+ {
2929
+ "epoch": 0.9705535924617197,
2930
+ "grad_norm": 4.2459940910339355,
2931
+ "learning_rate": 2.5400856104894067e-07,
2932
+ "loss": 4.4775,
2933
+ "step": 412
2934
+ },
2935
+ {
2936
+ "epoch": 0.9729093050647821,
2937
+ "grad_norm": 4.396928310394287,
2938
+ "learning_rate": 2.1646046750978254e-07,
2939
+ "loss": 4.078,
2940
+ "step": 413
2941
+ },
2942
+ {
2943
+ "epoch": 0.9752650176678446,
2944
+ "grad_norm": 4.993990421295166,
2945
+ "learning_rate": 1.819079013423153e-07,
2946
+ "loss": 5.1984,
2947
+ "step": 414
2948
+ },
2949
+ {
2950
+ "epoch": 0.9776207302709069,
2951
+ "grad_norm": 8.399889945983887,
2952
+ "learning_rate": 1.503529416103988e-07,
2953
+ "loss": 4.4625,
2954
+ "step": 415
2955
+ },
2956
+ {
2957
+ "epoch": 0.9799764428739693,
2958
+ "grad_norm": 5.020269870758057,
2959
+ "learning_rate": 1.2179748700879012e-07,
2960
+ "loss": 4.6616,
2961
+ "step": 416
2962
+ },
2963
+ {
2964
+ "epoch": 0.9823321554770318,
2965
+ "grad_norm": 6.619906902313232,
2966
+ "learning_rate": 9.624325574890125e-08,
2967
+ "loss": 5.0221,
2968
+ "step": 417
2969
+ },
2970
+ {
2971
+ "epoch": 0.9846878680800942,
2972
+ "grad_norm": 6.48217248916626,
2973
+ "learning_rate": 7.369178545542088e-08,
2974
+ "loss": 4.5943,
2975
+ "step": 418
2976
+ },
2977
+ {
2978
+ "epoch": 0.9870435806831567,
2979
+ "grad_norm": 7.419217109680176,
2980
+ "learning_rate": 5.4144433073771707e-08,
2981
+ "loss": 5.5227,
2982
+ "step": 419
2983
+ },
2984
+ {
2985
+ "epoch": 0.9893992932862191,
2986
+ "grad_norm": 8.479371070861816,
2987
+ "learning_rate": 3.760237478849793e-08,
2988
+ "loss": 5.264,
2989
+ "step": 420
2990
+ },
2991
+ {
2992
+ "epoch": 0.9917550058892816,
2993
+ "grad_norm": 18.458097457885742,
2994
+ "learning_rate": 2.4066605952444142e-08,
2995
+ "loss": 5.3962,
2996
+ "step": 421
2997
+ },
2998
+ {
2999
+ "epoch": 0.9941107184923439,
3000
+ "grad_norm": 10.58730697631836,
3001
+ "learning_rate": 1.3537941026914303e-08,
3002
+ "loss": 4.2135,
3003
+ "step": 422
3004
+ },
3005
+ {
3006
+ "epoch": 0.9964664310954063,
3007
+ "grad_norm": 12.496702194213867,
3008
+ "learning_rate": 6.017013532627624e-09,
3009
+ "loss": 4.5984,
3010
+ "step": 423
3011
+ },
3012
+ {
3013
+ "epoch": 0.9988221436984688,
3014
+ "grad_norm": 18.563827514648438,
3015
+ "learning_rate": 1.5042760116212861e-09,
3016
+ "loss": 4.4128,
3017
+ "step": 424
3018
+ },
3019
+ {
3020
+ "epoch": 1.0011778563015312,
3021
+ "grad_norm": 32.98548889160156,
3022
+ "learning_rate": 0.0,
3023
+ "loss": 5.637,
3024
+ "step": 425
3025
  }
3026
  ],
3027
  "logging_steps": 1,
 
3045
  "should_evaluate": false,
3046
  "should_log": false,
3047
  "should_save": true,
3048
+ "should_training_stop": true
3049
  },
3050
  "attributes": {}
3051
  }
3052
  },
3053
+ "total_flos": 6.258827541809725e+17,
3054
  "train_batch_size": 8,
3055
  "trial_name": null,
3056
  "trial_params": null