Training in progress, step 425, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 671149168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9ece51e98aa5e08bb825978d3489e0352183f84e442b36189d9c7c900ccfa7b
|
3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 341314644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:307dcb2034acb404762de1201cda3ef739d641f9fded961a58856b8c457cf964
|
3 |
size 341314644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f714b289dd09c25736d2eb5fc6375b2e8df7d87d36c1242bd537b49bf96a11fa
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f0fa4917cbab6353dc9863ee5bc208db299bf865d0e1def1c60860a29287ca8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.053612470626831,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-400",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2847,6 +2847,181 @@
|
|
2847 |
"eval_samples_per_second": 9.993,
|
2848 |
"eval_steps_per_second": 2.502,
|
2849 |
"step": 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2850 |
}
|
2851 |
],
|
2852 |
"logging_steps": 1,
|
@@ -2870,12 +3045,12 @@
|
|
2870 |
"should_evaluate": false,
|
2871 |
"should_log": false,
|
2872 |
"should_save": true,
|
2873 |
-
"should_training_stop":
|
2874 |
},
|
2875 |
"attributes": {}
|
2876 |
}
|
2877 |
},
|
2878 |
-
"total_flos":
|
2879 |
"train_batch_size": 8,
|
2880 |
"trial_name": null,
|
2881 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.053612470626831,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-400",
|
4 |
+
"epoch": 1.0011778563015312,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 425,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2847 |
"eval_samples_per_second": 9.993,
|
2848 |
"eval_steps_per_second": 2.502,
|
2849 |
"step": 400
|
2850 |
+
},
|
2851 |
+
{
|
2852 |
+
"epoch": 0.944640753828033,
|
2853 |
+
"grad_norm": 2.428661346435547,
|
2854 |
+
"learning_rate": 8.639676646793382e-07,
|
2855 |
+
"loss": 2.9634,
|
2856 |
+
"step": 401
|
2857 |
+
},
|
2858 |
+
{
|
2859 |
+
"epoch": 0.9469964664310954,
|
2860 |
+
"grad_norm": 2.951463460922241,
|
2861 |
+
"learning_rate": 7.936574269178377e-07,
|
2862 |
+
"loss": 3.2676,
|
2863 |
+
"step": 402
|
2864 |
+
},
|
2865 |
+
{
|
2866 |
+
"epoch": 0.9493521790341578,
|
2867 |
+
"grad_norm": 2.77046537399292,
|
2868 |
+
"learning_rate": 7.263079859864297e-07,
|
2869 |
+
"loss": 3.3325,
|
2870 |
+
"step": 403
|
2871 |
+
},
|
2872 |
+
{
|
2873 |
+
"epoch": 0.9517078916372202,
|
2874 |
+
"grad_norm": 2.883030652999878,
|
2875 |
+
"learning_rate": 6.61923394371039e-07,
|
2876 |
+
"loss": 3.2712,
|
2877 |
+
"step": 404
|
2878 |
+
},
|
2879 |
+
{
|
2880 |
+
"epoch": 0.9540636042402827,
|
2881 |
+
"grad_norm": 2.923128843307495,
|
2882 |
+
"learning_rate": 6.005075261595494e-07,
|
2883 |
+
"loss": 3.4014,
|
2884 |
+
"step": 405
|
2885 |
+
},
|
2886 |
+
{
|
2887 |
+
"epoch": 0.9564193168433451,
|
2888 |
+
"grad_norm": 2.9878504276275635,
|
2889 |
+
"learning_rate": 5.42064076808646e-07,
|
2890 |
+
"loss": 3.7119,
|
2891 |
+
"step": 406
|
2892 |
+
},
|
2893 |
+
{
|
2894 |
+
"epoch": 0.9587750294464076,
|
2895 |
+
"grad_norm": 3.2617733478546143,
|
2896 |
+
"learning_rate": 4.865965629214819e-07,
|
2897 |
+
"loss": 3.5583,
|
2898 |
+
"step": 407
|
2899 |
+
},
|
2900 |
+
{
|
2901 |
+
"epoch": 0.9611307420494699,
|
2902 |
+
"grad_norm": 3.0995283126831055,
|
2903 |
+
"learning_rate": 4.341083220360864e-07,
|
2904 |
+
"loss": 3.511,
|
2905 |
+
"step": 408
|
2906 |
+
},
|
2907 |
+
{
|
2908 |
+
"epoch": 0.9634864546525324,
|
2909 |
+
"grad_norm": 3.3939716815948486,
|
2910 |
+
"learning_rate": 3.846025124245145e-07,
|
2911 |
+
"loss": 3.6265,
|
2912 |
+
"step": 409
|
2913 |
+
},
|
2914 |
+
{
|
2915 |
+
"epoch": 0.9658421672555948,
|
2916 |
+
"grad_norm": 3.365983486175537,
|
2917 |
+
"learning_rate": 3.380821129028489e-07,
|
2918 |
+
"loss": 3.8739,
|
2919 |
+
"step": 410
|
2920 |
+
},
|
2921 |
+
{
|
2922 |
+
"epoch": 0.9681978798586572,
|
2923 |
+
"grad_norm": 3.7531888484954834,
|
2924 |
+
"learning_rate": 2.945499226519322e-07,
|
2925 |
+
"loss": 3.5749,
|
2926 |
+
"step": 411
|
2927 |
+
},
|
2928 |
+
{
|
2929 |
+
"epoch": 0.9705535924617197,
|
2930 |
+
"grad_norm": 4.2459940910339355,
|
2931 |
+
"learning_rate": 2.5400856104894067e-07,
|
2932 |
+
"loss": 4.4775,
|
2933 |
+
"step": 412
|
2934 |
+
},
|
2935 |
+
{
|
2936 |
+
"epoch": 0.9729093050647821,
|
2937 |
+
"grad_norm": 4.396928310394287,
|
2938 |
+
"learning_rate": 2.1646046750978254e-07,
|
2939 |
+
"loss": 4.078,
|
2940 |
+
"step": 413
|
2941 |
+
},
|
2942 |
+
{
|
2943 |
+
"epoch": 0.9752650176678446,
|
2944 |
+
"grad_norm": 4.993990421295166,
|
2945 |
+
"learning_rate": 1.819079013423153e-07,
|
2946 |
+
"loss": 5.1984,
|
2947 |
+
"step": 414
|
2948 |
+
},
|
2949 |
+
{
|
2950 |
+
"epoch": 0.9776207302709069,
|
2951 |
+
"grad_norm": 8.399889945983887,
|
2952 |
+
"learning_rate": 1.503529416103988e-07,
|
2953 |
+
"loss": 4.4625,
|
2954 |
+
"step": 415
|
2955 |
+
},
|
2956 |
+
{
|
2957 |
+
"epoch": 0.9799764428739693,
|
2958 |
+
"grad_norm": 5.020269870758057,
|
2959 |
+
"learning_rate": 1.2179748700879012e-07,
|
2960 |
+
"loss": 4.6616,
|
2961 |
+
"step": 416
|
2962 |
+
},
|
2963 |
+
{
|
2964 |
+
"epoch": 0.9823321554770318,
|
2965 |
+
"grad_norm": 6.619906902313232,
|
2966 |
+
"learning_rate": 9.624325574890125e-08,
|
2967 |
+
"loss": 5.0221,
|
2968 |
+
"step": 417
|
2969 |
+
},
|
2970 |
+
{
|
2971 |
+
"epoch": 0.9846878680800942,
|
2972 |
+
"grad_norm": 6.48217248916626,
|
2973 |
+
"learning_rate": 7.369178545542088e-08,
|
2974 |
+
"loss": 4.5943,
|
2975 |
+
"step": 418
|
2976 |
+
},
|
2977 |
+
{
|
2978 |
+
"epoch": 0.9870435806831567,
|
2979 |
+
"grad_norm": 7.419217109680176,
|
2980 |
+
"learning_rate": 5.4144433073771707e-08,
|
2981 |
+
"loss": 5.5227,
|
2982 |
+
"step": 419
|
2983 |
+
},
|
2984 |
+
{
|
2985 |
+
"epoch": 0.9893992932862191,
|
2986 |
+
"grad_norm": 8.479371070861816,
|
2987 |
+
"learning_rate": 3.760237478849793e-08,
|
2988 |
+
"loss": 5.264,
|
2989 |
+
"step": 420
|
2990 |
+
},
|
2991 |
+
{
|
2992 |
+
"epoch": 0.9917550058892816,
|
2993 |
+
"grad_norm": 18.458097457885742,
|
2994 |
+
"learning_rate": 2.4066605952444142e-08,
|
2995 |
+
"loss": 5.3962,
|
2996 |
+
"step": 421
|
2997 |
+
},
|
2998 |
+
{
|
2999 |
+
"epoch": 0.9941107184923439,
|
3000 |
+
"grad_norm": 10.58730697631836,
|
3001 |
+
"learning_rate": 1.3537941026914303e-08,
|
3002 |
+
"loss": 4.2135,
|
3003 |
+
"step": 422
|
3004 |
+
},
|
3005 |
+
{
|
3006 |
+
"epoch": 0.9964664310954063,
|
3007 |
+
"grad_norm": 12.496702194213867,
|
3008 |
+
"learning_rate": 6.017013532627624e-09,
|
3009 |
+
"loss": 4.5984,
|
3010 |
+
"step": 423
|
3011 |
+
},
|
3012 |
+
{
|
3013 |
+
"epoch": 0.9988221436984688,
|
3014 |
+
"grad_norm": 18.563827514648438,
|
3015 |
+
"learning_rate": 1.5042760116212861e-09,
|
3016 |
+
"loss": 4.4128,
|
3017 |
+
"step": 424
|
3018 |
+
},
|
3019 |
+
{
|
3020 |
+
"epoch": 1.0011778563015312,
|
3021 |
+
"grad_norm": 32.98548889160156,
|
3022 |
+
"learning_rate": 0.0,
|
3023 |
+
"loss": 5.637,
|
3024 |
+
"step": 425
|
3025 |
}
|
3026 |
],
|
3027 |
"logging_steps": 1,
|
|
|
3045 |
"should_evaluate": false,
|
3046 |
"should_log": false,
|
3047 |
"should_save": true,
|
3048 |
+
"should_training_stop": true
|
3049 |
},
|
3050 |
"attributes": {}
|
3051 |
}
|
3052 |
},
|
3053 |
+
"total_flos": 6.258827541809725e+17,
|
3054 |
"train_batch_size": 8,
|
3055 |
"trial_name": null,
|
3056 |
"trial_params": null
|