Training in progress, step 3100
Browse files- adapter_model.safetensors +1 -1
- metrics.json +1 -0
- state.json +33 -3
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35668592
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:487d48efb52d69cb5eca5d0442f4f8d423e30d7bb977365b8313665dc2c93ede
|
3 |
size 35668592
|
metrics.json
CHANGED
@@ -59,3 +59,4 @@
|
|
59 |
{"Step":2950,"eval_loss":1.7549589872,"eval_runtime":29.4284,"eval_samples_per_second":3.398,"eval_steps_per_second":0.442,"epoch":2.3173605656}
|
60 |
{"Step":3000,"eval_loss":1.7528626919,"eval_runtime":29.4272,"eval_samples_per_second":3.398,"eval_steps_per_second":0.442,"epoch":2.3566378633}
|
61 |
{"Step":3050,"eval_loss":1.7513557673,"eval_runtime":29.577,"eval_samples_per_second":3.381,"eval_steps_per_second":0.44,"epoch":2.395915161}
|
|
|
|
59 |
{"Step":2950,"eval_loss":1.7549589872,"eval_runtime":29.4284,"eval_samples_per_second":3.398,"eval_steps_per_second":0.442,"epoch":2.3173605656}
|
60 |
{"Step":3000,"eval_loss":1.7528626919,"eval_runtime":29.4272,"eval_samples_per_second":3.398,"eval_steps_per_second":0.442,"epoch":2.3566378633}
|
61 |
{"Step":3050,"eval_loss":1.7513557673,"eval_runtime":29.577,"eval_samples_per_second":3.381,"eval_steps_per_second":0.44,"epoch":2.395915161}
|
62 |
+
{"Step":3100,"eval_loss":1.7509515285,"eval_runtime":29.3774,"eval_samples_per_second":3.404,"eval_steps_per_second":0.443,"epoch":2.4351924588}
|
state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -907,6 +907,36 @@
|
|
907 |
"eval_samples_per_second": 3.398,
|
908 |
"eval_steps_per_second": 0.442,
|
909 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
910 |
}
|
911 |
],
|
912 |
"logging_steps": 50,
|
@@ -926,7 +956,7 @@
|
|
926 |
"attributes": {}
|
927 |
}
|
928 |
},
|
929 |
-
"total_flos": 3.
|
930 |
"train_batch_size": 8,
|
931 |
"trial_name": null,
|
932 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.4359780047132755,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 3101,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
907 |
"eval_samples_per_second": 3.398,
|
908 |
"eval_steps_per_second": 0.442,
|
909 |
"step": 3000
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 2.3959151610369207,
|
913 |
+
"grad_norm": 0.20650461316108704,
|
914 |
+
"learning_rate": 0.00011187081757346524,
|
915 |
+
"loss": 1.7575,
|
916 |
+
"step": 3050
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 2.3959151610369207,
|
920 |
+
"eval_loss": 1.751355767250061,
|
921 |
+
"eval_runtime": 29.577,
|
922 |
+
"eval_samples_per_second": 3.381,
|
923 |
+
"eval_steps_per_second": 0.44,
|
924 |
+
"step": 3050
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"epoch": 2.4351924587588374,
|
928 |
+
"grad_norm": 0.22746974229812622,
|
929 |
+
"learning_rate": 0.00010459703229560663,
|
930 |
+
"loss": 1.7547,
|
931 |
+
"step": 3100
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"epoch": 2.4351924587588374,
|
935 |
+
"eval_loss": 1.7509515285491943,
|
936 |
+
"eval_runtime": 29.3774,
|
937 |
+
"eval_samples_per_second": 3.404,
|
938 |
+
"eval_steps_per_second": 0.443,
|
939 |
+
"step": 3100
|
940 |
}
|
941 |
],
|
942 |
"logging_steps": 50,
|
|
|
956 |
"attributes": {}
|
957 |
}
|
958 |
},
|
959 |
+
"total_flos": 3.89905821256704e+16,
|
960 |
"train_batch_size": 8,
|
961 |
"trial_name": null,
|
962 |
"trial_params": null
|