Training in progress, step 100, checkpoint
Browse files- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4990951248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28b34625a8715b00a596425b15ee66b124a4a7c3839e8e8697370b2b5307b014
|
3 |
size 4990951248
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559197598
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e102159806ad9af0c0ec6c0199516dcc7b34bd576b8eec9fe119f832253f214a
|
3 |
size 559197598
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5640674860
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa5fe98727f7cf6b7ef710f9d8b2f8806f581b27b46fa2671c2078a23ec08ffa
|
3 |
size 5640674860
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4aa11e1920a65bd6f9d3d7705a39b4eb273d4b1d13425f4d93bdc9a32a38cf4
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b16a3f4fd0bbb2ef54173b8f8f9b473c05114aa603c01260cd9c7f8be1b6a9da
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47fb916367dfd73f6e83cfd68c3949b4fc2131be1fd25bfd542ffd71c9e3d87b
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98bbea1742a7fc36512eb73ae38f6fa9b654a9feabba8b189726de9331137352
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:898092e0bf0f6dbc7c89ddc78136fe76ce924561e71751216da91be467d8b5d1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -103,6 +103,28 @@
|
|
103 |
"eval_samples_per_second": 73.94,
|
104 |
"eval_steps_per_second": 3.103,
|
105 |
"step": 80
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
}
|
107 |
],
|
108 |
"logging_steps": 10,
|
@@ -122,7 +144,7 @@
|
|
122 |
"attributes": {}
|
123 |
}
|
124 |
},
|
125 |
-
"total_flos":
|
126 |
"train_batch_size": 6,
|
127 |
"trial_name": null,
|
128 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.225806451612903,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
103 |
"eval_samples_per_second": 73.94,
|
104 |
"eval_steps_per_second": 3.103,
|
105 |
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 2.903225806451613,
|
109 |
+
"grad_norm": 32.25,
|
110 |
+
"learning_rate": 0.00019090065350491626,
|
111 |
+
"loss": 2.4592,
|
112 |
+
"step": 90
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 3.225806451612903,
|
116 |
+
"grad_norm": 61.25,
|
117 |
+
"learning_rate": 0.0001879473751206489,
|
118 |
+
"loss": 2.3718,
|
119 |
+
"step": 100
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 3.225806451612903,
|
123 |
+
"eval_loss": 2.7737066745758057,
|
124 |
+
"eval_runtime": 20.2807,
|
125 |
+
"eval_samples_per_second": 74.011,
|
126 |
+
"eval_steps_per_second": 3.106,
|
127 |
+
"step": 100
|
128 |
}
|
129 |
],
|
130 |
"logging_steps": 10,
|
|
|
144 |
"attributes": {}
|
145 |
}
|
146 |
},
|
147 |
+
"total_flos": 7.791231950965965e+16,
|
148 |
"train_batch_size": 6,
|
149 |
"trial_name": null,
|
150 |
"trial_params": null
|