Training in progress, step 278, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73911112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8cf8057910d23a062865985ba852f2938eae817046dfaf5681661132eaf58de
|
3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 37431220
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3538e3394b23b3adf8ac36f79431127cf553e2ff2786b3ddb23d8b296f28eeaa
|
3 |
size 37431220
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0786adcac2c7889cfa58c3afdb23ea4f83c558cdd0755806988521c514ef08d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1925,6 +1925,34 @@
|
|
1925 |
"learning_rate": 6.818181818181818e-05,
|
1926 |
"loss": 1.1167,
|
1927 |
"step": 274
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1928 |
}
|
1929 |
],
|
1930 |
"logging_steps": 1,
|
@@ -1944,7 +1972,7 @@
|
|
1944 |
"attributes": {}
|
1945 |
}
|
1946 |
},
|
1947 |
-
"total_flos": 3.
|
1948 |
"train_batch_size": 4,
|
1949 |
"trial_name": null,
|
1950 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7626886145404664,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 278,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1925 |
"learning_rate": 6.818181818181818e-05,
|
1926 |
"loss": 1.1167,
|
1927 |
"step": 274
|
1928 |
+
},
|
1929 |
+
{
|
1930 |
+
"epoch": 0.7544581618655692,
|
1931 |
+
"grad_norm": 0.15378378331661224,
|
1932 |
+
"learning_rate": 6.742424242424242e-05,
|
1933 |
+
"loss": 1.2406,
|
1934 |
+
"step": 275
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 0.757201646090535,
|
1938 |
+
"grad_norm": 0.16972492635250092,
|
1939 |
+
"learning_rate": 6.666666666666667e-05,
|
1940 |
+
"loss": 1.2506,
|
1941 |
+
"step": 276
|
1942 |
+
},
|
1943 |
+
{
|
1944 |
+
"epoch": 0.7599451303155007,
|
1945 |
+
"grad_norm": 0.16112364828586578,
|
1946 |
+
"learning_rate": 6.59090909090909e-05,
|
1947 |
+
"loss": 1.1676,
|
1948 |
+
"step": 277
|
1949 |
+
},
|
1950 |
+
{
|
1951 |
+
"epoch": 0.7626886145404664,
|
1952 |
+
"grad_norm": 0.1625635176897049,
|
1953 |
+
"learning_rate": 6.515151515151516e-05,
|
1954 |
+
"loss": 1.1607,
|
1955 |
+
"step": 278
|
1956 |
}
|
1957 |
],
|
1958 |
"logging_steps": 1,
|
|
|
1972 |
"attributes": {}
|
1973 |
}
|
1974 |
},
|
1975 |
+
"total_flos": 3.438408126791516e+17,
|
1976 |
"train_batch_size": 4,
|
1977 |
"trial_name": null,
|
1978 |
"trial_params": null
|