Training in progress, epoch 3
Browse files- model.safetensors +1 -1
- run-2/checkpoint-1066/model.safetensors +1 -1
- run-2/checkpoint-1066/optimizer.pt +1 -1
- run-2/checkpoint-1066/scheduler.pt +1 -1
- run-2/checkpoint-1066/trainer_state.json +21 -24
- run-2/checkpoint-1066/training_args.bin +1 -1
- run-2/checkpoint-1599/model.safetensors +1 -1
- run-2/checkpoint-1599/optimizer.pt +1 -1
- run-2/checkpoint-1599/scheduler.pt +1 -1
- run-2/checkpoint-1599/trainer_state.json +29 -32
- run-2/checkpoint-1599/training_args.bin +1 -1
- runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713458197.544fc269209b.792.2 +2 -2
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:039221f87fe5006e5a007dc56327ae02634d85210f4b85121cfc93ce774db253
|
3 |
size 409103316
|
run-2/checkpoint-1066/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ca35b271530315f2f01ac7abd9889d24774f1c3eedefb9435700d38b775ff18
|
3 |
size 409103316
|
run-2/checkpoint-1066/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:228635f9d33fbffe83173d5ba2f01c1d6d488e839960208c15b03588eda72308
|
3 |
size 818327802
|
run-2/checkpoint-1066/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43c26a4462a8ed7deae233a5c9e29e1937c85b2f67202abb6545afceac724111
|
3 |
size 1064
|
run-2/checkpoint-1066/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "BERT-WMM/run-2/checkpoint-
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 1066,
|
@@ -10,49 +10,46 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.94,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_loss": 0.
|
22 |
-
"eval_runtime":
|
23 |
-
"eval_samples_per_second":
|
24 |
-
"eval_steps_per_second":
|
25 |
"step": 533
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.88,
|
29 |
-
"grad_norm":
|
30 |
-
"learning_rate":
|
31 |
-
"loss": 0.
|
32 |
"step": 1000
|
33 |
},
|
34 |
{
|
35 |
"epoch": 2.0,
|
36 |
-
"eval_accuracy": 0.
|
37 |
-
"eval_loss": 0.
|
38 |
-
"eval_runtime": 2.
|
39 |
-
"eval_samples_per_second":
|
40 |
-
"eval_steps_per_second":
|
41 |
"step": 1066
|
42 |
}
|
43 |
],
|
44 |
"logging_steps": 500,
|
45 |
-
"max_steps":
|
46 |
"num_input_tokens_seen": 0,
|
47 |
-
"num_train_epochs":
|
48 |
"save_steps": 500,
|
49 |
"total_flos": 338261076519408.0,
|
50 |
"train_batch_size": 16,
|
51 |
"trial_name": null,
|
52 |
"trial_params": {
|
53 |
-
"learning_rate":
|
54 |
-
"num_train_epochs": 6,
|
55 |
-
"per_device_train_batch_size": 16,
|
56 |
-
"weight_decay": 0.002285356379677195
|
57 |
}
|
58 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6166056394577026,
|
3 |
+
"best_model_checkpoint": "BERT-WMM/run-2/checkpoint-1066",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 1066,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.94,
|
13 |
+
"grad_norm": 11.24305248260498,
|
14 |
+
"learning_rate": 2.922625428602704e-06,
|
15 |
+
"loss": 0.7917,
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.7352112676056338,
|
21 |
+
"eval_loss": 0.647247314453125,
|
22 |
+
"eval_runtime": 2.1238,
|
23 |
+
"eval_samples_per_second": 1002.939,
|
24 |
+
"eval_steps_per_second": 63.096,
|
25 |
"step": 533
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.88,
|
29 |
+
"grad_norm": 23.33644676208496,
|
30 |
+
"learning_rate": 1.5929505293294082e-06,
|
31 |
+
"loss": 0.5783,
|
32 |
"step": 1000
|
33 |
},
|
34 |
{
|
35 |
"epoch": 2.0,
|
36 |
+
"eval_accuracy": 0.7511737089201878,
|
37 |
+
"eval_loss": 0.6166056394577026,
|
38 |
+
"eval_runtime": 2.7158,
|
39 |
+
"eval_samples_per_second": 784.299,
|
40 |
+
"eval_steps_per_second": 49.341,
|
41 |
"step": 1066
|
42 |
}
|
43 |
],
|
44 |
"logging_steps": 500,
|
45 |
+
"max_steps": 1599,
|
46 |
"num_input_tokens_seen": 0,
|
47 |
+
"num_train_epochs": 3,
|
48 |
"save_steps": 500,
|
49 |
"total_flos": 338261076519408.0,
|
50 |
"train_batch_size": 16,
|
51 |
"trial_name": null,
|
52 |
"trial_params": {
|
53 |
+
"learning_rate": 4.252300327876e-06
|
|
|
|
|
|
|
54 |
}
|
55 |
}
|
run-2/checkpoint-1066/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b47f82980a9cdafe965500d0bc73ea3bdf726aae69cba15d6dccf27551f040c
|
3 |
size 4856
|
run-2/checkpoint-1599/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:039221f87fe5006e5a007dc56327ae02634d85210f4b85121cfc93ce774db253
|
3 |
size 409103316
|
run-2/checkpoint-1599/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0cb7773c8e10dd0f24fff2834e66435c66947f925e0e6a1322f10e4198ca110
|
3 |
size 818327802
|
run-2/checkpoint-1599/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:990859fad7f2a4e71666497c0ed5862e191297af7bf46464b786b129838d8ea1
|
3 |
size 1064
|
run-2/checkpoint-1599/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "BERT-WMM/run-2/checkpoint-
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 1599,
|
@@ -10,65 +10,62 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.94,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_loss": 0.
|
22 |
-
"eval_runtime":
|
23 |
-
"eval_samples_per_second":
|
24 |
-
"eval_steps_per_second":
|
25 |
"step": 533
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.88,
|
29 |
-
"grad_norm":
|
30 |
-
"learning_rate":
|
31 |
-
"loss": 0.
|
32 |
"step": 1000
|
33 |
},
|
34 |
{
|
35 |
"epoch": 2.0,
|
36 |
-
"eval_accuracy": 0.
|
37 |
-
"eval_loss": 0.
|
38 |
-
"eval_runtime": 2.
|
39 |
-
"eval_samples_per_second":
|
40 |
-
"eval_steps_per_second":
|
41 |
"step": 1066
|
42 |
},
|
43 |
{
|
44 |
"epoch": 2.81,
|
45 |
-
"grad_norm":
|
46 |
-
"learning_rate":
|
47 |
-
"loss": 0.
|
48 |
"step": 1500
|
49 |
},
|
50 |
{
|
51 |
"epoch": 3.0,
|
52 |
-
"eval_accuracy": 0.
|
53 |
-
"eval_loss": 0.
|
54 |
-
"eval_runtime": 2.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second": 64.
|
57 |
"step": 1599
|
58 |
}
|
59 |
],
|
60 |
"logging_steps": 500,
|
61 |
-
"max_steps":
|
62 |
"num_input_tokens_seen": 0,
|
63 |
-
"num_train_epochs":
|
64 |
"save_steps": 500,
|
65 |
"total_flos": 507646505902536.0,
|
66 |
"train_batch_size": 16,
|
67 |
"trial_name": null,
|
68 |
"trial_params": {
|
69 |
-
"learning_rate":
|
70 |
-
"num_train_epochs": 6,
|
71 |
-
"per_device_train_batch_size": 16,
|
72 |
-
"weight_decay": 0.002285356379677195
|
73 |
}
|
74 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6166056394577026,
|
3 |
+
"best_model_checkpoint": "BERT-WMM/run-2/checkpoint-1066",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 1599,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.94,
|
13 |
+
"grad_norm": 11.24305248260498,
|
14 |
+
"learning_rate": 2.922625428602704e-06,
|
15 |
+
"loss": 0.7917,
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.7352112676056338,
|
21 |
+
"eval_loss": 0.647247314453125,
|
22 |
+
"eval_runtime": 2.1238,
|
23 |
+
"eval_samples_per_second": 1002.939,
|
24 |
+
"eval_steps_per_second": 63.096,
|
25 |
"step": 533
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.88,
|
29 |
+
"grad_norm": 23.33644676208496,
|
30 |
+
"learning_rate": 1.5929505293294082e-06,
|
31 |
+
"loss": 0.5783,
|
32 |
"step": 1000
|
33 |
},
|
34 |
{
|
35 |
"epoch": 2.0,
|
36 |
+
"eval_accuracy": 0.7511737089201878,
|
37 |
+
"eval_loss": 0.6166056394577026,
|
38 |
+
"eval_runtime": 2.7158,
|
39 |
+
"eval_samples_per_second": 784.299,
|
40 |
+
"eval_steps_per_second": 49.341,
|
41 |
"step": 1066
|
42 |
},
|
43 |
{
|
44 |
"epoch": 2.81,
|
45 |
+
"grad_norm": 19.725927352905273,
|
46 |
+
"learning_rate": 2.6327563005611253e-07,
|
47 |
+
"loss": 0.5084,
|
48 |
"step": 1500
|
49 |
},
|
50 |
{
|
51 |
"epoch": 3.0,
|
52 |
+
"eval_accuracy": 0.7549295774647887,
|
53 |
+
"eval_loss": 0.6294087171554565,
|
54 |
+
"eval_runtime": 2.0645,
|
55 |
+
"eval_samples_per_second": 1031.734,
|
56 |
+
"eval_steps_per_second": 64.907,
|
57 |
"step": 1599
|
58 |
}
|
59 |
],
|
60 |
"logging_steps": 500,
|
61 |
+
"max_steps": 1599,
|
62 |
"num_input_tokens_seen": 0,
|
63 |
+
"num_train_epochs": 3,
|
64 |
"save_steps": 500,
|
65 |
"total_flos": 507646505902536.0,
|
66 |
"train_batch_size": 16,
|
67 |
"trial_name": null,
|
68 |
"trial_params": {
|
69 |
+
"learning_rate": 4.252300327876e-06
|
|
|
|
|
|
|
70 |
}
|
71 |
}
|
run-2/checkpoint-1599/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b47f82980a9cdafe965500d0bc73ea3bdf726aae69cba15d6dccf27551f040c
|
3 |
size 4856
|
runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713458197.544fc269209b.792.2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a461b67c74679f1a4f5f40cfa5cf536220ee957118befef57aa628677112961e
|
3 |
+
size 6846
|