Training in progress, epoch 1
Browse files- logs/events.out.tfevents.1711296799.8cc2eef2edb7.2942.1 +2 -2
- logs/events.out.tfevents.1711296858.8cc2eef2edb7.2942.2 +3 -0
- model.safetensors +1 -1
- run-1/checkpoint-891/config.json +1 -1
- run-1/checkpoint-891/model.safetensors +1 -1
- run-1/checkpoint-891/optimizer.pt +2 -2
- run-1/checkpoint-891/rng_state.pth +2 -2
- run-1/checkpoint-891/scheduler.pt +1 -1
- run-1/checkpoint-891/tokenizer.json +1 -1
- run-1/checkpoint-891/trainer_state.json +45 -33
- run-1/checkpoint-891/training_args.bin +2 -2
- run-2/checkpoint-297/config.json +1 -1
- run-2/checkpoint-297/model.safetensors +1 -1
- run-2/checkpoint-297/optimizer.pt +2 -2
- run-2/checkpoint-297/rng_state.pth +2 -2
- run-2/checkpoint-297/scheduler.pt +1 -1
- run-2/checkpoint-297/tokenizer.json +1 -1
- run-2/checkpoint-297/trainer_state.json +17 -13
- run-2/checkpoint-297/training_args.bin +2 -2
- training_args.bin +2 -2
logs/events.out.tfevents.1711296799.8cc2eef2edb7.2942.1
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f1a5654e3d6fba66852b0b3fdbbe538b4d682b3fd4796ddd8536724429535cd
|
3 |
+
size 7220
|
logs/events.out.tfevents.1711296858.8cc2eef2edb7.2942.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9474f9bb69d62b8f8bbe6dd7caf55abc5b4f0dc9fc15dfe2101e9aa6fdd738c
|
3 |
+
size 5407
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcd6611d9aeb31d0657795a4eaca47f364cd69272d929d8b6df5c3bda9e50e99
|
3 |
size 17549312
|
run-1/checkpoint-891/config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.39.1",
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
run-1/checkpoint-891/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9040181db8d76005f676d97b5534a17a4eb2972656beafd2186dc0a419359f6a
|
3 |
size 17549312
|
run-1/checkpoint-891/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e2af4c26f15a15ff765a52e923766a4900ce6828c6c1331d3fd8bfb0624998a
|
3 |
+
size 35123898
|
run-1/checkpoint-891/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45b689ca5add5bd2b3024a96e15979227e5788dfce70568554fb1702c585abab
|
3 |
+
size 14308
|
run-1/checkpoint-891/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:291008d3b6238846646f9b85fa92243b0cc21db6c35ba667d2fe4cc4e955c671
|
3 |
size 1064
|
run-1/checkpoint-891/tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
-
"max_length":
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
+
"max_length": 31,
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
run-1/checkpoint-891/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 891,
|
@@ -10,65 +10,77 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 297
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
|
|
|
|
|
|
|
|
25 |
"step": 297
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"grad_norm":
|
30 |
-
"learning_rate":
|
31 |
-
"loss": 0.
|
32 |
"step": 594
|
33 |
},
|
34 |
{
|
35 |
"epoch": 2.0,
|
36 |
-
"eval_accuracy": 0.
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"
|
|
|
|
|
|
|
|
|
41 |
"step": 594
|
42 |
},
|
43 |
{
|
44 |
"epoch": 3.0,
|
45 |
-
"grad_norm":
|
46 |
-
"learning_rate": 0.
|
47 |
-
"loss": 0.
|
48 |
"step": 891
|
49 |
},
|
50 |
{
|
51 |
"epoch": 3.0,
|
52 |
-
"eval_accuracy": 0.
|
53 |
-
"
|
54 |
-
"
|
55 |
-
"
|
56 |
-
"
|
|
|
|
|
|
|
|
|
57 |
"step": 891
|
58 |
}
|
59 |
],
|
60 |
"logging_steps": 500,
|
61 |
-
"max_steps":
|
62 |
"num_input_tokens_seen": 0,
|
63 |
-
"num_train_epochs":
|
64 |
"save_steps": 500,
|
65 |
-
"total_flos":
|
66 |
"train_batch_size": 32,
|
67 |
"trial_name": null,
|
68 |
"trial_params": {
|
69 |
-
"alpha": 0.
|
70 |
-
"learning_rate":
|
71 |
-
"num_train_epochs":
|
72 |
-
"temperature":
|
73 |
}
|
74 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5425742574257426,
|
3 |
+
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-594",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 891,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.9758312702178955,
|
14 |
+
"learning_rate": 2.4789676674761582e-05,
|
15 |
+
"loss": 0.4903,
|
16 |
"step": 297
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.502970297029703,
|
21 |
+
"eval_f1": 0.08058608058608059,
|
22 |
+
"eval_loss": 0.48342981934547424,
|
23 |
+
"eval_mcc": 0.010331488326433175,
|
24 |
+
"eval_precision": 0.5238095238095238,
|
25 |
+
"eval_recall": 0.04365079365079365,
|
26 |
+
"eval_runtime": 0.9315,
|
27 |
+
"eval_samples_per_second": 542.124,
|
28 |
+
"eval_steps_per_second": 17.176,
|
29 |
"step": 297
|
30 |
},
|
31 |
{
|
32 |
"epoch": 2.0,
|
33 |
+
"grad_norm": 0.7211948037147522,
|
34 |
+
"learning_rate": 1.2394838337380791e-05,
|
35 |
+
"loss": 0.4812,
|
36 |
"step": 594
|
37 |
},
|
38 |
{
|
39 |
"epoch": 2.0,
|
40 |
+
"eval_accuracy": 0.5425742574257426,
|
41 |
+
"eval_f1": 0.3773584905660377,
|
42 |
+
"eval_loss": 0.4795511066913605,
|
43 |
+
"eval_mcc": 0.09908299956847767,
|
44 |
+
"eval_precision": 0.5882352941176471,
|
45 |
+
"eval_recall": 0.2777777777777778,
|
46 |
+
"eval_runtime": 0.9303,
|
47 |
+
"eval_samples_per_second": 542.812,
|
48 |
+
"eval_steps_per_second": 17.198,
|
49 |
"step": 594
|
50 |
},
|
51 |
{
|
52 |
"epoch": 3.0,
|
53 |
+
"grad_norm": 0.8099855184555054,
|
54 |
+
"learning_rate": 0.0,
|
55 |
+
"loss": 0.4778,
|
56 |
"step": 891
|
57 |
},
|
58 |
{
|
59 |
"epoch": 3.0,
|
60 |
+
"eval_accuracy": 0.5326732673267327,
|
61 |
+
"eval_f1": 0.32954545454545453,
|
62 |
+
"eval_loss": 0.47736045718193054,
|
63 |
+
"eval_mcc": 0.08048876531271514,
|
64 |
+
"eval_precision": 0.58,
|
65 |
+
"eval_recall": 0.23015873015873015,
|
66 |
+
"eval_runtime": 0.9394,
|
67 |
+
"eval_samples_per_second": 537.574,
|
68 |
+
"eval_steps_per_second": 17.032,
|
69 |
"step": 891
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 500,
|
73 |
+
"max_steps": 891,
|
74 |
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 3,
|
76 |
"save_steps": 500,
|
77 |
+
"total_flos": 2192103797940.0,
|
78 |
"train_batch_size": 32,
|
79 |
"trial_name": null,
|
80 |
"trial_params": {
|
81 |
+
"alpha": 0.6389006598546823,
|
82 |
+
"learning_rate": 3.718451501214238e-05,
|
83 |
+
"num_train_epochs": 3,
|
84 |
+
"temperature": 17
|
85 |
}
|
86 |
}
|
run-1/checkpoint-891/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6731b358035f4120a393e277eadf83407dfaf70785f0599f775fe36a1054fb1
|
3 |
+
size 4920
|
run-2/checkpoint-297/config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.39.1",
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
run-2/checkpoint-297/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcd6611d9aeb31d0657795a4eaca47f364cd69272d929d8b6df5c3bda9e50e99
|
3 |
size 17549312
|
run-2/checkpoint-297/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5715f10ee578dc0e25cbb1e64595d2d14801c8fac136a5b282b868d389c6ae
|
3 |
+
size 35123898
|
run-2/checkpoint-297/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5f5e396c574b9b2ecc40d3f9e87d628a8f4be69caef81905a47fcf8895067e6
|
3 |
+
size 14308
|
run-2/checkpoint-297/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eec7f7712dcbdabc9e134706fd1460f920b47b7ffce0b705c1cbe548ebce60db
|
3 |
size 1064
|
run-2/checkpoint-297/tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
-
"max_length":
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
+
"max_length": 31,
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
run-2/checkpoint-297/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-297",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
@@ -10,18 +10,22 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 1.
|
15 |
-
"loss": 0.
|
16 |
"step": 297
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
|
|
|
|
|
|
|
|
25 |
"step": 297
|
26 |
}
|
27 |
],
|
@@ -30,13 +34,13 @@
|
|
30 |
"num_input_tokens_seen": 0,
|
31 |
"num_train_epochs": 9,
|
32 |
"save_steps": 500,
|
33 |
-
"total_flos":
|
34 |
"train_batch_size": 32,
|
35 |
"trial_name": null,
|
36 |
"trial_params": {
|
37 |
-
"alpha": 0.
|
38 |
-
"learning_rate": 1.
|
39 |
"num_train_epochs": 9,
|
40 |
-
"temperature":
|
41 |
}
|
42 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4910891089108911,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-297",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.6258772015571594,
|
14 |
+
"learning_rate": 1.171336966673164e-05,
|
15 |
+
"loss": 0.303,
|
16 |
"step": 297
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.4910891089108911,
|
21 |
+
"eval_f1": 0.0,
|
22 |
+
"eval_loss": 0.28316354751586914,
|
23 |
+
"eval_mcc": -0.09980217586956908,
|
24 |
+
"eval_precision": 0.0,
|
25 |
+
"eval_recall": 0.0,
|
26 |
+
"eval_runtime": 0.9281,
|
27 |
+
"eval_samples_per_second": 544.113,
|
28 |
+
"eval_steps_per_second": 17.239,
|
29 |
"step": 297
|
30 |
}
|
31 |
],
|
|
|
34 |
"num_input_tokens_seen": 0,
|
35 |
"num_train_epochs": 9,
|
36 |
"save_steps": 500,
|
37 |
+
"total_flos": 730701265980.0,
|
38 |
"train_batch_size": 32,
|
39 |
"trial_name": null,
|
40 |
"trial_params": {
|
41 |
+
"alpha": 0.33782488262757904,
|
42 |
+
"learning_rate": 1.3177540875073095e-05,
|
43 |
"num_train_epochs": 9,
|
44 |
+
"temperature": 26
|
45 |
}
|
46 |
}
|
run-2/checkpoint-297/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28b3023982056159c033a3a1a25c2d159010d9ff247770a44c7d7d6d0e3bba5b
|
3 |
+
size 4920
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28b3023982056159c033a3a1a25c2d159010d9ff247770a44c7d7d6d0e3bba5b
|
3 |
+
size 4920
|