Training in progress, epoch 1
Browse files- logs/events.out.tfevents.1709696898.a2333cf40ebd.21010.2 +2 -2
- logs/events.out.tfevents.1709697132.a2333cf40ebd.21010.3 +3 -0
- model.safetensors +1 -1
- run-2/checkpoint-192/model.safetensors +1 -1
- run-2/checkpoint-192/optimizer.pt +1 -1
- run-2/checkpoint-192/scheduler.pt +1 -1
- run-2/checkpoint-192/trainer_state.json +29 -29
- run-2/checkpoint-192/training_args.bin +1 -1
- run-3/checkpoint-96/model.safetensors +1 -1
- run-3/checkpoint-96/optimizer.pt +1 -1
- run-3/checkpoint-96/scheduler.pt +1 -1
- run-3/checkpoint-96/trainer_state.json +18 -18
- run-3/checkpoint-96/training_args.bin +1 -1
- training_args.bin +1 -1
logs/events.out.tfevents.1709696898.a2333cf40ebd.21010.2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:472d2c079ca368f63e847799aa435aa51a891cbb7f605f5b0397c83e12d17732
|
3 |
+
size 6351
|
logs/events.out.tfevents.1709697132.a2333cf40ebd.21010.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a291ac31fba750739770fa01afde949394278bdc98f20828e00ff7238845a6ab
|
3 |
+
size 5315
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:446c2c31d01eea9266672f75b6ffe66317a4cdf5130baa8b7dd4d988ec7f3dd9
|
3 |
size 17549312
|
run-2/checkpoint-192/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:444ec7032343c5e48a0bebcbc727c0d6dcc53de822b7281218d78d3d52fc74a5
|
3 |
size 17549312
|
run-2/checkpoint-192/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35122746
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7d65e468a5ea6ece563e4a260f8a916b8919ff2c935298cbc0e77b353da1ae9
|
3 |
size 35122746
|
run-2/checkpoint-192/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76b1ca96b43548704a5f4d11597321c5afbd7e065f2cc83810db95ee4c1c77f7
|
3 |
size 1064
|
run-2/checkpoint-192/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-192",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,55 +10,55 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 0.
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_precision": 0.
|
24 |
-
"eval_recall": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second": 1.
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"grad_norm": 2.
|
33 |
-
"learning_rate": 0.
|
34 |
-
"loss": 0.
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
-
"eval_accuracy": 0.
|
40 |
-
"eval_f1": 0.
|
41 |
-
"eval_loss": 0.
|
42 |
-
"eval_precision": 0.
|
43 |
-
"eval_recall": 0.
|
44 |
-
"eval_runtime":
|
45 |
-
"eval_samples_per_second":
|
46 |
-
"eval_steps_per_second": 1.
|
47 |
"step": 192
|
48 |
}
|
49 |
],
|
50 |
"logging_steps": 500,
|
51 |
-
"max_steps":
|
52 |
"num_input_tokens_seen": 0,
|
53 |
-
"num_train_epochs":
|
54 |
"save_steps": 500,
|
55 |
"total_flos": 471390394560.0,
|
56 |
"train_batch_size": 32,
|
57 |
"trial_name": null,
|
58 |
"trial_params": {
|
59 |
-
"alpha": 0.
|
60 |
-
"learning_rate": 0.
|
61 |
-
"num_train_epochs":
|
62 |
-
"temperature":
|
63 |
}
|
64 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8140900195694716,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-192",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 3.4323604106903076,
|
14 |
+
"learning_rate": 0.0004667999543114754,
|
15 |
+
"loss": 0.533,
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.7984344422700587,
|
21 |
+
"eval_f1": 0.8049242424242424,
|
22 |
+
"eval_loss": 0.4442267119884491,
|
23 |
+
"eval_precision": 0.7798165137614679,
|
24 |
+
"eval_recall": 0.8317025440313112,
|
25 |
+
"eval_runtime": 28.6457,
|
26 |
+
"eval_samples_per_second": 35.677,
|
27 |
+
"eval_steps_per_second": 1.117,
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
+
"grad_norm": 2.4163033962249756,
|
33 |
+
"learning_rate": 0.0,
|
34 |
+
"loss": 0.4374,
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.8140900195694716,
|
40 |
+
"eval_f1": 0.8243992606284659,
|
41 |
+
"eval_loss": 0.4299594759941101,
|
42 |
+
"eval_precision": 0.7810858143607706,
|
43 |
+
"eval_recall": 0.87279843444227,
|
44 |
+
"eval_runtime": 28.93,
|
45 |
+
"eval_samples_per_second": 35.327,
|
46 |
+
"eval_steps_per_second": 1.106,
|
47 |
"step": 192
|
48 |
}
|
49 |
],
|
50 |
"logging_steps": 500,
|
51 |
+
"max_steps": 192,
|
52 |
"num_input_tokens_seen": 0,
|
53 |
+
"num_train_epochs": 2,
|
54 |
"save_steps": 500,
|
55 |
"total_flos": 471390394560.0,
|
56 |
"train_batch_size": 32,
|
57 |
"trial_name": null,
|
58 |
"trial_params": {
|
59 |
+
"alpha": 0.88253398201078,
|
60 |
+
"learning_rate": 0.0009335999086229508,
|
61 |
+
"num_train_epochs": 2,
|
62 |
+
"temperature": 19
|
63 |
}
|
64 |
}
|
run-2/checkpoint-192/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2659042d49fa36ce1404b1f7057dd4643dc22f653d1fc44d521700862149e4b7
|
3 |
size 4920
|
run-3/checkpoint-96/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:446c2c31d01eea9266672f75b6ffe66317a4cdf5130baa8b7dd4d988ec7f3dd9
|
3 |
size 17549312
|
run-3/checkpoint-96/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35122746
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bde5e387217b41822b0ca9ea98caa7129bea059867adb471204d76b01650315d
|
3 |
size 35122746
|
run-3/checkpoint-96/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f89cc7dd2ab1ceae4cc5d3844408884d87d0a8424a3ba21161199ee633ef9b97
|
3 |
size 1064
|
run-3/checkpoint-96/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-96",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
@@ -10,36 +10,36 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_precision": 0.
|
24 |
-
"eval_recall": 0
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second": 1.
|
28 |
"step": 96
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
-
"max_steps":
|
33 |
"num_input_tokens_seen": 0,
|
34 |
-
"num_train_epochs":
|
35 |
"save_steps": 500,
|
36 |
"total_flos": 235695197280.0,
|
37 |
"train_batch_size": 32,
|
38 |
"trial_name": null,
|
39 |
"trial_params": {
|
40 |
-
"alpha": 0.
|
41 |
-
"learning_rate":
|
42 |
-
"num_train_epochs":
|
43 |
-
"temperature":
|
44 |
}
|
45 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-96",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 2.101290702819824,
|
14 |
+
"learning_rate": 1.1962059637335337e-06,
|
15 |
+
"loss": 0.5173,
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.5,
|
21 |
+
"eval_f1": 0.6666666666666666,
|
22 |
+
"eval_loss": 0.5046124458312988,
|
23 |
+
"eval_precision": 0.5,
|
24 |
+
"eval_recall": 1.0,
|
25 |
+
"eval_runtime": 28.251,
|
26 |
+
"eval_samples_per_second": 36.176,
|
27 |
+
"eval_steps_per_second": 1.133,
|
28 |
"step": 96
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
+
"max_steps": 864,
|
33 |
"num_input_tokens_seen": 0,
|
34 |
+
"num_train_epochs": 9,
|
35 |
"save_steps": 500,
|
36 |
"total_flos": 235695197280.0,
|
37 |
"train_batch_size": 32,
|
38 |
"trial_name": null,
|
39 |
"trial_params": {
|
40 |
+
"alpha": 0.586704262013931,
|
41 |
+
"learning_rate": 1.3457317092002255e-06,
|
42 |
+
"num_train_epochs": 9,
|
43 |
+
"temperature": 21
|
44 |
}
|
45 |
}
|
run-3/checkpoint-96/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b765cab6920f955460cc4f812305ba7f085db0f4dbb906f08e22ff912fece657
|
3 |
size 4920
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b765cab6920f955460cc4f812305ba7f085db0f4dbb906f08e22ff912fece657
|
3 |
size 4920
|