Training in progress, epoch 1
Browse files- logs/events.out.tfevents.1711205777.a08db75a0e93.4335.1 +2 -2
- logs/events.out.tfevents.1711206480.a08db75a0e93.4335.2 +3 -0
- model.safetensors +1 -1
- run-0/checkpoint-480/config.json +1 -1
- run-0/checkpoint-480/model.safetensors +1 -1
- run-0/checkpoint-480/optimizer.pt +1 -1
- run-0/checkpoint-480/scheduler.pt +1 -1
- run-0/checkpoint-480/trainer_state.json +62 -62
- run-0/checkpoint-480/training_args.bin +1 -1
- run-0/checkpoint-576/config.json +1 -1
- run-0/checkpoint-576/model.safetensors +1 -1
- run-0/checkpoint-576/optimizer.pt +1 -1
- run-0/checkpoint-576/scheduler.pt +1 -1
- run-0/checkpoint-576/trainer_state.json +73 -73
- run-0/checkpoint-576/training_args.bin +1 -1
- run-1/checkpoint-96/config.json +1 -1
- run-1/checkpoint-96/model.safetensors +1 -1
- run-1/checkpoint-96/optimizer.pt +1 -1
- run-1/checkpoint-96/rng_state.pth +1 -1
- run-1/checkpoint-96/scheduler.pt +1 -1
- run-1/checkpoint-96/trainer_state.json +22 -41
- run-1/checkpoint-96/training_args.bin +1 -1
- training_args.bin +1 -1
logs/events.out.tfevents.1711205777.a08db75a0e93.4335.1
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bff85464490dd0c2632a7c659a2811039740add28aa47e1cde4ba5ecc6c10d71
|
3 |
+
size 9115
|
logs/events.out.tfevents.1711206480.a08db75a0e93.4335.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43ef9c7e2f742e721ec2c99dcab1e03e1eafb9bbb301a2001f1c824a4429488b
|
3 |
+
size 5346
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55c05c9804e560e83219b8d7f0e16c10a78191786fdb883860175414f3c5708d
|
3 |
size 17549312
|
run-0/checkpoint-480/config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.39.1",
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
run-0/checkpoint-480/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e98d696ca97335f654ced1eb00ab1534239523ee99300ca68326a6a3b8e1f06d
|
3 |
size 17549312
|
run-0/checkpoint-480/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35122746
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca9bb8cc1f373169b1d60490c9559d74ce9f27d110c797ba8a2951a4afa6860f
|
3 |
size 35122746
|
run-0/checkpoint-480/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5df72b52b13bd8f329c563313c92428765b4b79e0489af632784b2c0db70304f
|
3 |
size 1064
|
run-0/checkpoint-480/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-480",
|
4 |
"epoch": 5.0,
|
5 |
"eval_steps": 500,
|
@@ -10,112 +10,112 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_precision": 0.
|
24 |
-
"eval_recall": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second":
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"grad_norm": 1.
|
33 |
-
"learning_rate":
|
34 |
-
"loss": 0.
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
-
"eval_accuracy": 0.
|
40 |
-
"eval_f1": 0.
|
41 |
-
"eval_loss": 0.
|
42 |
-
"eval_precision": 0.
|
43 |
-
"eval_recall": 0.
|
44 |
-
"eval_runtime":
|
45 |
-
"eval_samples_per_second":
|
46 |
-
"eval_steps_per_second": 1.
|
47 |
"step": 192
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
-
"grad_norm":
|
52 |
-
"learning_rate":
|
53 |
-
"loss": 0.
|
54 |
"step": 288
|
55 |
},
|
56 |
{
|
57 |
"epoch": 3.0,
|
58 |
-
"eval_accuracy": 0.
|
59 |
-
"eval_f1": 0.
|
60 |
-
"eval_loss": 0.
|
61 |
-
"eval_precision": 0.
|
62 |
-
"eval_recall": 0.
|
63 |
-
"eval_runtime":
|
64 |
-
"eval_samples_per_second":
|
65 |
-
"eval_steps_per_second": 1.
|
66 |
"step": 288
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"grad_norm":
|
71 |
-
"learning_rate": 3.
|
72 |
-
"loss": 0.
|
73 |
"step": 384
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.0,
|
77 |
-
"eval_accuracy": 0.
|
78 |
-
"eval_f1": 0.
|
79 |
-
"eval_loss": 0.
|
80 |
-
"eval_precision": 0.
|
81 |
-
"eval_recall": 0.
|
82 |
-
"eval_runtime":
|
83 |
-
"eval_samples_per_second":
|
84 |
-
"eval_steps_per_second": 1.
|
85 |
"step": 384
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
-
"grad_norm":
|
90 |
-
"learning_rate":
|
91 |
-
"loss": 0.
|
92 |
"step": 480
|
93 |
},
|
94 |
{
|
95 |
"epoch": 5.0,
|
96 |
-
"eval_accuracy": 0.
|
97 |
-
"eval_f1": 0.
|
98 |
-
"eval_loss": 0.
|
99 |
-
"eval_precision": 0.
|
100 |
-
"eval_recall": 0.
|
101 |
-
"eval_runtime":
|
102 |
-
"eval_samples_per_second":
|
103 |
-
"eval_steps_per_second": 1.
|
104 |
"step": 480
|
105 |
}
|
106 |
],
|
107 |
"logging_steps": 500,
|
108 |
-
"max_steps":
|
109 |
"num_input_tokens_seen": 0,
|
110 |
-
"num_train_epochs":
|
111 |
"save_steps": 500,
|
112 |
"total_flos": 1178475986400.0,
|
113 |
"train_batch_size": 32,
|
114 |
"trial_name": null,
|
115 |
"trial_params": {
|
116 |
-
"alpha": 0.
|
117 |
-
"learning_rate":
|
118 |
-
"num_train_epochs":
|
119 |
-
"temperature":
|
120 |
}
|
121 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6046966731898239,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-480",
|
4 |
"epoch": 5.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.8380242586135864,
|
14 |
+
"learning_rate": 8.6265645867868e-06,
|
15 |
+
"loss": 0.6538,
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.4980430528375734,
|
21 |
+
"eval_f1": 0.06215722120658134,
|
22 |
+
"eval_loss": 0.6460775136947632,
|
23 |
+
"eval_precision": 0.4722222222222222,
|
24 |
+
"eval_recall": 0.033268101761252444,
|
25 |
+
"eval_runtime": 32.7473,
|
26 |
+
"eval_samples_per_second": 31.209,
|
27 |
+
"eval_steps_per_second": 0.977,
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
+
"grad_norm": 1.982408046722412,
|
33 |
+
"learning_rate": 6.901251669429439e-06,
|
34 |
+
"loss": 0.6453,
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.5,
|
40 |
+
"eval_f1": 0.019193857965451058,
|
41 |
+
"eval_loss": 0.6427881717681885,
|
42 |
+
"eval_precision": 0.5,
|
43 |
+
"eval_recall": 0.009784735812133072,
|
44 |
+
"eval_runtime": 30.8528,
|
45 |
+
"eval_samples_per_second": 33.125,
|
46 |
+
"eval_steps_per_second": 1.037,
|
47 |
"step": 192
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
+
"grad_norm": 1.274803638458252,
|
52 |
+
"learning_rate": 5.17593875207208e-06,
|
53 |
+
"loss": 0.6429,
|
54 |
"step": 288
|
55 |
},
|
56 |
{
|
57 |
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.5313111545988258,
|
59 |
+
"eval_f1": 0.18950930626057527,
|
60 |
+
"eval_loss": 0.6384090781211853,
|
61 |
+
"eval_precision": 0.7,
|
62 |
+
"eval_recall": 0.1095890410958904,
|
63 |
+
"eval_runtime": 29.2081,
|
64 |
+
"eval_samples_per_second": 34.99,
|
65 |
+
"eval_steps_per_second": 1.096,
|
66 |
"step": 288
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
+
"grad_norm": 1.62313711643219,
|
71 |
+
"learning_rate": 3.4506258347147196e-06,
|
72 |
+
"loss": 0.6388,
|
73 |
"step": 384
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.5831702544031311,
|
78 |
+
"eval_f1": 0.3622754491017964,
|
79 |
+
"eval_loss": 0.6332760453224182,
|
80 |
+
"eval_precision": 0.7707006369426752,
|
81 |
+
"eval_recall": 0.23679060665362034,
|
82 |
+
"eval_runtime": 28.4104,
|
83 |
+
"eval_samples_per_second": 35.973,
|
84 |
+
"eval_steps_per_second": 1.126,
|
85 |
"step": 384
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
+
"grad_norm": 0.8507488965988159,
|
90 |
+
"learning_rate": 1.7253129173573598e-06,
|
91 |
+
"loss": 0.634,
|
92 |
"step": 480
|
93 |
},
|
94 |
{
|
95 |
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.6046966731898239,
|
97 |
+
"eval_f1": 0.43258426966292135,
|
98 |
+
"eval_loss": 0.629119336605072,
|
99 |
+
"eval_precision": 0.7661691542288557,
|
100 |
+
"eval_recall": 0.3013698630136986,
|
101 |
+
"eval_runtime": 28.4687,
|
102 |
+
"eval_samples_per_second": 35.899,
|
103 |
+
"eval_steps_per_second": 1.124,
|
104 |
"step": 480
|
105 |
}
|
106 |
],
|
107 |
"logging_steps": 500,
|
108 |
+
"max_steps": 576,
|
109 |
"num_input_tokens_seen": 0,
|
110 |
+
"num_train_epochs": 6,
|
111 |
"save_steps": 500,
|
112 |
"total_flos": 1178475986400.0,
|
113 |
"train_batch_size": 32,
|
114 |
"trial_name": null,
|
115 |
"trial_params": {
|
116 |
+
"alpha": 0.9136100763812092,
|
117 |
+
"learning_rate": 1.035187750414416e-05,
|
118 |
+
"num_train_epochs": 6,
|
119 |
+
"temperature": 5
|
120 |
}
|
121 |
}
|
run-0/checkpoint-480/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe7dd3c3e257754c6583ea668a830f19b3dc80cb7982abc3ddecb0dac8e92b5
|
3 |
size 4920
|
run-0/checkpoint-576/config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.39.1",
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
run-0/checkpoint-576/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:866330fe997ce9bb2e742f9da273ef2f056845da03268f44e44769e26c614745
|
3 |
size 17549312
|
run-0/checkpoint-576/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35122746
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c0f7ba770534fe57c88682e448a57cedaf46599305b57c11661c5b2c497b40d
|
3 |
size 35122746
|
run-0/checkpoint-576/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29cb964da2a7590070557ce0763bdb443af17a63d01f7981f09843d449341b4c
|
3 |
size 1064
|
run-0/checkpoint-576/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-576",
|
4 |
"epoch": 6.0,
|
5 |
"eval_steps": 500,
|
@@ -10,131 +10,131 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_precision": 0.
|
24 |
-
"eval_recall": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second":
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"grad_norm":
|
33 |
-
"learning_rate":
|
34 |
-
"loss": 0.
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
-
"eval_accuracy": 0.
|
40 |
-
"eval_f1": 0.
|
41 |
-
"eval_loss": 0.
|
42 |
-
"eval_precision": 0.
|
43 |
-
"eval_recall": 0.
|
44 |
-
"eval_runtime":
|
45 |
-
"eval_samples_per_second":
|
46 |
-
"eval_steps_per_second": 1.
|
47 |
"step": 192
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
-
"grad_norm":
|
52 |
-
"learning_rate":
|
53 |
-
"loss": 0.
|
54 |
"step": 288
|
55 |
},
|
56 |
{
|
57 |
"epoch": 3.0,
|
58 |
-
"eval_accuracy": 0.
|
59 |
-
"eval_f1": 0.
|
60 |
-
"eval_loss": 0.
|
61 |
-
"eval_precision": 0.
|
62 |
-
"eval_recall": 0.
|
63 |
-
"eval_runtime":
|
64 |
-
"eval_samples_per_second":
|
65 |
-
"eval_steps_per_second": 1.
|
66 |
"step": 288
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"grad_norm":
|
71 |
-
"learning_rate":
|
72 |
-
"loss": 0.
|
73 |
"step": 384
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.0,
|
77 |
-
"eval_accuracy": 0.
|
78 |
-
"eval_f1": 0.
|
79 |
-
"eval_loss": 0.
|
80 |
-
"eval_precision": 0.
|
81 |
-
"eval_recall": 0.
|
82 |
-
"eval_runtime":
|
83 |
-
"eval_samples_per_second":
|
84 |
-
"eval_steps_per_second": 1.
|
85 |
"step": 384
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
-
"grad_norm":
|
90 |
-
"learning_rate":
|
91 |
-
"loss": 0.
|
92 |
"step": 480
|
93 |
},
|
94 |
{
|
95 |
"epoch": 5.0,
|
96 |
-
"eval_accuracy": 0.
|
97 |
-
"eval_f1": 0.
|
98 |
-
"eval_loss": 0.
|
99 |
-
"eval_precision": 0.
|
100 |
-
"eval_recall": 0.
|
101 |
-
"eval_runtime":
|
102 |
-
"eval_samples_per_second":
|
103 |
-
"eval_steps_per_second": 1.
|
104 |
"step": 480
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
-
"grad_norm":
|
109 |
-
"learning_rate":
|
110 |
-
"loss": 0.
|
111 |
"step": 576
|
112 |
},
|
113 |
{
|
114 |
"epoch": 6.0,
|
115 |
-
"eval_accuracy": 0.
|
116 |
-
"eval_f1": 0.
|
117 |
-
"eval_loss": 0.
|
118 |
-
"eval_precision": 0.
|
119 |
-
"eval_recall": 0.
|
120 |
-
"eval_runtime":
|
121 |
-
"eval_samples_per_second":
|
122 |
-
"eval_steps_per_second": 1.
|
123 |
"step": 576
|
124 |
}
|
125 |
],
|
126 |
"logging_steps": 500,
|
127 |
-
"max_steps":
|
128 |
"num_input_tokens_seen": 0,
|
129 |
-
"num_train_epochs":
|
130 |
"save_steps": 500,
|
131 |
"total_flos": 1414171183680.0,
|
132 |
"train_batch_size": 32,
|
133 |
"trial_name": null,
|
134 |
"trial_params": {
|
135 |
-
"alpha": 0.
|
136 |
-
"learning_rate":
|
137 |
-
"num_train_epochs":
|
138 |
-
"temperature":
|
139 |
}
|
140 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6076320939334638,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-576",
|
4 |
"epoch": 6.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.8380242586135864,
|
14 |
+
"learning_rate": 8.6265645867868e-06,
|
15 |
+
"loss": 0.6538,
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.4980430528375734,
|
21 |
+
"eval_f1": 0.06215722120658134,
|
22 |
+
"eval_loss": 0.6460775136947632,
|
23 |
+
"eval_precision": 0.4722222222222222,
|
24 |
+
"eval_recall": 0.033268101761252444,
|
25 |
+
"eval_runtime": 32.7473,
|
26 |
+
"eval_samples_per_second": 31.209,
|
27 |
+
"eval_steps_per_second": 0.977,
|
28 |
"step": 96
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
+
"grad_norm": 1.982408046722412,
|
33 |
+
"learning_rate": 6.901251669429439e-06,
|
34 |
+
"loss": 0.6453,
|
35 |
"step": 192
|
36 |
},
|
37 |
{
|
38 |
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.5,
|
40 |
+
"eval_f1": 0.019193857965451058,
|
41 |
+
"eval_loss": 0.6427881717681885,
|
42 |
+
"eval_precision": 0.5,
|
43 |
+
"eval_recall": 0.009784735812133072,
|
44 |
+
"eval_runtime": 30.8528,
|
45 |
+
"eval_samples_per_second": 33.125,
|
46 |
+
"eval_steps_per_second": 1.037,
|
47 |
"step": 192
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
+
"grad_norm": 1.274803638458252,
|
52 |
+
"learning_rate": 5.17593875207208e-06,
|
53 |
+
"loss": 0.6429,
|
54 |
"step": 288
|
55 |
},
|
56 |
{
|
57 |
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.5313111545988258,
|
59 |
+
"eval_f1": 0.18950930626057527,
|
60 |
+
"eval_loss": 0.6384090781211853,
|
61 |
+
"eval_precision": 0.7,
|
62 |
+
"eval_recall": 0.1095890410958904,
|
63 |
+
"eval_runtime": 29.2081,
|
64 |
+
"eval_samples_per_second": 34.99,
|
65 |
+
"eval_steps_per_second": 1.096,
|
66 |
"step": 288
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
+
"grad_norm": 1.62313711643219,
|
71 |
+
"learning_rate": 3.4506258347147196e-06,
|
72 |
+
"loss": 0.6388,
|
73 |
"step": 384
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.5831702544031311,
|
78 |
+
"eval_f1": 0.3622754491017964,
|
79 |
+
"eval_loss": 0.6332760453224182,
|
80 |
+
"eval_precision": 0.7707006369426752,
|
81 |
+
"eval_recall": 0.23679060665362034,
|
82 |
+
"eval_runtime": 28.4104,
|
83 |
+
"eval_samples_per_second": 35.973,
|
84 |
+
"eval_steps_per_second": 1.126,
|
85 |
"step": 384
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
+
"grad_norm": 0.8507488965988159,
|
90 |
+
"learning_rate": 1.7253129173573598e-06,
|
91 |
+
"loss": 0.634,
|
92 |
"step": 480
|
93 |
},
|
94 |
{
|
95 |
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.6046966731898239,
|
97 |
+
"eval_f1": 0.43258426966292135,
|
98 |
+
"eval_loss": 0.629119336605072,
|
99 |
+
"eval_precision": 0.7661691542288557,
|
100 |
+
"eval_recall": 0.3013698630136986,
|
101 |
+
"eval_runtime": 28.4687,
|
102 |
+
"eval_samples_per_second": 35.899,
|
103 |
+
"eval_steps_per_second": 1.124,
|
104 |
"step": 480
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
+
"grad_norm": 2.1146538257598877,
|
109 |
+
"learning_rate": 0.0,
|
110 |
+
"loss": 0.6325,
|
111 |
"step": 576
|
112 |
},
|
113 |
{
|
114 |
"epoch": 6.0,
|
115 |
+
"eval_accuracy": 0.6076320939334638,
|
116 |
+
"eval_f1": 0.4438280166435506,
|
117 |
+
"eval_loss": 0.6275492906570435,
|
118 |
+
"eval_precision": 0.7619047619047619,
|
119 |
+
"eval_recall": 0.3131115459882583,
|
120 |
+
"eval_runtime": 28.3455,
|
121 |
+
"eval_samples_per_second": 36.055,
|
122 |
+
"eval_steps_per_second": 1.129,
|
123 |
"step": 576
|
124 |
}
|
125 |
],
|
126 |
"logging_steps": 500,
|
127 |
+
"max_steps": 576,
|
128 |
"num_input_tokens_seen": 0,
|
129 |
+
"num_train_epochs": 6,
|
130 |
"save_steps": 500,
|
131 |
"total_flos": 1414171183680.0,
|
132 |
"train_batch_size": 32,
|
133 |
"trial_name": null,
|
134 |
"trial_params": {
|
135 |
+
"alpha": 0.9136100763812092,
|
136 |
+
"learning_rate": 1.035187750414416e-05,
|
137 |
+
"num_train_epochs": 6,
|
138 |
+
"temperature": 5
|
139 |
}
|
140 |
}
|
run-0/checkpoint-576/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe7dd3c3e257754c6583ea668a830f19b3dc80cb7982abc3ddecb0dac8e92b5
|
3 |
size 4920
|
run-1/checkpoint-96/config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
|
|
27 |
"position_embedding_type": "absolute",
|
28 |
"problem_type": "single_label_classification",
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.39.1",
|
31 |
"type_vocab_size": 2,
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 30522
|
run-1/checkpoint-96/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55c05c9804e560e83219b8d7f0e16c10a78191786fdb883860175414f3c5708d
|
3 |
size 17549312
|
run-1/checkpoint-96/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35122746
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:453bcc9c41fcfb41c3251db6e85d5c2ff538e525b3c198489f5df853345ac256
|
3 |
size 35122746
|
run-1/checkpoint-96/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14054
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24aa86019b8aea1c551cc1adaf38c4db2fc01de75a22af312230f6b592e0fd81
|
3 |
size 14054
|
run-1/checkpoint-96/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86288a3f5338e6fa1288da8642a232d387a6d7873c6c0aef6d70a6441d28edaf
|
3 |
size 1064
|
run-1/checkpoint-96/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-96",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 96,
|
7 |
"is_hyper_param_search": true,
|
@@ -10,55 +10,36 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
-
"step": 48
|
17 |
-
},
|
18 |
-
{
|
19 |
-
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.7338551859099804,
|
21 |
-
"eval_f1": 0.7399617590822181,
|
22 |
-
"eval_loss": 0.448641836643219,
|
23 |
-
"eval_precision": 0.7233644859813084,
|
24 |
-
"eval_recall": 0.7573385518590998,
|
25 |
-
"eval_runtime": 30.0153,
|
26 |
-
"eval_samples_per_second": 34.049,
|
27 |
-
"eval_steps_per_second": 0.533,
|
28 |
-
"step": 48
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"epoch": 2.0,
|
32 |
-
"grad_norm": 3.3045527935028076,
|
33 |
-
"learning_rate": 0.00015508680621310162,
|
34 |
-
"loss": 0.4485,
|
35 |
"step": 96
|
36 |
},
|
37 |
{
|
38 |
-
"epoch":
|
39 |
-
"eval_accuracy": 0.
|
40 |
-
"eval_f1": 0.
|
41 |
-
"eval_loss": 0.
|
42 |
-
"eval_precision": 0.
|
43 |
-
"eval_recall": 0.
|
44 |
-
"eval_runtime": 28.
|
45 |
-
"eval_samples_per_second":
|
46 |
-
"eval_steps_per_second":
|
47 |
"step": 96
|
48 |
}
|
49 |
],
|
50 |
"logging_steps": 500,
|
51 |
-
"max_steps":
|
52 |
"num_input_tokens_seen": 0,
|
53 |
-
"num_train_epochs":
|
54 |
"save_steps": 500,
|
55 |
-
"total_flos":
|
56 |
-
"train_batch_size":
|
57 |
"trial_name": null,
|
58 |
"trial_params": {
|
59 |
-
"alpha": 0.
|
60 |
-
"learning_rate":
|
61 |
-
"num_train_epochs":
|
62 |
-
"temperature":
|
63 |
}
|
64 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5058708414872799,
|
3 |
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-96",
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 96,
|
7 |
"is_hyper_param_search": true,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.3468107581138611,
|
14 |
+
"learning_rate": 3.003997555812601e-05,
|
15 |
+
"loss": 0.3254,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"step": 96
|
17 |
},
|
18 |
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.5058708414872799,
|
21 |
+
"eval_f1": 0.04896421845574388,
|
22 |
+
"eval_loss": 0.30459368228912354,
|
23 |
+
"eval_precision": 0.65,
|
24 |
+
"eval_recall": 0.025440313111545987,
|
25 |
+
"eval_runtime": 28.2331,
|
26 |
+
"eval_samples_per_second": 36.199,
|
27 |
+
"eval_steps_per_second": 1.133,
|
28 |
"step": 96
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
+
"max_steps": 192,
|
33 |
"num_input_tokens_seen": 0,
|
34 |
+
"num_train_epochs": 2,
|
35 |
"save_steps": 500,
|
36 |
+
"total_flos": 235695197280.0,
|
37 |
+
"train_batch_size": 32,
|
38 |
"trial_name": null,
|
39 |
"trial_params": {
|
40 |
+
"alpha": 0.3812783883027333,
|
41 |
+
"learning_rate": 6.007995111625202e-05,
|
42 |
+
"num_train_epochs": 2,
|
43 |
+
"temperature": 27
|
44 |
}
|
45 |
}
|
run-1/checkpoint-96/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c269c0d2dbd92d16b973c47db85dda2eecea4e2aa70dfe60d322e9a0b7bcac4e
|
3 |
size 4920
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c269c0d2dbd92d16b973c47db85dda2eecea4e2aa70dfe60d322e9a0b7bcac4e
|
3 |
size 4920
|