xuancoblab2023 committed on
Commit
e2d7ddc
·
verified ·
1 Parent(s): 9ed69af

Training in progress, epoch 1

Browse files
logs/events.out.tfevents.1711296799.8cc2eef2edb7.2942.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ed6243de1b4126f38bda31247c0cdb569e0d85fd2368ac42cc28ee9b6056e27
3
- size 6136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f1a5654e3d6fba66852b0b3fdbbe538b4d682b3fd4796ddd8536724429535cd
3
+ size 7220
logs/events.out.tfevents.1711296858.8cc2eef2edb7.2942.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9474f9bb69d62b8f8bbe6dd7caf55abc5b4f0dc9fc15dfe2101e9aa6fdd738c
3
+ size 5407
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4e4ed65de241fa382d9773b2e0209cc2134af774c4ee35f8d263835c2997d29
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd6611d9aeb31d0657795a4eaca47f364cd69272d929d8b6df5c3bda9e50e99
3
  size 17549312
run-1/checkpoint-891/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-1/checkpoint-891/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dad2c00079bf827d9e719db18d4c86aa796e128a9e02878962ed25adb3bcd1b
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9040181db8d76005f676d97b5534a17a4eb2972656beafd2186dc0a419359f6a
3
  size 17549312
run-1/checkpoint-891/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f743eb06051cc7a52c530b8568ca77b298b705e7ad276b49dc748f0572f28ee2
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e2af4c26f15a15ff765a52e923766a4900ce6828c6c1331d3fd8bfb0624998a
3
+ size 35123898
run-1/checkpoint-891/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8825af3ee3a915f6774d349057db9cb8bcc74ea981ae299e1cb35993773495a5
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b689ca5add5bd2b3024a96e15979227e5788dfce70568554fb1702c585abab
3
+ size 14308
run-1/checkpoint-891/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48269bd025e4a2a9251c6dba3b071353cc95ee138f476ea609464c3d7a8e9248
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291008d3b6238846646f9b85fa92243b0cc21db6c35ba667d2fe4cc4e955c671
3
  size 1064
run-1/checkpoint-891/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-1/checkpoint-891/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6495049504950495,
3
- "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-891",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 891,
@@ -10,65 +10,77 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.3426679372787476,
14
- "learning_rate": 0.0003560526201910554,
15
- "loss": 0.6371,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5841584158415841,
21
- "eval_loss": 0.637409508228302,
22
- "eval_runtime": 56.7978,
23
- "eval_samples_per_second": 8.891,
24
- "eval_steps_per_second": 0.282,
 
 
 
 
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 1.018396258354187,
30
- "learning_rate": 0.0002373684134607036,
31
- "loss": 0.603,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.6277227722772277,
37
- "eval_loss": 0.6017976403236389,
38
- "eval_runtime": 55.6441,
39
- "eval_samples_per_second": 9.076,
40
- "eval_steps_per_second": 0.288,
 
 
 
 
41
  "step": 594
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 1.4982593059539795,
46
- "learning_rate": 0.0001186842067303518,
47
- "loss": 0.588,
48
  "step": 891
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.6495049504950495,
53
- "eval_loss": 0.5974766612052917,
54
- "eval_runtime": 55.1255,
55
- "eval_samples_per_second": 9.161,
56
- "eval_steps_per_second": 0.29,
 
 
 
 
57
  "step": 891
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 1188,
62
  "num_input_tokens_seen": 0,
63
- "num_train_epochs": 4,
64
  "save_steps": 500,
65
- "total_flos": 2333529849420.0,
66
  "train_batch_size": 32,
67
  "trial_name": null,
68
  "trial_params": {
69
- "alpha": 0.900582717199523,
70
- "learning_rate": 0.0004747368269214072,
71
- "num_train_epochs": 4,
72
- "temperature": 18
73
  }
74
  }
 
1
  {
2
+ "best_metric": 0.5425742574257426,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-594",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 891,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.9758312702178955,
14
+ "learning_rate": 2.4789676674761582e-05,
15
+ "loss": 0.4903,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.502970297029703,
21
+ "eval_f1": 0.08058608058608059,
22
+ "eval_loss": 0.48342981934547424,
23
+ "eval_mcc": 0.010331488326433175,
24
+ "eval_precision": 0.5238095238095238,
25
+ "eval_recall": 0.04365079365079365,
26
+ "eval_runtime": 0.9315,
27
+ "eval_samples_per_second": 542.124,
28
+ "eval_steps_per_second": 17.176,
29
  "step": 297
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 0.7211948037147522,
34
+ "learning_rate": 1.2394838337380791e-05,
35
+ "loss": 0.4812,
36
  "step": 594
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_accuracy": 0.5425742574257426,
41
+ "eval_f1": 0.3773584905660377,
42
+ "eval_loss": 0.4795511066913605,
43
+ "eval_mcc": 0.09908299956847767,
44
+ "eval_precision": 0.5882352941176471,
45
+ "eval_recall": 0.2777777777777778,
46
+ "eval_runtime": 0.9303,
47
+ "eval_samples_per_second": 542.812,
48
+ "eval_steps_per_second": 17.198,
49
  "step": 594
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "grad_norm": 0.8099855184555054,
54
+ "learning_rate": 0.0,
55
+ "loss": 0.4778,
56
  "step": 891
57
  },
58
  {
59
  "epoch": 3.0,
60
+ "eval_accuracy": 0.5326732673267327,
61
+ "eval_f1": 0.32954545454545453,
62
+ "eval_loss": 0.47736045718193054,
63
+ "eval_mcc": 0.08048876531271514,
64
+ "eval_precision": 0.58,
65
+ "eval_recall": 0.23015873015873015,
66
+ "eval_runtime": 0.9394,
67
+ "eval_samples_per_second": 537.574,
68
+ "eval_steps_per_second": 17.032,
69
  "step": 891
70
  }
71
  ],
72
  "logging_steps": 500,
73
+ "max_steps": 891,
74
  "num_input_tokens_seen": 0,
75
+ "num_train_epochs": 3,
76
  "save_steps": 500,
77
+ "total_flos": 2192103797940.0,
78
  "train_batch_size": 32,
79
  "trial_name": null,
80
  "trial_params": {
81
+ "alpha": 0.6389006598546823,
82
+ "learning_rate": 3.718451501214238e-05,
83
+ "num_train_epochs": 3,
84
+ "temperature": 17
85
  }
86
  }
run-1/checkpoint-891/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6f636dbab8b5e516bcd85051f9f0732a28727ed1675f1d1f3076c7baf2be402
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6731b358035f4120a393e277eadf83407dfaf70785f0599f775fe36a1054fb1
3
+ size 4920
run-2/checkpoint-297/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-2/checkpoint-297/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae39d29dd9e95b52e3db360285f2fd6ef5b67eb60eaf84ca0b63e30e0a570a1b
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd6611d9aeb31d0657795a4eaca47f364cd69272d929d8b6df5c3bda9e50e99
3
  size 17549312
run-2/checkpoint-297/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:106f6d7312d0b8cbbb01e4e6f8733e13f510903a2af457faaaa23790cbf88dc6
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5715f10ee578dc0e25cbb1e64595d2d14801c8fac136a5b282b868d389c6ae
3
+ size 35123898
run-2/checkpoint-297/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f414017d19e8a66d09e6a16c0bca909eff6c9e5541f54da3f0dba2607378e04d
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f5e396c574b9b2ecc40d3f9e87d628a8f4be69caef81905a47fcf8895067e6
3
+ size 14308
run-2/checkpoint-297/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e29f4582b08e2f0e75667a827e9b21c66b9a7caa9edd4095c8bbdf663e5ee13f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec7f7712dcbdabc9e134706fd1460f920b47b7ffce0b705c1cbe548ebce60db
3
  size 1064
run-2/checkpoint-297/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-2/checkpoint-297/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.497029702970297,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-297",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,18 +10,22 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.46941760182380676,
14
- "learning_rate": 1.0066413527942762e-05,
15
- "loss": 0.2155,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.497029702970297,
21
- "eval_loss": 0.18417517840862274,
22
- "eval_runtime": 14.6034,
23
- "eval_samples_per_second": 34.581,
24
- "eval_steps_per_second": 1.096,
 
 
 
 
25
  "step": 297
26
  }
27
  ],
@@ -30,13 +34,13 @@
30
  "num_input_tokens_seen": 0,
31
  "num_train_epochs": 9,
32
  "save_steps": 500,
33
- "total_flos": 777843283140.0,
34
  "train_batch_size": 32,
35
  "trial_name": null,
36
  "trial_params": {
37
- "alpha": 0.20650329892275032,
38
- "learning_rate": 1.1324715218935609e-05,
39
  "num_train_epochs": 9,
40
- "temperature": 10
41
  }
42
  }
 
1
  {
2
+ "best_metric": 0.4910891089108911,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-297",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.6258772015571594,
14
+ "learning_rate": 1.171336966673164e-05,
15
+ "loss": 0.303,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.4910891089108911,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.28316354751586914,
23
+ "eval_mcc": -0.09980217586956908,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 0.9281,
27
+ "eval_samples_per_second": 544.113,
28
+ "eval_steps_per_second": 17.239,
29
  "step": 297
30
  }
31
  ],
 
34
  "num_input_tokens_seen": 0,
35
  "num_train_epochs": 9,
36
  "save_steps": 500,
37
+ "total_flos": 730701265980.0,
38
  "train_batch_size": 32,
39
  "trial_name": null,
40
  "trial_params": {
41
+ "alpha": 0.33782488262757904,
42
+ "learning_rate": 1.3177540875073095e-05,
43
  "num_train_epochs": 9,
44
+ "temperature": 26
45
  }
46
  }
run-2/checkpoint-297/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53af5913dd7a7ac18c4af82b6aa52534a7091d8ef8c41f1056aa7fa989430b5d
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b3023982056159c033a3a1a25c2d159010d9ff247770a44c7d7d6d0e3bba5b
3
+ size 4920
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0df23de5f2d7c569f8878223b016d804cf54077b0824a5d63ecc274595ea3eea
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b3023982056159c033a3a1a25c2d159010d9ff247770a44c7d7d6d0e3bba5b
3
+ size 4920