xuancoblab2023 committed on
Commit
a624889
·
verified ·
1 Parent(s): aac6f57

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1713611453.a9446dbff3d4.7440.12 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f40b6287fbf34da48d639cae3380f68354896cc8d8a16b149626a5cb8acfe201
3
- size 5482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077e622b1ef7a9f904e281c3128cf550a8d692a3b2093bb1b70ac0a5e022354b
3
+ size 6212
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40c9694ee190d60f382d769e24b26fe71862c04c4247211ccc04f65f552d99a9
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03dfe0952f0a6030e34124601ff79f276e2ceb48a63399634f680d64e7b92c8a
3
  size 17549312
run-10/checkpoint-320/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.40.0",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-10/checkpoint-320/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df43e1070c8179c35300d6007c86b5179fa30a375bed414bb7493824d5589c90
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03dfe0952f0a6030e34124601ff79f276e2ceb48a63399634f680d64e7b92c8a
3
  size 17549312
run-10/checkpoint-320/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9db63500231342ee5dc8a44094ace33abc529e6af8e4c27cc3727cdc1b658e26
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba5ab31748daa20b30c694f336c403661966591f5dd1193478cc81419d7d3bc
3
+ size 35123898
run-10/checkpoint-320/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3d2fae8f804d730b1cae7bbde900a00413fe63bf39745fa2e32472d277792f0
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce5941012578d026c76372190ed75bc3a1c59455c429a4fd246677d1bfc3594
3
+ size 14308
run-10/checkpoint-320/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85acdd34f6f1e7e049f0788e3eb1cd47ca53a7e3f51f2309b24a53c47c6abbb3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e046c557ffe97ab8638cd9fcf0bcee8e1793184754162b8180330c58912a7e
3
  size 1064
run-10/checkpoint-320/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.8131115459882583,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-320",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
  "global_step": 320,
7
  "is_hyper_param_search": true,
@@ -10,113 +10,57 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 7.969868183135986,
14
- "learning_rate": 0.0006324213595544264,
15
- "loss": 0.5787,
16
- "step": 64
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.773972602739726,
21
- "eval_f1": 0.8070175438596492,
22
- "eval_loss": 0.48913490772247314,
23
- "eval_precision": 0.7040816326530612,
24
- "eval_recall": 0.9452054794520548,
25
- "eval_runtime": 27.7283,
26
- "eval_samples_per_second": 36.858,
27
- "eval_steps_per_second": 1.154,
28
- "step": 64
 
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 2.9250471591949463,
33
- "learning_rate": 0.00047431601966581977,
34
- "loss": 0.46,
35
- "step": 128
36
- },
37
- {
38
- "epoch": 2.0,
39
- "eval_accuracy": 0.799412915851272,
40
- "eval_f1": 0.8138056312443234,
41
- "eval_loss": 0.4400319457054138,
42
- "eval_precision": 0.7593220338983051,
43
- "eval_recall": 0.8767123287671232,
44
- "eval_runtime": 28.2626,
45
- "eval_samples_per_second": 36.161,
46
- "eval_steps_per_second": 1.132,
47
- "step": 128
48
- },
49
- {
50
- "epoch": 3.0,
51
- "grad_norm": 3.8848695755004883,
52
- "learning_rate": 0.0003162106797772132,
53
- "loss": 0.4251,
54
- "step": 192
55
- },
56
- {
57
- "epoch": 3.0,
58
- "eval_accuracy": 0.8111545988258317,
59
- "eval_f1": 0.8170616113744076,
60
- "eval_loss": 0.4429156482219696,
61
- "eval_precision": 0.7922794117647058,
62
- "eval_recall": 0.8434442270058709,
63
- "eval_runtime": 29.3102,
64
- "eval_samples_per_second": 34.868,
65
- "eval_steps_per_second": 1.092,
66
- "step": 192
67
- },
68
- {
69
- "epoch": 4.0,
70
- "grad_norm": 6.6206135749816895,
71
- "learning_rate": 0.0001581053398886066,
72
- "loss": 0.4016,
73
- "step": 256
74
- },
75
- {
76
- "epoch": 4.0,
77
- "eval_accuracy": 0.8023483365949119,
78
- "eval_f1": 0.8027343750000001,
79
- "eval_loss": 0.42793938517570496,
80
- "eval_precision": 0.8011695906432749,
81
- "eval_recall": 0.8043052837573386,
82
- "eval_runtime": 28.4384,
83
- "eval_samples_per_second": 35.937,
84
- "eval_steps_per_second": 1.125,
85
- "step": 256
86
- },
87
- {
88
- "epoch": 5.0,
89
- "grad_norm": 2.5236122608184814,
90
- "learning_rate": 0.0,
91
- "loss": 0.3827,
92
  "step": 320
93
  },
94
  {
95
- "epoch": 5.0,
96
- "eval_accuracy": 0.8131115459882583,
97
- "eval_f1": 0.8242870285188593,
98
- "eval_loss": 0.4034118950366974,
99
- "eval_precision": 0.7777777777777778,
100
- "eval_recall": 0.8767123287671232,
101
- "eval_runtime": 27.9789,
102
- "eval_samples_per_second": 36.528,
103
- "eval_steps_per_second": 1.144,
 
104
  "step": 320
105
  }
106
  ],
107
  "logging_steps": 500,
108
- "max_steps": 320,
109
  "num_input_tokens_seen": 0,
110
- "num_train_epochs": 5,
111
  "save_steps": 500,
112
- "total_flos": 1178475986400.0,
113
- "train_batch_size": 48,
114
  "trial_name": null,
115
  "trial_params": {
116
- "alpha": 0.9990297556295253,
117
- "learning_rate": 0.000790526699443033,
118
- "num_train_epochs": 5,
119
- "per_device_train_batch_size": 48,
120
- "temperature": 30
121
  }
122
  }
 
1
  {
2
+ "best_metric": 0.676078431372549,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-320",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 320,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 2.012742280960083,
14
+ "learning_rate": 0.0007591954920690624,
15
+ "loss": 0.5869,
16
+ "step": 160
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.6666666666666666,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.5785399675369263,
23
+ "eval_mcc": 0.0,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 1.8697,
27
+ "eval_samples_per_second": 681.924,
28
+ "eval_steps_per_second": 21.394,
29
+ "step": 160
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 2.057482957839966,
34
+ "learning_rate": 0.0006642960555604296,
35
+ "loss": 0.5715,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "step": 320
37
  },
38
  {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.676078431372549,
41
+ "eval_f1": 0.4411366711772666,
42
+ "eval_loss": 0.5550761222839355,
43
+ "eval_mcc": 0.22526645932553852,
44
+ "eval_precision": 0.5191082802547771,
45
+ "eval_recall": 0.3835294117647059,
46
+ "eval_runtime": 1.8672,
47
+ "eval_samples_per_second": 682.827,
48
+ "eval_steps_per_second": 21.422,
49
  "step": 320
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 1440,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 9,
56
  "save_steps": 500,
57
+ "total_flos": 583510875840.0,
58
+ "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.9116955099903541,
62
+ "learning_rate": 0.0008540949285776952,
63
+ "num_train_epochs": 9,
64
+ "temperature": 21
 
65
  }
66
  }
run-10/checkpoint-320/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f10318ca028e91ec5668eb2cb6ad3dd869bdfca116061dca662c8b2df479ddbc
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34acf28a009a32e591bbfa0e1392d8f5ce60c738ac5de3b2d0a889c73580dc19
3
+ size 5048