xuancoblab2023 committed on
Commit
1c3fe90
·
verified ·
1 Parent(s): b5be5cb

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1711297031.8cc2eef2edb7.2942.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5671c871dec21b814e238027bc460e37eeca98777d4d78926a461e91685a1999
3
- size 5406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aec06acf2a8e079593f9eca3449c4e387e8466ce8e96255fe32bec5c97c28ff
3
+ size 6136
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8535679c03461b298018d74a95c8d714c32a4f38defdc9aadd1837a148497803
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8787c73455dbc98abc8d9e870c41942e3996f1081a3f3c5cd872817f27a3b53
3
  size 17549312
run-3/checkpoint-594/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-3/checkpoint-594/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcc44da7da9ee7483223f84fc107bea919ad244d54f5eedf49f42aaf1954cee3
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8787c73455dbc98abc8d9e870c41942e3996f1081a3f3c5cd872817f27a3b53
3
  size 17549312
run-3/checkpoint-594/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b00bd81260407d2dcacc2c135676399912a16b5ea23b5442d6377ef454fc7c84
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b467e85815ba0af002cb553cec311e49a0f572a70fb2690ad6d7728980c1fd8
3
+ size 35123898
run-3/checkpoint-594/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df6866d0dff400085edf68783c4718d94e08dcfa8f0d23f9d94ad3e230de2def
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07703232e8862ddf942921f960a4672ac4604f89da896da8b4bab92d3ecc94f2
3
+ size 14308
run-3/checkpoint-594/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9d4f642fea8f45d839698b4d161b45cf4f8c426e82e8780e36861baa6efe2aa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba183537a264f124c1c17977272b5c3edb6a640be7f75fd73ba9c48332f9477e
3
  size 1064
run-3/checkpoint-594/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-3/checkpoint-594/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6,
3
- "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-594",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 594,
@@ -10,34 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.9984288811683655,
14
- "learning_rate": 9.995670829688553e-05,
15
- "loss": 0.5706,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5128712871287129,
21
- "eval_loss": 0.5624967217445374,
22
- "eval_runtime": 13.6049,
23
- "eval_samples_per_second": 37.119,
24
- "eval_steps_per_second": 1.176,
 
 
 
 
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 0.9848873615264893,
30
- "learning_rate": 4.9978354148442764e-05,
31
- "loss": 0.5602,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.6,
37
- "eval_loss": 0.5566068291664124,
38
- "eval_runtime": 13.9891,
39
- "eval_samples_per_second": 36.099,
40
- "eval_steps_per_second": 1.144,
 
 
 
 
41
  "step": 594
42
  }
43
  ],
@@ -46,13 +54,13 @@
46
  "num_input_tokens_seen": 0,
47
  "num_train_epochs": 3,
48
  "save_steps": 500,
49
- "total_flos": 1555686566280.0,
50
  "train_batch_size": 32,
51
  "trial_name": null,
52
  "trial_params": {
53
- "alpha": 0.7785816803005383,
54
- "learning_rate": 0.0001499350624453283,
55
  "num_train_epochs": 3,
56
- "temperature": 16
57
  }
58
  }
 
1
  {
2
+ "best_metric": 0.5168316831683168,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-297",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 594,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.8371890187263489,
14
+ "learning_rate": 0.000508582557644255,
15
+ "loss": 0.309,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5168316831683168,
21
+ "eval_f1": 0.1643835616438356,
22
+ "eval_loss": 0.30045250058174133,
23
+ "eval_mcc": 0.059239742807176775,
24
+ "eval_precision": 0.6,
25
+ "eval_recall": 0.09523809523809523,
26
+ "eval_runtime": 0.9336,
27
+ "eval_samples_per_second": 540.929,
28
+ "eval_steps_per_second": 17.138,
29
  "step": 297
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 0.21939373016357422,
34
+ "learning_rate": 0.0002542912788221275,
35
+ "loss": 0.3016,
36
  "step": 594
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_accuracy": 0.5128712871287129,
41
+ "eval_f1": 0.14583333333333331,
42
+ "eval_loss": 0.29779767990112305,
43
+ "eval_mcc": 0.046724423554504804,
44
+ "eval_precision": 0.5833333333333334,
45
+ "eval_recall": 0.08333333333333333,
46
+ "eval_runtime": 0.9339,
47
+ "eval_samples_per_second": 540.765,
48
+ "eval_steps_per_second": 17.133,
49
  "step": 594
50
  }
51
  ],
 
54
  "num_input_tokens_seen": 0,
55
  "num_train_epochs": 3,
56
  "save_steps": 500,
57
+ "total_flos": 1461402531960.0,
58
  "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.37059488996882817,
62
+ "learning_rate": 0.0007628738364663827,
63
  "num_train_epochs": 3,
64
+ "temperature": 5
65
  }
66
  }
run-3/checkpoint-594/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9decda4f5411e0ed12b24ed68c82ec0237bdb8018a369738f83b0ca5fb77d628
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77ece67af99569b700b80b7c3d227a73e33d56768e0354e21fbf558f6ec676a
3
+ size 4920