xuancoblab2023 committed on
Commit
062d1e9
·
verified ·
1 Parent(s): eb15430

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1711273902.73e3a81c01ef.4225.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52639697b4c0801d45c96b78b024abec657c666239e4bd37557653bccf89b094
3
- size 5407
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74e26e192d8c7f213d390797c7892456060b9a21f5cea17a996123c87d0b2be8
3
+ size 6137
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9b577371af93984a46904b9481062b830719a7ec03c064beb972380f48b9bc0
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a388d72e55b7953b67cdfa00ddc695e26206a74815a6d36483067f798bda2f
3
  size 17549312
run-3/checkpoint-384/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-3/checkpoint-384/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc097722feb26fcf6693b9dbe97ea3a76b6802da9cd21cb5bf38424a0f3cfbad
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a388d72e55b7953b67cdfa00ddc695e26206a74815a6d36483067f798bda2f
3
  size 17549312
run-3/checkpoint-384/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12d4c8e2b1db72df2a15dfb1bed5f04a8214b8e53b3366ebb528306bcc4f8073
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f464f717f404629956b434b8f10ba552dc7072676fd35983f5993b56e5a75d8
3
  size 35122746
run-3/checkpoint-384/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b1fc07ed8b121716021ec87c686f1cd9c3b89c82ea08a6a0792d47a39077c9
3
  size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2558f72cda987826e5e7caf54cc6282fe335ebecbddfed7bb83f0184d1f54cc
3
  size 14054
run-3/checkpoint-384/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9ff4fe586e7eb333ddca8c1956ec7e088a7d846492f389a13a5f39b9e31e62d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad75831118aa992f2e70ece558a78a8328cfe8f2d54db597c8aa3051d18df32
3
  size 1064
run-3/checkpoint-384/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.5009784735812133,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-384",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 384,
7
  "is_hyper_param_search": true,
@@ -10,93 +10,58 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 2.101290702819824,
14
- "learning_rate": 1.1962059637335337e-06,
15
- "loss": 0.5173,
16
- "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5,
21
- "eval_f1": 0.6666666666666666,
22
- "eval_loss": 0.5046124458312988,
23
- "eval_precision": 0.5,
24
- "eval_recall": 1.0,
25
- "eval_runtime": 28.251,
26
- "eval_samples_per_second": 36.176,
27
- "eval_steps_per_second": 1.133,
28
- "step": 96
29
- },
30
- {
31
- "epoch": 2.0,
32
- "grad_norm": 0.6350829005241394,
33
- "learning_rate": 1.0466802182668422e-06,
34
- "loss": 0.4989,
35
  "step": 192
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.4843444227005871,
40
- "eval_f1": 0.6216798277099785,
41
- "eval_loss": 0.48798972368240356,
42
- "eval_precision": 0.4909297052154195,
43
- "eval_recall": 0.8473581213307241,
44
- "eval_runtime": 28.492,
45
- "eval_samples_per_second": 35.87,
46
- "eval_steps_per_second": 1.123,
47
- "step": 192
48
- },
49
- {
50
- "epoch": 3.0,
51
- "grad_norm": 1.8015046119689941,
52
- "learning_rate": 8.971544728001503e-07,
53
- "loss": 0.4869,
54
- "step": 288
55
- },
56
- {
57
- "epoch": 3.0,
58
- "eval_accuracy": 0.48238747553816047,
59
- "eval_f1": 0.09572649572649572,
60
- "eval_loss": 0.4779175817966461,
61
- "eval_precision": 0.3783783783783784,
62
- "eval_recall": 0.0547945205479452,
63
- "eval_runtime": 28.2537,
64
- "eval_samples_per_second": 36.172,
65
- "eval_steps_per_second": 1.133,
66
- "step": 288
67
- },
68
- {
69
- "epoch": 4.0,
70
- "grad_norm": 2.0801610946655273,
71
- "learning_rate": 7.476287273334586e-07,
72
- "loss": 0.4795,
73
  "step": 384
74
  },
75
  {
76
- "epoch": 4.0,
77
- "eval_accuracy": 0.5009784735812133,
78
- "eval_f1": 0.00390625,
79
- "eval_loss": 0.47173693776130676,
80
- "eval_precision": 1.0,
81
- "eval_recall": 0.0019569471624266144,
82
- "eval_runtime": 28.2359,
83
- "eval_samples_per_second": 36.195,
84
- "eval_steps_per_second": 1.133,
 
85
  "step": 384
86
  }
87
  ],
88
  "logging_steps": 500,
89
- "max_steps": 864,
90
  "num_input_tokens_seen": 0,
91
- "num_train_epochs": 9,
92
  "save_steps": 500,
93
- "total_flos": 942780789120.0,
94
- "train_batch_size": 32,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.586704262013931,
98
- "learning_rate": 1.3457317092002255e-06,
99
- "num_train_epochs": 9,
100
- "temperature": 21
 
101
  }
102
  }
 
1
  {
2
+ "best_metric": 0.5459882583170255,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-384",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 384,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.7568773031234741,
14
+ "learning_rate": 0.00039540069434609415,
15
+ "loss": 0.3274,
16
+ "step": 192
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5430528375733855,
21
+ "eval_f1": 0.20442930153321975,
22
+ "eval_loss": 0.30545827746391296,
23
+ "eval_mcc": 0.1640968755879385,
24
+ "eval_precision": 0.7894736842105263,
25
+ "eval_recall": 0.11741682974559686,
26
+ "eval_runtime": 66.7048,
27
+ "eval_samples_per_second": 15.321,
28
+ "eval_steps_per_second": 0.48,
 
 
 
 
 
 
29
  "step": 192
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 1.822717547416687,
34
+ "learning_rate": 0.0003295005786217451,
35
+ "loss": 0.3076,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "step": 384
37
  },
38
  {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.5459882583170255,
41
+ "eval_f1": 0.21355932203389827,
42
+ "eval_loss": 0.2995118200778961,
43
+ "eval_mcc": 0.17219814389174506,
44
+ "eval_precision": 0.7974683544303798,
45
+ "eval_recall": 0.1232876712328767,
46
+ "eval_runtime": 66.8,
47
+ "eval_samples_per_second": 15.299,
48
+ "eval_steps_per_second": 0.479,
49
  "step": 384
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 1344,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 7,
56
  "save_steps": 500,
57
+ "total_flos": 471390394560.0,
58
+ "train_batch_size": 16,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.4369516344275761,
62
+ "learning_rate": 0.00046130081007044317,
63
+ "num_train_epochs": 7,
64
+ "per_device_train_batch_size": 16,
65
+ "temperature": 15
66
  }
67
  }
run-3/checkpoint-384/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b765cab6920f955460cc4f812305ba7f085db0f4dbb906f08e22ff912fece657
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1984bbced85533f487884fbafe7fdded01dc8a3f7df5f2a96fbc5883290bb68c
3
+ size 4984