HikasaHana committed on
Commit
4f0d978
·
verified ·
1 Parent(s): 8b2dedc

Training in progress, epoch 3

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:820c084205589a5efc90f718f254e9298580a8c2d7e7c8732b8e7ce2edb3a47e
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039221f87fe5006e5a007dc56327ae02634d85210f4b85121cfc93ce774db253
3
  size 409103316
run-2/checkpoint-1066/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:646bf64f2d7588bc518e6f45e4153810e63b161c8d62988b9688bcf154da9d40
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca35b271530315f2f01ac7abd9889d24774f1c3eedefb9435700d38b775ff18
3
  size 409103316
run-2/checkpoint-1066/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbcfcffbbed7e27fc89e35841a8260e7db4f5de1c56df37bea02732b92a433cf
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228635f9d33fbffe83173d5ba2f01c1d6d488e839960208c15b03588eda72308
3
  size 818327802
run-2/checkpoint-1066/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9447444aa7c89c55e6d4117866cbaeb3c3393fad8adc5cb2275d3443f1381bc0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c26a4462a8ed7deae233a5c9e29e1937c85b2f67202abb6545afceac724111
3
  size 1064
run-2/checkpoint-1066/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6080508828163147,
3
- "best_model_checkpoint": "BERT-WMM/run-2/checkpoint-533",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1066,
@@ -10,49 +10,46 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
- "grad_norm": 9.333391189575195,
14
- "learning_rate": 1.2018217372024236e-05,
15
- "loss": 0.7056,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7427230046948357,
21
- "eval_loss": 0.6080508828163147,
22
- "eval_runtime": 1.9882,
23
- "eval_samples_per_second": 1071.329,
24
- "eval_steps_per_second": 67.398,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
- "grad_norm": 10.452563285827637,
30
- "learning_rate": 9.790971750818855e-06,
31
- "loss": 0.4566,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7647887323943662,
37
- "eval_loss": 0.6108278632164001,
38
- "eval_runtime": 2.0426,
39
- "eval_samples_per_second": 1042.799,
40
- "eval_steps_per_second": 65.603,
41
  "step": 1066
42
  }
43
  ],
44
  "logging_steps": 500,
45
- "max_steps": 3198,
46
  "num_input_tokens_seen": 0,
47
- "num_train_epochs": 6,
48
  "save_steps": 500,
49
  "total_flos": 338261076519408.0,
50
  "train_batch_size": 16,
51
  "trial_name": null,
52
  "trial_params": {
53
- "learning_rate": 1.4245462993229616e-05,
54
- "num_train_epochs": 6,
55
- "per_device_train_batch_size": 16,
56
- "weight_decay": 0.002285356379677195
57
  }
58
  }
 
1
  {
2
+ "best_metric": 0.6166056394577026,
3
+ "best_model_checkpoint": "BERT-WMM/run-2/checkpoint-1066",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1066,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
+ "grad_norm": 11.24305248260498,
14
+ "learning_rate": 2.922625428602704e-06,
15
+ "loss": 0.7917,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7352112676056338,
21
+ "eval_loss": 0.647247314453125,
22
+ "eval_runtime": 2.1238,
23
+ "eval_samples_per_second": 1002.939,
24
+ "eval_steps_per_second": 63.096,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
+ "grad_norm": 23.33644676208496,
30
+ "learning_rate": 1.5929505293294082e-06,
31
+ "loss": 0.5783,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.7511737089201878,
37
+ "eval_loss": 0.6166056394577026,
38
+ "eval_runtime": 2.7158,
39
+ "eval_samples_per_second": 784.299,
40
+ "eval_steps_per_second": 49.341,
41
  "step": 1066
42
  }
43
  ],
44
  "logging_steps": 500,
45
+ "max_steps": 1599,
46
  "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 3,
48
  "save_steps": 500,
49
  "total_flos": 338261076519408.0,
50
  "train_batch_size": 16,
51
  "trial_name": null,
52
  "trial_params": {
53
+ "learning_rate": 4.252300327876e-06
 
 
 
54
  }
55
  }
run-2/checkpoint-1066/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a522d40064169dd451f5c63d95d053409f9056cc8f37d6ef08fd626cff2d294
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b47f82980a9cdafe965500d0bc73ea3bdf726aae69cba15d6dccf27551f040c
3
  size 4856
run-2/checkpoint-1599/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:423fd0a8c6263e8c2d7b12bb899ce6b42430747b90dea9c8dbaf7138c4959529
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039221f87fe5006e5a007dc56327ae02634d85210f4b85121cfc93ce774db253
3
  size 409103316
run-2/checkpoint-1599/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa231502f99c5628f628a6bc8a1b2723ff93692bb148192c6f7c3f169afdd47c
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cb7773c8e10dd0f24fff2834e66435c66947f925e0e6a1322f10e4198ca110
3
  size 818327802
run-2/checkpoint-1599/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abc0f1b7e77e27c09ef04236a0848487a399bf089fa6c70bbf2124a69abd62a8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990859fad7f2a4e71666497c0ed5862e191297af7bf46464b786b129838d8ea1
3
  size 1064
run-2/checkpoint-1599/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6080508828163147,
3
- "best_model_checkpoint": "BERT-WMM/run-2/checkpoint-533",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 1599,
@@ -10,65 +10,62 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
- "grad_norm": 9.333391189575195,
14
- "learning_rate": 1.2018217372024236e-05,
15
- "loss": 0.7056,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7427230046948357,
21
- "eval_loss": 0.6080508828163147,
22
- "eval_runtime": 1.9882,
23
- "eval_samples_per_second": 1071.329,
24
- "eval_steps_per_second": 67.398,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
- "grad_norm": 10.452563285827637,
30
- "learning_rate": 9.790971750818855e-06,
31
- "loss": 0.4566,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7647887323943662,
37
- "eval_loss": 0.6108278632164001,
38
- "eval_runtime": 2.0426,
39
- "eval_samples_per_second": 1042.799,
40
- "eval_steps_per_second": 65.603,
41
  "step": 1066
42
  },
43
  {
44
  "epoch": 2.81,
45
- "grad_norm": 15.208818435668945,
46
- "learning_rate": 7.563726129613473e-06,
47
- "loss": 0.2952,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.7723004694835681,
53
- "eval_loss": 0.7061524391174316,
54
- "eval_runtime": 2.0864,
55
- "eval_samples_per_second": 1020.91,
56
- "eval_steps_per_second": 64.226,
57
  "step": 1599
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 3198,
62
  "num_input_tokens_seen": 0,
63
- "num_train_epochs": 6,
64
  "save_steps": 500,
65
  "total_flos": 507646505902536.0,
66
  "train_batch_size": 16,
67
  "trial_name": null,
68
  "trial_params": {
69
- "learning_rate": 1.4245462993229616e-05,
70
- "num_train_epochs": 6,
71
- "per_device_train_batch_size": 16,
72
- "weight_decay": 0.002285356379677195
73
  }
74
  }
 
1
  {
2
+ "best_metric": 0.6166056394577026,
3
+ "best_model_checkpoint": "BERT-WMM/run-2/checkpoint-1066",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 1599,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
+ "grad_norm": 11.24305248260498,
14
+ "learning_rate": 2.922625428602704e-06,
15
+ "loss": 0.7917,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7352112676056338,
21
+ "eval_loss": 0.647247314453125,
22
+ "eval_runtime": 2.1238,
23
+ "eval_samples_per_second": 1002.939,
24
+ "eval_steps_per_second": 63.096,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
+ "grad_norm": 23.33644676208496,
30
+ "learning_rate": 1.5929505293294082e-06,
31
+ "loss": 0.5783,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.7511737089201878,
37
+ "eval_loss": 0.6166056394577026,
38
+ "eval_runtime": 2.7158,
39
+ "eval_samples_per_second": 784.299,
40
+ "eval_steps_per_second": 49.341,
41
  "step": 1066
42
  },
43
  {
44
  "epoch": 2.81,
45
+ "grad_norm": 19.725927352905273,
46
+ "learning_rate": 2.6327563005611253e-07,
47
+ "loss": 0.5084,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.7549295774647887,
53
+ "eval_loss": 0.6294087171554565,
54
+ "eval_runtime": 2.0645,
55
+ "eval_samples_per_second": 1031.734,
56
+ "eval_steps_per_second": 64.907,
57
  "step": 1599
58
  }
59
  ],
60
  "logging_steps": 500,
61
+ "max_steps": 1599,
62
  "num_input_tokens_seen": 0,
63
+ "num_train_epochs": 3,
64
  "save_steps": 500,
65
  "total_flos": 507646505902536.0,
66
  "train_batch_size": 16,
67
  "trial_name": null,
68
  "trial_params": {
69
+ "learning_rate": 4.252300327876e-06
 
 
 
70
  }
71
  }
run-2/checkpoint-1599/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a522d40064169dd451f5c63d95d053409f9056cc8f37d6ef08fd626cff2d294
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b47f82980a9cdafe965500d0bc73ea3bdf726aae69cba15d6dccf27551f040c
3
  size 4856
runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713458197.544fc269209b.792.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54e3967c5a0ce893b9d481f47c3bcae87a2723a978aba5962270b4b7ffe6994a
3
- size 5635
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a461b67c74679f1a4f5f40cfa5cf536220ee957118befef57aa628677112961e
3
+ size 6846