HikasaHana commited on
Commit
b41f2d8
·
verified ·
1 Parent(s): 4009b85

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de9d4e6af8c11cd29e69c392ec14d9364b5fc0b427d4a2da5ebbe57218e19cc2
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5578deba1dcc91c0b71b46aef08403e64da6a3a35df1fb9077b218d87533013d
3
  size 409103316
run-4/checkpoint-134/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c67fb7b9358f57bfdaef0c3589bb5904c67c8347682bd50405361ed65907e33
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5578deba1dcc91c0b71b46aef08403e64da6a3a35df1fb9077b218d87533013d
3
  size 409103316
run-4/checkpoint-134/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ff7a2c56a2166efd74e24bd5712b8fd2fd7e799c8835c6efdc74ecaa1e64002
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229ed86a94f4b0d5761a6ef084d195ff75f9bfcf0ffc36de779d6024045852f8
3
  size 818327802
run-4/checkpoint-134/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5a4f9e43d7415e006adfefe55fd5f201e0050139566d60db3f9915f9f0e69d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74c8a105948d48dcd15e7028bd2fa56cfb6cdd52de3679c246102a646100df2
3
  size 1064
run-4/checkpoint-134/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6610550880432129,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-134",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,26 +10,26 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7248826291079812,
14
- "eval_loss": 0.6610550880432129,
15
- "eval_runtime": 2.0768,
16
- "eval_samples_per_second": 1025.597,
17
- "eval_steps_per_second": 64.521,
18
  "step": 134
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 804,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 6,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 64,
28
  "trial_name": null,
29
  "trial_params": {
30
- "learning_rate": 1.1434279877264858e-05,
31
- "num_train_epochs": 6,
32
  "per_device_train_batch_size": 64,
33
- "weight_decay": 0.007798418229484321
34
  }
35
  }
 
1
  {
2
+ "best_metric": 0.632646918296814,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-134",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7352112676056338,
14
+ "eval_loss": 0.632646918296814,
15
+ "eval_runtime": 1.9849,
16
+ "eval_samples_per_second": 1073.124,
17
+ "eval_steps_per_second": 67.511,
18
  "step": 134
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 536,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 4,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 64,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "learning_rate": 2.9536715900078686e-05,
31
+ "num_train_epochs": 4,
32
  "per_device_train_batch_size": 64,
33
+ "weight_decay": 0.0002947113945627026
34
  }
35
  }
run-4/checkpoint-134/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d75b109551f68ad4f0f12814a2fb4545b6f0e4d94a8e75020f66e88d8a06ea
3
  size 4856
run-4/checkpoint-268/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc9eb0941b8fb2df459d3d00b865513510c96290e47db2cfc7345527f4e296c0
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb5f6d8e7fd35a1b474339dc030269c4e58beeb61e1aae7e7a6986484220326
3
  size 409103316
run-4/checkpoint-268/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:216c153733556ac4ce9db69c37cbcb2c1162e907d154a9eded2328d829e99e68
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed4b64e811ecb30a9114ccec322170b49bef2f20d58f5dc9fdab1d26175ee05
3
  size 818327802
run-4/checkpoint-268/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f20e2d43cb43bcddd3be93609f4dfb28258ca14684d749f83306b1b2d248346f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94a9c9791dff14b2da8a37b3531396d2d83be144283e29ef8ac77b77d84d0480
3
  size 1064
run-4/checkpoint-268/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6047325730323792,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,35 +10,35 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7248826291079812,
14
- "eval_loss": 0.6610550880432129,
15
- "eval_runtime": 2.0768,
16
- "eval_samples_per_second": 1025.597,
17
- "eval_steps_per_second": 64.521,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.755868544600939,
23
- "eval_loss": 0.6047325730323792,
24
- "eval_runtime": 2.0636,
25
- "eval_samples_per_second": 1032.163,
26
- "eval_steps_per_second": 64.934,
27
  "step": 268
28
  }
29
  ],
30
  "logging_steps": 500,
31
- "max_steps": 804,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 6,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
  "train_batch_size": 64,
37
  "trial_name": null,
38
  "trial_params": {
39
- "learning_rate": 1.1434279877264858e-05,
40
- "num_train_epochs": 6,
41
  "per_device_train_batch_size": 64,
42
- "weight_decay": 0.007798418229484321
43
  }
44
  }
 
1
  {
2
+ "best_metric": 0.6025658845901489,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7352112676056338,
14
+ "eval_loss": 0.632646918296814,
15
+ "eval_runtime": 1.9849,
16
+ "eval_samples_per_second": 1073.124,
17
+ "eval_steps_per_second": 67.511,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.7539906103286385,
23
+ "eval_loss": 0.6025658845901489,
24
+ "eval_runtime": 2.0385,
25
+ "eval_samples_per_second": 1044.892,
26
+ "eval_steps_per_second": 65.735,
27
  "step": 268
28
  }
29
  ],
30
  "logging_steps": 500,
31
+ "max_steps": 536,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 4,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
  "train_batch_size": 64,
37
  "trial_name": null,
38
  "trial_params": {
39
+ "learning_rate": 2.9536715900078686e-05,
40
+ "num_train_epochs": 4,
41
  "per_device_train_batch_size": 64,
42
+ "weight_decay": 0.0002947113945627026
43
  }
44
  }
run-4/checkpoint-268/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d75b109551f68ad4f0f12814a2fb4545b6f0e4d94a8e75020f66e88d8a06ea
3
  size 4856
run-4/checkpoint-402/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fa184fe52664b1fd1422fb1186d2c131a277fe4340bbecf4f067a18ebfea8d4
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da11eae0edb6bae20c6c05602f501038d10490654ac342a1bed244719ae89542
3
  size 409103316
run-4/checkpoint-402/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4aad271e6d658426d0d4432898d11ec71c1de8756433f91545e7345a21139f6
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2fd1d1f2e6e05d61a164a598fe811ae24a405e889cb6e8a0825e012ca4273f
3
  size 818327802
run-4/checkpoint-402/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe0f6a964ee8f879576dfe922482680498e583a2b9aa9084940f5fc6c0a74ee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737a486a5e0651c5b0f4054192d6d4d34363f7e89b41ec47064702648d3f7b4d
3
  size 1064
run-4/checkpoint-402/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6047325730323792,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,44 +10,44 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7248826291079812,
14
- "eval_loss": 0.6610550880432129,
15
- "eval_runtime": 2.0768,
16
- "eval_samples_per_second": 1025.597,
17
- "eval_steps_per_second": 64.521,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.755868544600939,
23
- "eval_loss": 0.6047325730323792,
24
- "eval_runtime": 2.0636,
25
- "eval_samples_per_second": 1032.163,
26
- "eval_steps_per_second": 64.934,
27
  "step": 268
28
  },
29
  {
30
  "epoch": 3.0,
31
- "eval_accuracy": 0.7568075117370892,
32
- "eval_loss": 0.6309530138969421,
33
- "eval_runtime": 2.6635,
34
- "eval_samples_per_second": 799.702,
35
- "eval_steps_per_second": 50.31,
36
  "step": 402
37
  }
38
  ],
39
  "logging_steps": 500,
40
- "max_steps": 804,
41
  "num_input_tokens_seen": 0,
42
- "num_train_epochs": 6,
43
  "save_steps": 500,
44
  "total_flos": 0,
45
  "train_batch_size": 64,
46
  "trial_name": null,
47
  "trial_params": {
48
- "learning_rate": 1.1434279877264858e-05,
49
- "num_train_epochs": 6,
50
  "per_device_train_batch_size": 64,
51
- "weight_decay": 0.007798418229484321
52
  }
53
  }
 
1
  {
2
+ "best_metric": 0.6025658845901489,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7352112676056338,
14
+ "eval_loss": 0.632646918296814,
15
+ "eval_runtime": 1.9849,
16
+ "eval_samples_per_second": 1073.124,
17
+ "eval_steps_per_second": 67.511,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.7539906103286385,
23
+ "eval_loss": 0.6025658845901489,
24
+ "eval_runtime": 2.0385,
25
+ "eval_samples_per_second": 1044.892,
26
+ "eval_steps_per_second": 65.735,
27
  "step": 268
28
  },
29
  {
30
  "epoch": 3.0,
31
+ "eval_accuracy": 0.760093896713615,
32
+ "eval_loss": 0.6695208549499512,
33
+ "eval_runtime": 2.0168,
34
+ "eval_samples_per_second": 1056.124,
35
+ "eval_steps_per_second": 66.442,
36
  "step": 402
37
  }
38
  ],
39
  "logging_steps": 500,
40
+ "max_steps": 536,
41
  "num_input_tokens_seen": 0,
42
+ "num_train_epochs": 4,
43
  "save_steps": 500,
44
  "total_flos": 0,
45
  "train_batch_size": 64,
46
  "trial_name": null,
47
  "trial_params": {
48
+ "learning_rate": 2.9536715900078686e-05,
49
+ "num_train_epochs": 4,
50
  "per_device_train_batch_size": 64,
51
+ "weight_decay": 0.0002947113945627026
52
  }
53
  }
run-4/checkpoint-402/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d75b109551f68ad4f0f12814a2fb4545b6f0e4d94a8e75020f66e88d8a06ea
3
  size 4856
run-4/checkpoint-536/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e7180d1e19a898f1beb292a73f8386dde07cf28e7b65dcfb30d59b43d88e596
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fa86ea147adb533de5eb931defb14a81be092df5d218a025df2be2d814c7ee
3
  size 409103316
run-4/checkpoint-536/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0945796164298b8dae5947303aaaf9d12dcf5f2fc2419842a33b25cbf85d840c
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea0156549a0314406690dc1226e505cfbc3e0d8632b34987df784f9e62ce161
3
  size 818327802
run-4/checkpoint-536/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51915d7c9d6b8048cea791a1c7b08ecfce8e5f266f0131ad618c93aed2f4d5cf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ee0f07b286c086c73cee7fc809e9534cbed92c08f78d38e5de402489003e2f
3
  size 1064
run-4/checkpoint-536/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6047325730323792,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
@@ -10,60 +10,60 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7248826291079812,
14
- "eval_loss": 0.6610550880432129,
15
- "eval_runtime": 2.0768,
16
- "eval_samples_per_second": 1025.597,
17
- "eval_steps_per_second": 64.521,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.755868544600939,
23
- "eval_loss": 0.6047325730323792,
24
- "eval_runtime": 2.0636,
25
- "eval_samples_per_second": 1032.163,
26
- "eval_steps_per_second": 64.934,
27
  "step": 268
28
  },
29
  {
30
  "epoch": 3.0,
31
- "eval_accuracy": 0.7568075117370892,
32
- "eval_loss": 0.6309530138969421,
33
- "eval_runtime": 2.6635,
34
- "eval_samples_per_second": 799.702,
35
- "eval_steps_per_second": 50.31,
36
  "step": 402
37
  },
38
  {
39
  "epoch": 3.73,
40
- "grad_norm": 9.669575691223145,
41
- "learning_rate": 4.323409306826513e-06,
42
- "loss": 0.5385,
43
  "step": 500
44
  },
45
  {
46
  "epoch": 4.0,
47
- "eval_accuracy": 0.7577464788732394,
48
- "eval_loss": 0.6676124334335327,
49
- "eval_runtime": 2.0995,
50
- "eval_samples_per_second": 1014.528,
51
- "eval_steps_per_second": 63.825,
52
  "step": 536
53
  }
54
  ],
55
  "logging_steps": 500,
56
- "max_steps": 804,
57
  "num_input_tokens_seen": 0,
58
- "num_train_epochs": 6,
59
  "save_steps": 500,
60
  "total_flos": 709335274032504.0,
61
  "train_batch_size": 64,
62
  "trial_name": null,
63
  "trial_params": {
64
- "learning_rate": 1.1434279877264858e-05,
65
- "num_train_epochs": 6,
66
  "per_device_train_batch_size": 64,
67
- "weight_decay": 0.007798418229484321
68
  }
69
  }
 
1
  {
2
+ "best_metric": 0.6025658845901489,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7352112676056338,
14
+ "eval_loss": 0.632646918296814,
15
+ "eval_runtime": 1.9849,
16
+ "eval_samples_per_second": 1073.124,
17
+ "eval_steps_per_second": 67.511,
18
  "step": 134
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.7539906103286385,
23
+ "eval_loss": 0.6025658845901489,
24
+ "eval_runtime": 2.0385,
25
+ "eval_samples_per_second": 1044.892,
26
+ "eval_steps_per_second": 65.735,
27
  "step": 268
28
  },
29
  {
30
  "epoch": 3.0,
31
+ "eval_accuracy": 0.760093896713615,
32
+ "eval_loss": 0.6695208549499512,
33
+ "eval_runtime": 2.0168,
34
+ "eval_samples_per_second": 1056.124,
35
+ "eval_steps_per_second": 66.442,
36
  "step": 402
37
  },
38
  {
39
  "epoch": 3.73,
40
+ "grad_norm": 11.244783401489258,
41
+ "learning_rate": 1.9838092768709567e-06,
42
+ "loss": 0.4285,
43
  "step": 500
44
  },
45
  {
46
  "epoch": 4.0,
47
+ "eval_accuracy": 0.7666666666666667,
48
+ "eval_loss": 0.7637132406234741,
49
+ "eval_runtime": 2.0114,
50
+ "eval_samples_per_second": 1058.984,
51
+ "eval_steps_per_second": 66.622,
52
  "step": 536
53
  }
54
  ],
55
  "logging_steps": 500,
56
+ "max_steps": 536,
57
  "num_input_tokens_seen": 0,
58
+ "num_train_epochs": 4,
59
  "save_steps": 500,
60
  "total_flos": 709335274032504.0,
61
  "train_batch_size": 64,
62
  "trial_name": null,
63
  "trial_params": {
64
+ "learning_rate": 2.9536715900078686e-05,
65
+ "num_train_epochs": 4,
66
  "per_device_train_batch_size": 64,
67
+ "weight_decay": 0.0002947113945627026
68
  }
69
  }
run-4/checkpoint-536/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d75b109551f68ad4f0f12814a2fb4545b6f0e4d94a8e75020f66e88d8a06ea
3
  size 4856
runs/Apr18_09-56-56_da5e8340a418/events.out.tfevents.1713436018.da5e8340a418.15241.24 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cff5eed09f5b19ba55a299f7f4a2f3f384ee57ac55d8d8c9179d1b02e9606fd
3
+ size 6768
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064739bef7fc58b5c8718d2766687070f99f1054b015389081d170a0b0689377
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d75b109551f68ad4f0f12814a2fb4545b6f0e4d94a8e75020f66e88d8a06ea
3
  size 4856