xuancoblab2023 committed
Commit 74ee53a · verified · 1 Parent(s): 10f7e8f

Training in progress, epoch 7

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. logs/events.out.tfevents.1713593661.1376c752d37a.9760.55 +3 -0
  2. logs/events.out.tfevents.1713593732.1376c752d37a.9760.56 +3 -0
  3. logs/events.out.tfevents.1713594035.1376c752d37a.9760.57 +3 -0
  4. logs/events.out.tfevents.1713594075.1376c752d37a.9760.58 +3 -0
  5. logs/events.out.tfevents.1713594312.1376c752d37a.9760.59 +3 -0
  6. logs/events.out.tfevents.1713594347.1376c752d37a.9760.60 +3 -0
  7. model.safetensors +1 -1
  8. run-2/checkpoint-214/model.safetensors +1 -1
  9. run-2/checkpoint-214/optimizer.pt +1 -1
  10. run-2/checkpoint-214/scheduler.pt +1 -1
  11. run-2/checkpoint-214/trainer_state.json +19 -19
  12. run-2/checkpoint-214/training_args.bin +1 -1
  13. run-3/checkpoint-214/model.safetensors +1 -1
  14. run-3/checkpoint-214/optimizer.pt +1 -1
  15. run-3/checkpoint-214/scheduler.pt +1 -1
  16. run-3/checkpoint-214/trainer_state.json +13 -13
  17. run-3/checkpoint-214/training_args.bin +1 -1
  18. run-3/checkpoint-3852/config.json +34 -0
  19. run-3/checkpoint-3852/model.safetensors +3 -0
  20. run-3/checkpoint-3852/optimizer.pt +3 -0
  21. run-3/checkpoint-3852/rng_state.pth +3 -0
  22. run-3/checkpoint-3852/scheduler.pt +3 -0
  23. run-3/checkpoint-3852/special_tokens_map.json +7 -0
  24. run-3/checkpoint-3852/tokenizer.json +0 -0
  25. run-3/checkpoint-3852/tokenizer_config.json +57 -0
  26. run-3/checkpoint-3852/trainer_state.json +386 -0
  27. run-3/checkpoint-3852/training_args.bin +3 -0
  28. run-3/checkpoint-3852/vocab.txt +0 -0
  29. run-4/checkpoint-214/model.safetensors +1 -1
  30. run-4/checkpoint-214/optimizer.pt +1 -1
  31. run-4/checkpoint-214/scheduler.pt +1 -1
  32. run-4/checkpoint-214/trainer_state.json +17 -17
  33. run-4/checkpoint-214/training_args.bin +1 -1
  34. run-5/checkpoint-2782/config.json +34 -0
  35. run-5/checkpoint-2782/model.safetensors +3 -0
  36. run-5/checkpoint-2782/optimizer.pt +3 -0
  37. run-5/checkpoint-2782/rng_state.pth +3 -0
  38. run-5/checkpoint-2782/scheduler.pt +3 -0
  39. run-5/checkpoint-2782/special_tokens_map.json +7 -0
  40. run-5/checkpoint-2782/tokenizer.json +0 -0
  41. run-5/checkpoint-2782/tokenizer_config.json +57 -0
  42. run-5/checkpoint-2782/trainer_state.json +286 -0
  43. run-5/checkpoint-2782/training_args.bin +3 -0
  44. run-5/checkpoint-2782/vocab.txt +0 -0
  45. run-5/checkpoint-2996/config.json +34 -0
  46. run-5/checkpoint-2996/model.safetensors +3 -0
  47. run-5/checkpoint-2996/optimizer.pt +3 -0
  48. run-5/checkpoint-2996/rng_state.pth +3 -0
  49. run-5/checkpoint-2996/scheduler.pt +3 -0
  50. run-5/checkpoint-2996/special_tokens_map.json +7 -0
logs/events.out.tfevents.1713593661.1376c752d37a.9760.55 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8009594d2527ae229e87afd4cb729c25d630f5825aff6d5e9f1e8d1389013799
+ size 8026
logs/events.out.tfevents.1713593732.1376c752d37a.9760.56 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3cabe0008807ecd78a2c7b113878dca94a39998c3e10953233c42d951defc415
+ size 18247
logs/events.out.tfevents.1713594035.1376c752d37a.9760.57 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49beab12c4e3ef5ff3262c3ffb44c387c967351221d48e178adb032724929f46
+ size 6567
logs/events.out.tfevents.1713594075.1376c752d37a.9760.58 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:411418ac216d44061494f401acede03bec617323b15329c16314ff7f30207b16
+ size 15328
logs/events.out.tfevents.1713594312.1376c752d37a.9760.59 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c225c7802a7117f78efe0413b6a2b0c5a505c515b7bc04c091b004fce025484
+ size 6566
logs/events.out.tfevents.1713594347.1376c752d37a.9760.60 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a56bad7178fd15d9a32046dcac281f0bde891f44eea75aefec6cf9786cc89b8
+ size 10216
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dbec1f24b0dd6424b800ee43d82ad08f76324c77508985422e71a486f73e8ae5
+ oid sha256:4040c6ca96a2c15bdf2ff74f1b239c6f9435742bf73587eba68d44b6a1ab734e
  size 17549312
run-2/checkpoint-214/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5a61439248471c62817c89323c5caa7b4350eb7c827fc7cd24810931f3852c1a
+ oid sha256:91b360bee990466984fb698a0ffcad6e057a232f7e1b6b33ab9bf2e2b704c904
  size 17549312
run-2/checkpoint-214/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1e0f9fcb2d388bb05282f5a4689cf4c3ea085ee8b499953eafba6d798f5b7a3f
+ oid sha256:9f2b0ab8d362426877965cce152dc2bf0ebd209219b6cd513cff47acfcbdbe9e
  size 35123898
run-2/checkpoint-214/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8d4cb2348845564f50a1a5546a0d4bbac18f8f3ba05db9b86ec265c0fa8d0e64
+ oid sha256:afdbe5132d3e0d223635ff6bbe3337dce070c92e47612159f723c6739d0c83c3
  size 1064
run-2/checkpoint-214/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
  {
- "best_metric": 0.7223198594024605,
+ "best_metric": 0.6666666666666666,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-214",
  "epoch": 1.0,
  "eval_steps": 500,
@@ -10,37 +10,37 @@
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 4.634866237640381,
- "learning_rate": 0.00022230485769079655,
- "loss": 0.4904,
+ "grad_norm": 0.8526851534843445,
+ "learning_rate": 3.348514285737282e-05,
+ "loss": 0.1338,
  "step": 214
  },
  {
  "epoch": 1.0,
- "eval_accuracy": 0.7223198594024605,
- "eval_f1": 0.4261501210653753,
- "eval_loss": 0.456611692905426,
- "eval_mcc": 0.31390897314151045,
- "eval_precision": 0.6848249027237354,
- "eval_recall": 0.3093145869947276,
- "eval_runtime": 3.1485,
- "eval_samples_per_second": 542.168,
- "eval_steps_per_second": 17.151,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.11370033770799637,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.16,
+ "eval_samples_per_second": 540.192,
+ "eval_steps_per_second": 17.089,
  "step": 214
  }
  ],
  "logging_steps": 500,
- "max_steps": 1926,
+ "max_steps": 856,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 9,
+ "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 524775664440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.7932456774257669,
- "learning_rate": 0.00025009296490214613,
- "num_train_epochs": 9,
- "temperature": 23
+ "alpha": 0.1601102954722291,
+ "learning_rate": 4.464685714316376e-05,
+ "num_train_epochs": 4,
+ "temperature": 38
  }
  }
run-2/checkpoint-214/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:89992d1c1b0b499ec7dee4b5a730ed684159d7afeb30eaab398451703cc62b58
+ oid sha256:4c0ef365fe50a6073968feea42d2758a897793d1d86a57928a5d31624c85a571
  size 5048
run-3/checkpoint-214/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:06a4d6bc70e2d5e8a2a3750ec5030e64a4a1396309fd7cd2c1e4cdda62b2f0e8
+ oid sha256:e59bfb9a3843c20c0d11eed6db1a213c1ff77b97d5015f24cff091378407cf13
  size 17549312
run-3/checkpoint-214/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:65560c37a015d2f1bd16e59b8e7908b24ad628feebd46427c18a6cc54c5cbe0d
+ oid sha256:9ce1b8c9cea898903bab38e2fe6ed752f8ee16dd59364f80665e6c1a81714dc0
  size 35123898
run-3/checkpoint-214/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:621fabe3af1b7babf77acd5943b5bf3a0e4ad80bd636e511bd3fa3c04c3d8b9f
+ oid sha256:4587bd028004313ce34a98ed29db7c6b399fd8605802089960a4f4c58fd8dbea
  size 1064
run-3/checkpoint-214/trainer_state.json CHANGED
@@ -10,37 +10,37 @@
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 2.1204113960266113,
- "learning_rate": 1.1932465475413847e-05,
- "loss": 0.4261,
+ "grad_norm": 0.38364019989967346,
+ "learning_rate": 3.063781435548824e-05,
+ "loss": 0.0796,
  "step": 214
  },
  {
  "epoch": 1.0,
  "eval_accuracy": 0.6666666666666666,
  "eval_f1": 0.0,
- "eval_loss": 0.4023595452308655,
+ "eval_loss": 0.054877836257219315,
  "eval_mcc": 0.0,
  "eval_precision": 0.0,
  "eval_recall": 0.0,
- "eval_runtime": 3.137,
- "eval_samples_per_second": 544.145,
- "eval_steps_per_second": 17.214,
+ "eval_runtime": 3.1144,
+ "eval_samples_per_second": 548.1,
+ "eval_steps_per_second": 17.339,
  "step": 214
  }
  ],
  "logging_steps": 500,
- "max_steps": 1070,
+ "max_steps": 3852,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 5,
+ "num_train_epochs": 18,
  "save_steps": 500,
  "total_flos": 524775664440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.6118639206182097,
- "learning_rate": 1.4915581844267308e-05,
- "num_train_epochs": 5,
- "temperature": 15
+ "alpha": 0.06448750556013427,
+ "learning_rate": 3.244003872934049e-05,
+ "num_train_epochs": 18,
+ "temperature": 34
  }
  }
run-3/checkpoint-214/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:621c87997eb03e02a37c6a3e61f45ff57c6fdae7e493961bdadb2e1be324da40
+ oid sha256:2974298f3dbf754c5d8751ea32f4e9bfa36e27ab58bf9e1d7f733ae1be63c4e3
  size 5048
run-3/checkpoint-3852/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 128,
+ "id2label": {
+ "0": "negative",
+ "1": "positive"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 512,
+ "label2id": {
+ "negative": "0",
+ "positive": "1"
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
run-3/checkpoint-3852/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f2db463d704c5ac2d16b27f58d7b92623567dbb475e66b95ccc1433e9d4529c
+ size 17549312
run-3/checkpoint-3852/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85c98ff71eeaf7a4fc8c3a86b29e30cac41209d077c406d306c78bf3cd923a39
+ size 35123898
run-3/checkpoint-3852/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac98c31de125c5253fb0a106018efb545a119b35d21358b33194f6abc5079871
+ size 14308
run-3/checkpoint-3852/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f0ba2a23279c2a5c6295f07bbe3ebb6de15a74e3d93b6cc5dba2e9cf72e7175a
+ size 1064
run-3/checkpoint-3852/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
run-3/checkpoint-3852/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-3852/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
run-3/checkpoint-3852/trainer_state.json ADDED
@@ -0,0 +1,386 @@
+ {
+ "best_metric": 0.6666666666666666,
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-214",
+ "epoch": 18.0,
+ "eval_steps": 500,
+ "global_step": 3852,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.38364019989967346,
+ "learning_rate": 3.063781435548824e-05,
+ "loss": 0.0796,
+ "step": 214
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.054877836257219315,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.1144,
+ "eval_samples_per_second": 548.1,
+ "eval_steps_per_second": 17.339,
+ "step": 214
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 0.44893428683280945,
+ "learning_rate": 2.883558998163599e-05,
+ "loss": 0.054,
+ "step": 428
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.050605349242687225,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.2339,
+ "eval_samples_per_second": 527.845,
+ "eval_steps_per_second": 16.698,
+ "step": 428
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 0.47303804755210876,
+ "learning_rate": 2.7033365607783743e-05,
+ "loss": 0.0517,
+ "step": 642
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.04936650022864342,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.1605,
+ "eval_samples_per_second": 540.104,
+ "eval_steps_per_second": 17.086,
+ "step": 642
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 0.5809402465820312,
+ "learning_rate": 2.523114123393149e-05,
+ "loss": 0.0508,
+ "step": 856
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.013864818024263433,
+ "eval_loss": 0.04913894087076187,
+ "eval_mcc": 0.024260699053001704,
+ "eval_precision": 0.5,
+ "eval_recall": 0.007029876977152899,
+ "eval_runtime": 3.8427,
+ "eval_samples_per_second": 444.219,
+ "eval_steps_per_second": 14.053,
+ "step": 856
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 0.7497197389602661,
+ "learning_rate": 2.3428916860079242e-05,
+ "loss": 0.0505,
+ "step": 1070
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.017271157167530225,
+ "eval_loss": 0.04886631295084953,
+ "eval_mcc": 0.027140265094376777,
+ "eval_precision": 0.5,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1197,
+ "eval_samples_per_second": 547.164,
+ "eval_steps_per_second": 17.309,
+ "step": 1070
+ },
+ {
+ "epoch": 6.0,
+ "grad_norm": 0.3793525993824005,
+ "learning_rate": 2.162669248622699e-05,
+ "loss": 0.0503,
+ "step": 1284
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.664323374340949,
+ "eval_f1": 0.017152658662092625,
+ "eval_loss": 0.048080265522003174,
+ "eval_mcc": 0.004592958330124466,
+ "eval_precision": 0.35714285714285715,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.3992,
+ "eval_samples_per_second": 502.172,
+ "eval_steps_per_second": 15.886,
+ "step": 1284
+ },
+ {
+ "epoch": 7.0,
+ "grad_norm": 0.525617778301239,
+ "learning_rate": 1.9824468112374745e-05,
+ "loss": 0.05,
+ "step": 1498
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.664323374340949,
+ "eval_f1": 0.017152658662092625,
+ "eval_loss": 0.04796423017978668,
+ "eval_mcc": 0.004592958330124466,
+ "eval_precision": 0.35714285714285715,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1569,
+ "eval_samples_per_second": 540.727,
+ "eval_steps_per_second": 17.106,
+ "step": 1498
+ },
+ {
+ "epoch": 8.0,
+ "grad_norm": 0.456257700920105,
+ "learning_rate": 1.8022243738522493e-05,
+ "loss": 0.0497,
+ "step": 1712
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.6654950205038078,
+ "eval_f1": 0.017211703958691912,
+ "eval_loss": 0.04803091287612915,
+ "eval_mcc": 0.01487410293271824,
+ "eval_precision": 0.4166666666666667,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.822,
+ "eval_samples_per_second": 446.626,
+ "eval_steps_per_second": 14.129,
+ "step": 1712
+ },
+ {
+ "epoch": 9.0,
+ "grad_norm": 0.4184946119785309,
+ "learning_rate": 1.6220019364670245e-05,
+ "loss": 0.0498,
+ "step": 1926
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.017271157167530225,
+ "eval_loss": 0.04849984124302864,
+ "eval_mcc": 0.027140265094376777,
+ "eval_precision": 0.5,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1433,
+ "eval_samples_per_second": 543.052,
+ "eval_steps_per_second": 17.179,
+ "step": 1926
+ },
+ {
+ "epoch": 10.0,
+ "grad_norm": 0.32953304052352905,
+ "learning_rate": 1.4417794990817994e-05,
+ "loss": 0.0494,
+ "step": 2140
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.664323374340949,
+ "eval_f1": 0.017152658662092625,
+ "eval_loss": 0.04773561656475067,
+ "eval_mcc": 0.004592958330124466,
+ "eval_precision": 0.35714285714285715,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.2081,
+ "eval_samples_per_second": 532.094,
+ "eval_steps_per_second": 16.833,
+ "step": 2140
+ },
+ {
+ "epoch": 11.0,
+ "grad_norm": 0.2775495946407318,
+ "learning_rate": 1.2615570616965746e-05,
+ "loss": 0.0494,
+ "step": 2354
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.664323374340949,
+ "eval_f1": 0.017152658662092625,
+ "eval_loss": 0.04799096658825874,
+ "eval_mcc": 0.004592958330124466,
+ "eval_precision": 0.35714285714285715,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.142,
+ "eval_samples_per_second": 543.29,
+ "eval_steps_per_second": 17.187,
+ "step": 2354
+ },
+ {
+ "epoch": 12.0,
+ "grad_norm": 0.2784470319747925,
+ "learning_rate": 1.0813346243113495e-05,
+ "loss": 0.0494,
+ "step": 2568
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.6660808435852372,
+ "eval_f1": 0.01724137931034483,
+ "eval_loss": 0.04788310080766678,
+ "eval_mcc": 0.020707884164064556,
+ "eval_precision": 0.45454545454545453,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.8964,
+ "eval_samples_per_second": 438.099,
+ "eval_steps_per_second": 13.859,
+ "step": 2568
+ },
+ {
+ "epoch": 13.0,
+ "grad_norm": 0.7122122049331665,
+ "learning_rate": 9.011121869261247e-06,
+ "loss": 0.0493,
+ "step": 2782
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.6654950205038078,
+ "eval_f1": 0.017211703958691912,
+ "eval_loss": 0.04763857275247574,
+ "eval_mcc": 0.01487410293271824,
+ "eval_precision": 0.4166666666666667,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1513,
+ "eval_samples_per_second": 541.682,
+ "eval_steps_per_second": 17.136,
+ "step": 2782
+ },
+ {
+ "epoch": 14.0,
+ "grad_norm": 0.3367031216621399,
+ "learning_rate": 7.208897495408997e-06,
+ "loss": 0.0491,
+ "step": 2996
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.6649091974223784,
+ "eval_f1": 0.01718213058419244,
+ "eval_loss": 0.0474877767264843,
+ "eval_mcc": 0.009529862152017439,
+ "eval_precision": 0.38461538461538464,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.2218,
+ "eval_samples_per_second": 529.831,
+ "eval_steps_per_second": 16.761,
+ "step": 2996
+ },
+ {
+ "epoch": 15.0,
+ "grad_norm": 0.6209991574287415,
+ "learning_rate": 5.406673121556748e-06,
+ "loss": 0.049,
+ "step": 3210
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.6649091974223784,
+ "eval_f1": 0.01718213058419244,
+ "eval_loss": 0.04748029261827469,
+ "eval_mcc": 0.009529862152017439,
+ "eval_precision": 0.38461538461538464,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1605,
+ "eval_samples_per_second": 540.101,
+ "eval_steps_per_second": 17.086,
+ "step": 3210
+ },
+ {
+ "epoch": 16.0,
+ "grad_norm": 0.8616418838500977,
+ "learning_rate": 3.6044487477044986e-06,
+ "loss": 0.0491,
+ "step": 3424
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.664323374340949,
+ "eval_f1": 0.017152658662092625,
+ "eval_loss": 0.04751123487949371,
+ "eval_mcc": 0.004592958330124466,
+ "eval_precision": 0.35714285714285715,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.9087,
+ "eval_samples_per_second": 436.72,
+ "eval_steps_per_second": 13.815,
+ "step": 3424
+ },
+ {
+ "epoch": 17.0,
+ "grad_norm": 0.38889187574386597,
+ "learning_rate": 1.8022243738522493e-06,
+ "loss": 0.0493,
+ "step": 3638
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.6649091974223784,
+ "eval_f1": 0.01718213058419244,
+ "eval_loss": 0.0475541353225708,
+ "eval_mcc": 0.009529862152017439,
+ "eval_precision": 0.38461538461538464,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.1517,
+ "eval_samples_per_second": 541.62,
+ "eval_steps_per_second": 17.134,
+ "step": 3638
+ },
+ {
+ "epoch": 18.0,
+ "grad_norm": 0.3475455343723297,
+ "learning_rate": 0.0,
+ "loss": 0.0489,
+ "step": 3852
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.6649091974223784,
+ "eval_f1": 0.01718213058419244,
+ "eval_loss": 0.047414712607860565,
+ "eval_mcc": 0.009529862152017439,
+ "eval_precision": 0.38461538461538464,
+ "eval_recall": 0.008787346221441126,
+ "eval_runtime": 3.2036,
+ "eval_samples_per_second": 532.83,
+ "eval_steps_per_second": 16.856,
+ "step": 3852
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3852,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 18,
+ "save_steps": 500,
+ "total_flos": 9445961959920.0,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": {
+ "alpha": 0.06448750556013427,
+ "learning_rate": 3.244003872934049e-05,
+ "num_train_epochs": 18,
+ "temperature": 34
+ }
+ }
run-3/checkpoint-3852/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2974298f3dbf754c5d8751ea32f4e9bfa36e27ab58bf9e1d7f733ae1be63c4e3
+ size 5048
run-3/checkpoint-3852/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-4/checkpoint-214/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:71d71cc0170d97b0d4b7e7466c630a9d9ff3ee87141ddf749be8d1e2e1d8a609
+ oid sha256:0c83e7f31749d1db762720fa59ec35fd79e6cea9e91b79fca07f07a447ae4f58
  size 17549312
run-4/checkpoint-214/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30f43b455266116636b8f0a7f1a17b3274aea5604e913d74557f221fc1c81296
+ oid sha256:029bc1a70c1c7bd15148c3bcf6cd9f3c8825cc89954ba2b975896d9303c6d3b0
  size 35123898
run-4/checkpoint-214/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:043caffb6c1012e63ca62bbd130191c7f91382166fb0be80e0dc0e0eaac22952
+ oid sha256:2bb7330760cad4ea8091ac008b3e308e81d33db12960799335b5b09551e25daf
  size 1064
run-4/checkpoint-214/trainer_state.json CHANGED
@@ -10,37 +10,37 @@
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 5.794419765472412,
- "learning_rate": 3.974483565917368e-05,
- "loss": 0.5313,
+ "grad_norm": 0.9788228869438171,
+ "learning_rate": 9.270473337157551e-06,
+ "loss": 0.2545,
  "step": 214
  },
  {
  "epoch": 1.0,
  "eval_accuracy": 0.6666666666666666,
- "eval_f1": 0.0035026269702276708,
- "eval_loss": 0.49643149971961975,
- "eval_mcc": 0.01210898699241207,
- "eval_precision": 0.5,
- "eval_recall": 0.0017574692442882249,
- "eval_runtime": 3.1332,
- "eval_samples_per_second": 544.813,
- "eval_steps_per_second": 17.235,
+ "eval_f1": 0.0,
+ "eval_loss": 0.22835808992385864,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.1407,
+ "eval_samples_per_second": 543.501,
+ "eval_steps_per_second": 17.193,
  "step": 214
  }
  ],
  "logging_steps": 500,
- "max_steps": 1498,
+ "max_steps": 428,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 7,
+ "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 524775664440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.8274705871724447,
- "learning_rate": 4.6368974935702624e-05,
- "num_train_epochs": 7,
- "temperature": 26
+ "alpha": 0.3287651702725557,
+ "learning_rate": 1.8540946674315103e-05,
+ "num_train_epochs": 2,
+ "temperature": 24
  }
  }
run-4/checkpoint-214/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9a66c3611aaa8b227e5b872f0aa4e2f90b172c191cd1eb23826a153b2caba18a
+ oid sha256:bf88101ef410a660c50d8f7a9f32103487a0921f9d4b081d4ef174140c5aaa94
  size 5048
run-5/checkpoint-2782/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 128,
+ "id2label": {
+ "0": "negative",
+ "1": "positive"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 512,
+ "label2id": {
+ "negative": "0",
+ "positive": "1"
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
run-5/checkpoint-2782/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:331bbba94eb6a1006afae76a1d1473d7d1cd9e31966fb7f60a0c22edfa9f5f4f
+ size 17549312
run-5/checkpoint-2782/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4da62a3c2e8fb2d8738fa85c112304f7e6cdf85b0ed9557393b7c7697575cdd3
+ size 35123898
run-5/checkpoint-2782/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48b220e5ea4ac040566c4076b4aa2ab507f558ff52391b72ff0465738494290d
+ size 14308
run-5/checkpoint-2782/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2e008df3d505ab700cf418f5978e6fe4cb486b1718c4d14d2b1b7199d6b89cf
+ size 1064
run-5/checkpoint-2782/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
run-5/checkpoint-2782/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-2782/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
run-5/checkpoint-2782/trainer_state.json ADDED
@@ -0,0 +1,286 @@
+ {
+ "best_metric": 0.7217340363210311,
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-2782",
+ "epoch": 13.0,
+ "eval_steps": 500,
+ "global_step": 2782,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "grad_norm": 2.161219835281372,
+ "learning_rate": 0.0001946740481873714,
+ "loss": 0.2896,
+ "step": 214
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.27429890632629395,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 3.1537,
+ "eval_samples_per_second": 541.276,
+ "eval_steps_per_second": 17.123,
+ "step": 214
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 1.0970500707626343,
+ "learning_rate": 0.00017969912140372742,
+ "loss": 0.2734,
+ "step": 428
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.006980802792321117,
+ "eval_loss": 0.2710207402706146,
+ "eval_mcc": 0.01713474628469157,
+ "eval_precision": 0.5,
+ "eval_recall": 0.0035149384885764497,
+ "eval_runtime": 3.9301,
+ "eval_samples_per_second": 434.342,
+ "eval_steps_per_second": 13.74,
+ "step": 428
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 0.7171841859817505,
+ "learning_rate": 0.0001647241946200835,
+ "loss": 0.2685,
+ "step": 642
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.6678383128295254,
+ "eval_f1": 0.010471204188481676,
+ "eval_loss": 0.27606382966041565,
+ "eval_mcc": 0.042836865711728934,
+ "eval_precision": 0.75,
+ "eval_recall": 0.005272407732864675,
+ "eval_runtime": 3.1441,
+ "eval_samples_per_second": 542.926,
+ "eval_steps_per_second": 17.175,
+ "step": 642
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 1.334978699684143,
+ "learning_rate": 0.00014974926783643954,
+ "loss": 0.266,
+ "step": 856
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.6795547744581136,
+ "eval_f1": 0.10180623973727422,
+ "eval_loss": 0.26485475897789,
+ "eval_mcc": 0.14513196526792949,
+ "eval_precision": 0.775,
+ "eval_recall": 0.054481546572934976,
+ "eval_runtime": 3.1938,
+ "eval_samples_per_second": 534.478,
+ "eval_steps_per_second": 16.908,
+ "step": 856
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 1.377930998802185,
+ "learning_rate": 0.0001347743410527956,
+ "loss": 0.2643,
+ "step": 1070
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.6918570591681312,
+ "eval_f1": 0.18827160493827164,
+ "eval_loss": 0.26378217339515686,
+ "eval_mcc": 0.20505841470507494,
+ "eval_precision": 0.7721518987341772,
+ "eval_recall": 0.10720562390158173,
+ "eval_runtime": 3.1292,
+ "eval_samples_per_second": 545.505,
+ "eval_steps_per_second": 17.257,
+ "step": 1070
+ },
+ {
+ "epoch": 6.0,
+ "grad_norm": 1.2771140336990356,
+ "learning_rate": 0.00011979941426915163,
+ "loss": 0.263,
+ "step": 1284
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.6936145284124194,
+ "eval_f1": 0.18662519440124417,
+ "eval_loss": 0.26149189472198486,
+ "eval_mcc": 0.2156164618376391,
+ "eval_precision": 0.8108108108108109,
+ "eval_recall": 0.1054481546572935,
+ "eval_runtime": 3.2914,
+ "eval_samples_per_second": 518.626,
+ "eval_steps_per_second": 16.406,
+ "step": 1284
+ },
+ {
+ "epoch": 7.0,
+ "grad_norm": 1.073453426361084,
+ "learning_rate": 0.00010482448748550767,
+ "loss": 0.2612,
+ "step": 1498
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.6994727592267135,
+ "eval_f1": 0.21439509954058195,
+ "eval_loss": 0.2620287537574768,
+ "eval_mcc": 0.24129962353457945,
+ "eval_precision": 0.8333333333333334,
+ "eval_recall": 0.12302284710017575,
+ "eval_runtime": 3.1567,
+ "eval_samples_per_second": 540.751,
+ "eval_steps_per_second": 17.106,
+ "step": 1498
+ },
+ {
+ "epoch": 8.0,
+ "grad_norm": 1.2691621780395508,
+ "learning_rate": 8.984956070186371e-05,
+ "loss": 0.2597,
+ "step": 1712
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.69302870533099,
+ "eval_f1": 0.17088607594936708,
+ "eval_loss": 0.2611652910709381,
+ "eval_mcc": 0.21751991027491313,
+ "eval_precision": 0.8571428571428571,
+ "eval_recall": 0.09490333919156414,
+ "eval_runtime": 3.2468,
+ "eval_samples_per_second": 525.752,
+ "eval_steps_per_second": 16.632,
+ "step": 1712
+ },
+ {
+ "epoch": 9.0,
+ "grad_norm": 1.0226393938064575,
+ "learning_rate": 7.487463391821977e-05,
+ "loss": 0.2597,
+ "step": 1926
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.6977152899824253,
+ "eval_f1": 0.19626168224299068,
+ "eval_loss": 0.2611730098724365,
+ "eval_mcc": 0.2374955820778862,
+ "eval_precision": 0.863013698630137,
+ "eval_recall": 0.11072056239015818,
+ "eval_runtime": 3.1639,
+ "eval_samples_per_second": 539.53,
+ "eval_steps_per_second": 17.068,
+ "step": 1926
+ },
+ {
+ "epoch": 10.0,
+ "grad_norm": 1.0377492904663086,
+ "learning_rate": 5.989970713457581e-05,
+ "loss": 0.2565,
+ "step": 2140
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.7193907439953134,
+ "eval_f1": 0.32248939179632247,
+ "eval_loss": 0.260220468044281,
+ "eval_mcc": 0.310001756502818,
+ "eval_precision": 0.8260869565217391,
+ "eval_recall": 0.20035149384885764,
+ "eval_runtime": 3.2066,
+ "eval_samples_per_second": 532.341,
+ "eval_steps_per_second": 16.84,
+ "step": 2140
+ },
+ {
+ "epoch": 11.0,
+ "grad_norm": 1.2514437437057495,
+ "learning_rate": 4.4924780350931855e-05,
+ "loss": 0.2555,
+ "step": 2354
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.700058582308143,
+ "eval_f1": 0.20743034055727552,
+ "eval_loss": 0.26075002551078796,
+ "eval_mcc": 0.2474956228703306,
+ "eval_precision": 0.8701298701298701,
+ "eval_recall": 0.11775043936731107,
+ "eval_runtime": 3.1394,
+ "eval_samples_per_second": 543.734,
+ "eval_steps_per_second": 17.201,
+ "step": 2354
+ },
+ {
+ "epoch": 12.0,
+ "grad_norm": 0.8049026727676392,
+ "learning_rate": 2.9949853567287906e-05,
+ "loss": 0.2544,
+ "step": 2568
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.715875805506737,
+ "eval_f1": 0.31593794076163606,
+ "eval_loss": 0.2587771415710449,
+ "eval_mcc": 0.29589835954792404,
+ "eval_precision": 0.8,
+ "eval_recall": 0.1968365553602812,
+ "eval_runtime": 3.3501,
+ "eval_samples_per_second": 509.539,
+ "eval_steps_per_second": 16.119,
+ "step": 2568
+ },
+ {
+ "epoch": 13.0,
+ "grad_norm": 2.94110369682312,
+ "learning_rate": 1.4974926783643953e-05,
+ "loss": 0.2544,
+ "step": 2782
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.7217340363210311,
+ "eval_f1": 0.3356643356643356,
+ "eval_loss": 0.2589167356491089,
+ "eval_mcc": 0.31697199705587376,
+ "eval_precision": 0.821917808219178,
+ "eval_recall": 0.210896309314587,
+ "eval_runtime": 3.165,
+ "eval_samples_per_second": 539.337,
+ "eval_steps_per_second": 17.062,
+ "step": 2782
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2996,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 14,
+ "save_steps": 500,
+ "total_flos": 6822083637720.0,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": {
+ "alpha": 0.43581262355237016,
+ "learning_rate": 0.00020964897497101535,
+ "num_train_epochs": 14,
+ "temperature": 35
+ }
+ }
run-5/checkpoint-2782/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89392d1670280eb2d3bf633d0da5562575ac3ebc9583dacddc332092c977234d
+ size 5048
run-5/checkpoint-2782/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-2996/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 128,
+ "id2label": {
+ "0": "negative",
+ "1": "positive"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 512,
+ "label2id": {
+ "negative": "0",
+ "positive": "1"
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
run-5/checkpoint-2996/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c3315a30671a1f6e5e8a7bfe549057fee83be716b89b06a9d1c73518097673e
+ size 17549312
run-5/checkpoint-2996/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:70c7996f14d3ea31a3af30df85437fddf69d2a04bf0666fb4a0826c0b502355c
+ size 35123898
run-5/checkpoint-2996/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9426185b39d441044107ee0bb63490b78521dba8c90fe388accfdd0dbcbf9fec
+ size 14308
run-5/checkpoint-2996/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c21195eb47c24552f665c11538dfa091aef97aa8bc90b5acf0569f770ed56453
+ size 1064
run-5/checkpoint-2996/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }