xuancoblab2023 commited on
Commit
4152099
·
verified ·
1 Parent(s): 4783c7b

Training in progress, epoch 1

Browse files
logs/events.out.tfevents.1709715985.adc675a344d5.67573.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9acc5eb7335b61c91611b864a938306b814c637992b4390e080c3e8891d7339d
3
- size 8045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dcb09ad5d87fa173598de0cb304e037191d17fa52f63d2678b128f1a3bea2d
3
+ size 9765
logs/events.out.tfevents.1709716821.adc675a344d5.67573.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cfec70353e1792393f9035254bbeb24c0f5630ab9c360b83e0ba97f5cb09f47
3
+ size 5314
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65886fa7d4e37abb88a6bb6334b633e6fb683d24bbbc672b61cc55794f5aa22
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12de485927495a54a42aa2b293b0adf412d00170117941f81fbe4835dce02946
3
  size 17549312
run-1/checkpoint-576/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-1/checkpoint-576/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:474886eddcc483c10f2108ad7b1acefb8cec7c1c33b79fd9734ccf4b673298cf
3
+ size 17549312
run-1/checkpoint-576/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7310314ec6230e52339beb4c4ba7c73aa19dc8903f9d01cfaeac54bd923800
3
+ size 35122746
run-1/checkpoint-576/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4ad45514321df5f64172bb2de61ac319a093d9f31cf01031977fb88be20551
3
+ size 14054
run-1/checkpoint-576/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36d6023f1f77dae10d4ac56a76a2f9c7283c3168999cd68db2690e4268c4e92e
3
+ size 1064
run-1/checkpoint-576/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-576/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-576/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-1/checkpoint-576/trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8160469667318982,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-576",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 576,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.0269759893417358,
14
+ "learning_rate": 5.9043684647359026e-05,
15
+ "loss": 0.5339,
16
+ "step": 96
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7035225048923679,
21
+ "eval_f1": 0.6651933701657459,
22
+ "eval_loss": 0.4725688099861145,
23
+ "eval_precision": 0.7639593908629442,
24
+ "eval_recall": 0.589041095890411,
25
+ "eval_runtime": 28.4268,
26
+ "eval_samples_per_second": 35.952,
27
+ "eval_steps_per_second": 1.126,
28
+ "step": 96
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 1.4684391021728516,
33
+ "learning_rate": 4.920307053946586e-05,
34
+ "loss": 0.4589,
35
+ "step": 192
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.7563600782778865,
40
+ "eval_f1": 0.7537091988130563,
41
+ "eval_loss": 0.4252457022666931,
42
+ "eval_precision": 0.762,
43
+ "eval_recall": 0.7455968688845401,
44
+ "eval_runtime": 28.3965,
45
+ "eval_samples_per_second": 35.99,
46
+ "eval_steps_per_second": 1.127,
47
+ "step": 192
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 1.83766770362854,
52
+ "learning_rate": 3.9362456431572686e-05,
53
+ "loss": 0.4336,
54
+ "step": 288
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.8013698630136986,
59
+ "eval_f1": 0.8122109158186863,
60
+ "eval_loss": 0.41349494457244873,
61
+ "eval_precision": 0.7701754385964912,
62
+ "eval_recall": 0.8590998043052838,
63
+ "eval_runtime": 28.5368,
64
+ "eval_samples_per_second": 35.813,
65
+ "eval_steps_per_second": 1.121,
66
+ "step": 288
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 1.8088363409042358,
71
+ "learning_rate": 2.9521842323679513e-05,
72
+ "loss": 0.424,
73
+ "step": 384
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.7945205479452054,
78
+ "eval_f1": 0.7965116279069767,
79
+ "eval_loss": 0.40849176049232483,
80
+ "eval_precision": 0.7888675623800384,
81
+ "eval_recall": 0.8043052837573386,
82
+ "eval_runtime": 29.2183,
83
+ "eval_samples_per_second": 34.978,
84
+ "eval_steps_per_second": 1.095,
85
+ "step": 384
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 1.6147538423538208,
90
+ "learning_rate": 1.9681228215786343e-05,
91
+ "loss": 0.4186,
92
+ "step": 480
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8062622309197651,
97
+ "eval_f1": 0.818014705882353,
98
+ "eval_loss": 0.40458473563194275,
99
+ "eval_precision": 0.7712305025996534,
100
+ "eval_recall": 0.8708414872798435,
101
+ "eval_runtime": 30.48,
102
+ "eval_samples_per_second": 33.53,
103
+ "eval_steps_per_second": 1.05,
104
+ "step": 480
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 3.4918863773345947,
109
+ "learning_rate": 9.840614107893172e-06,
110
+ "loss": 0.4148,
111
+ "step": 576
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8160469667318982,
116
+ "eval_f1": 0.8312387791741472,
117
+ "eval_loss": 0.40512770414352417,
118
+ "eval_precision": 0.7678275290215588,
119
+ "eval_recall": 0.9060665362035225,
120
+ "eval_runtime": 30.2589,
121
+ "eval_samples_per_second": 33.775,
122
+ "eval_steps_per_second": 1.058,
123
+ "step": 576
124
+ }
125
+ ],
126
+ "logging_steps": 500,
127
+ "max_steps": 672,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 7,
130
+ "save_steps": 500,
131
+ "total_flos": 1414171183680.0,
132
+ "train_batch_size": 32,
133
+ "trial_name": null,
134
+ "trial_params": {
135
+ "alpha": 0.7341280696937652,
136
+ "learning_rate": 6.88842987552522e-05,
137
+ "num_train_epochs": 7,
138
+ "temperature": 8
139
+ }
140
+ }
run-1/checkpoint-576/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:846c082dceab6915234c6a2aabd837d23103edc099022b8d844818635e620020
3
+ size 4920
run-1/checkpoint-576/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-672/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-1/checkpoint-672/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:933bac358a29fe312e1f41c4fc929c6e84f13b17a142c5532d9a712db3dc486c
3
+ size 17549312
run-1/checkpoint-672/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20412691f28d9c93e83ba6a77e3281cc56e7cd7d1f034a6feea42daceca483f4
3
+ size 35122746
run-1/checkpoint-672/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a39c6918f3fd24e7a04a4220bba5fcc478d9db2386daee293756dba26ecf21ac
3
+ size 14054
run-1/checkpoint-672/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a1def38e3d0091ae61fc1a9daff613116fc2c57d2522dab2e3fd226f81b3f4f
3
+ size 1064
run-1/checkpoint-672/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-672/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-672/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-1/checkpoint-672/trainer_state.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8160469667318982,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-576",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 672,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.0269759893417358,
14
+ "learning_rate": 5.9043684647359026e-05,
15
+ "loss": 0.5339,
16
+ "step": 96
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7035225048923679,
21
+ "eval_f1": 0.6651933701657459,
22
+ "eval_loss": 0.4725688099861145,
23
+ "eval_precision": 0.7639593908629442,
24
+ "eval_recall": 0.589041095890411,
25
+ "eval_runtime": 28.4268,
26
+ "eval_samples_per_second": 35.952,
27
+ "eval_steps_per_second": 1.126,
28
+ "step": 96
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 1.4684391021728516,
33
+ "learning_rate": 4.920307053946586e-05,
34
+ "loss": 0.4589,
35
+ "step": 192
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.7563600782778865,
40
+ "eval_f1": 0.7537091988130563,
41
+ "eval_loss": 0.4252457022666931,
42
+ "eval_precision": 0.762,
43
+ "eval_recall": 0.7455968688845401,
44
+ "eval_runtime": 28.3965,
45
+ "eval_samples_per_second": 35.99,
46
+ "eval_steps_per_second": 1.127,
47
+ "step": 192
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 1.83766770362854,
52
+ "learning_rate": 3.9362456431572686e-05,
53
+ "loss": 0.4336,
54
+ "step": 288
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.8013698630136986,
59
+ "eval_f1": 0.8122109158186863,
60
+ "eval_loss": 0.41349494457244873,
61
+ "eval_precision": 0.7701754385964912,
62
+ "eval_recall": 0.8590998043052838,
63
+ "eval_runtime": 28.5368,
64
+ "eval_samples_per_second": 35.813,
65
+ "eval_steps_per_second": 1.121,
66
+ "step": 288
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 1.8088363409042358,
71
+ "learning_rate": 2.9521842323679513e-05,
72
+ "loss": 0.424,
73
+ "step": 384
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.7945205479452054,
78
+ "eval_f1": 0.7965116279069767,
79
+ "eval_loss": 0.40849176049232483,
80
+ "eval_precision": 0.7888675623800384,
81
+ "eval_recall": 0.8043052837573386,
82
+ "eval_runtime": 29.2183,
83
+ "eval_samples_per_second": 34.978,
84
+ "eval_steps_per_second": 1.095,
85
+ "step": 384
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 1.6147538423538208,
90
+ "learning_rate": 1.9681228215786343e-05,
91
+ "loss": 0.4186,
92
+ "step": 480
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8062622309197651,
97
+ "eval_f1": 0.818014705882353,
98
+ "eval_loss": 0.40458473563194275,
99
+ "eval_precision": 0.7712305025996534,
100
+ "eval_recall": 0.8708414872798435,
101
+ "eval_runtime": 30.48,
102
+ "eval_samples_per_second": 33.53,
103
+ "eval_steps_per_second": 1.05,
104
+ "step": 480
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 3.4918863773345947,
109
+ "learning_rate": 9.840614107893172e-06,
110
+ "loss": 0.4148,
111
+ "step": 576
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8160469667318982,
116
+ "eval_f1": 0.8312387791741472,
117
+ "eval_loss": 0.40512770414352417,
118
+ "eval_precision": 0.7678275290215588,
119
+ "eval_recall": 0.9060665362035225,
120
+ "eval_runtime": 30.2589,
121
+ "eval_samples_per_second": 33.775,
122
+ "eval_steps_per_second": 1.058,
123
+ "step": 576
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "grad_norm": 1.9054783582687378,
128
+ "learning_rate": 0.0,
129
+ "loss": 0.4095,
130
+ "step": 672
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.8140900195694716,
135
+ "eval_f1": 0.8269581056466302,
136
+ "eval_loss": 0.4025559723377228,
137
+ "eval_precision": 0.7734241908006815,
138
+ "eval_recall": 0.8884540117416829,
139
+ "eval_runtime": 29.9441,
140
+ "eval_samples_per_second": 34.13,
141
+ "eval_steps_per_second": 1.069,
142
+ "step": 672
143
+ }
144
+ ],
145
+ "logging_steps": 500,
146
+ "max_steps": 672,
147
+ "num_input_tokens_seen": 0,
148
+ "num_train_epochs": 7,
149
+ "save_steps": 500,
150
+ "total_flos": 1649866380960.0,
151
+ "train_batch_size": 32,
152
+ "trial_name": null,
153
+ "trial_params": {
154
+ "alpha": 0.7341280696937652,
155
+ "learning_rate": 6.88842987552522e-05,
156
+ "num_train_epochs": 7,
157
+ "temperature": 8
158
+ }
159
+ }
run-1/checkpoint-672/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:846c082dceab6915234c6a2aabd837d23103edc099022b8d844818635e620020
3
+ size 4920
run-1/checkpoint-672/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-96/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f72e368a0735e0614c97feb6fba15c5d298f68104e7128cf582de5807e255027
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12de485927495a54a42aa2b293b0adf412d00170117941f81fbe4835dce02946
3
  size 17549312
run-2/checkpoint-96/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b5aece18803da9b55ab957b14fbd401ac7b58d03d968f7138245dd9246cea4e
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54062a3bdb564650b4d45cbadb566f422765f4cfb994c34b40c126e7ea2017ac
3
  size 35122746
run-2/checkpoint-96/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:773b7d3c411900ce077bec534b9aeadeaba63fc284f46150d41278a6fa17bea6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c79e750cc582cae4735d180e15f64e99d57267a624826e5fed0ad71eed76c34
3
  size 1064
run-2/checkpoint-96/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,36 +10,36 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 3.4041388034820557,
14
- "learning_rate": 8.656503841433308e-07,
15
- "loss": 0.2746,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5,
21
- "eval_f1": 0.6666666666666666,
22
- "eval_loss": 0.25451573729515076,
23
- "eval_precision": 0.5,
24
- "eval_recall": 1.0,
25
- "eval_runtime": 28.1178,
26
- "eval_samples_per_second": 36.347,
27
- "eval_steps_per_second": 1.138,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 480,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 5,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
- "alpha": 0.04277939142155329,
41
- "learning_rate": 1.0820629801791635e-06,
42
- "num_train_epochs": 5,
43
- "temperature": 6
44
  }
45
  }
 
1
  {
2
+ "best_metric": 0.5499021526418787,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.663398027420044,
14
+ "learning_rate": 0.00027867701883546326,
15
+ "loss": 0.2787,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5499021526418787,
21
+ "eval_f1": 0.24092409240924093,
22
+ "eval_loss": 0.25340619683265686,
23
+ "eval_precision": 0.7684210526315789,
24
+ "eval_recall": 0.14285714285714285,
25
+ "eval_runtime": 28.5207,
26
+ "eval_samples_per_second": 35.834,
27
+ "eval_steps_per_second": 1.122,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 864,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 9,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
+ "alpha": 0.3483609408663828,
41
+ "learning_rate": 0.0003135116461898962,
42
+ "num_train_epochs": 9,
43
+ "temperature": 7
44
  }
45
  }
run-2/checkpoint-96/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd5c3fbfe040730ec9bc64036d615ae00f3fe41347ad2df02c2ead7e8faec2da
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7adcc08e8d37cad0335688bbf483a90600b348a62bf08e2b72452e95967f2a4
3
  size 4920
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:846c082dceab6915234c6a2aabd837d23103edc099022b8d844818635e620020
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7adcc08e8d37cad0335688bbf483a90600b348a62bf08e2b72452e95967f2a4
3
  size 4920