Training in progress, epoch 1
Browse files- logs/events.out.tfevents.1709621162.404ecd1cf399.26000.16 +2 -2
- logs/events.out.tfevents.1709621520.404ecd1cf399.26000.17 +3 -0
- logs/events.out.tfevents.1709621638.404ecd1cf399.26000.18 +3 -0
- logs/events.out.tfevents.1709621754.404ecd1cf399.26000.19 +3 -0
- logs/events.out.tfevents.1709621872.404ecd1cf399.26000.20 +3 -0
- logs/events.out.tfevents.1709621988.404ecd1cf399.26000.21 +3 -0
- logs/events.out.tfevents.1709622105.404ecd1cf399.26000.22 +3 -0
- logs/events.out.tfevents.1709622224.404ecd1cf399.26000.23 +3 -0
- logs/events.out.tfevents.1709622341.404ecd1cf399.26000.24 +3 -0
- logs/events.out.tfevents.1709622459.404ecd1cf399.26000.25 +3 -0
- logs/events.out.tfevents.1709622577.404ecd1cf399.26000.26 +3 -0
- logs/events.out.tfevents.1709622697.404ecd1cf399.26000.27 +3 -0
- model.safetensors +1 -1
- run-10/checkpoint-288/config.json +34 -0
- run-10/checkpoint-288/model.safetensors +3 -0
- run-10/checkpoint-288/optimizer.pt +3 -0
- run-10/checkpoint-288/rng_state.pth +3 -0
- run-10/checkpoint-288/scheduler.pt +3 -0
- run-10/checkpoint-288/special_tokens_map.json +7 -0
- run-10/checkpoint-288/tokenizer.json +0 -0
- run-10/checkpoint-288/tokenizer_config.json +57 -0
- run-10/checkpoint-288/trainer_state.json +83 -0
- run-10/checkpoint-288/training_args.bin +3 -0
- run-10/checkpoint-288/vocab.txt +0 -0
- run-21/checkpoint-96/config.json +34 -0
- run-21/checkpoint-96/model.safetensors +3 -0
- run-21/checkpoint-96/optimizer.pt +3 -0
- run-21/checkpoint-96/rng_state.pth +3 -0
- run-21/checkpoint-96/scheduler.pt +3 -0
- run-21/checkpoint-96/special_tokens_map.json +7 -0
- run-21/checkpoint-96/tokenizer.json +0 -0
- run-21/checkpoint-96/tokenizer_config.json +57 -0
- run-21/checkpoint-96/trainer_state.json +45 -0
- run-21/checkpoint-96/training_args.bin +3 -0
- run-21/checkpoint-96/vocab.txt +0 -0
- training_args.bin +1 -1
logs/events.out.tfevents.1709621162.404ecd1cf399.26000.16
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f188aab21c187430a049f0cb45d7be4ff226f7a643fb22094c1b513edbe3290
|
3 |
+
size 7034
|
logs/events.out.tfevents.1709621520.404ecd1cf399.26000.17
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c9f443ea6d1d3319ca5cfeff866cbad95d207de761827824356f08b8566c031
|
3 |
+
size 5314
|
logs/events.out.tfevents.1709621638.404ecd1cf399.26000.18
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81ce6158d6aa6340bb86067d40de8ad5bcca52a5360b90c636c54e9151f0834f
|
3 |
+
size 5314
|
logs/events.out.tfevents.1709621754.404ecd1cf399.26000.19
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa54aa96f058cf0ad96ff3a163b9e620867a3c93faa4253808a3ee297687b65
|
3 |
+
size 5314
|
logs/events.out.tfevents.1709621872.404ecd1cf399.26000.20
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f54b8ad32bb77f3edd0962f26324a60cca56b98a7ae17cd4721427aea1686a54
|
3 |
+
size 5315
|
logs/events.out.tfevents.1709621988.404ecd1cf399.26000.21
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c864e36fa21afb69b83dc1eb0f2270c1e8b2de14744a3b41a9d2eb8269a55c9d
|
3 |
+
size 5314
|
logs/events.out.tfevents.1709622105.404ecd1cf399.26000.22
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:773319b7521f22549a7a8b17ff140feb1a90416bee36cfdf45128a017d4dbd53
|
3 |
+
size 5315
|
logs/events.out.tfevents.1709622224.404ecd1cf399.26000.23
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d52bd5fd37dfc611b4b637516c60ecd5c7388580f62b6db2e6e20b9cb14c47d
|
3 |
+
size 5315
|
logs/events.out.tfevents.1709622341.404ecd1cf399.26000.24
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6855da1bc56c58c69c9b9c23403d8c7d4f91347c1bf03382bd673668f5f49fbc
|
3 |
+
size 5315
|
logs/events.out.tfevents.1709622459.404ecd1cf399.26000.25
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ed4c7f97b576fe13069a4307c1886daa638f6125083ecb85516722d06c05663
|
3 |
+
size 5311
|
logs/events.out.tfevents.1709622577.404ecd1cf399.26000.26
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5893da421a5e6fe626da3bc647153fcbc26a75d404672693f0175d8c2dd8c36d
|
3 |
+
size 5315
|
logs/events.out.tfevents.1709622697.404ecd1cf399.26000.27
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41b13c9a120729e9f5b44a32bc258f58c815c20cb691f744147e7a047aa13d7b
|
3 |
+
size 5314
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17549312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0340ebc32cb2d6702dc3fa2be07246d93c8e0bf967f78c96478e03a388469de0
|
3 |
size 17549312
|
run-10/checkpoint-288/config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 128,
|
11 |
+
"id2label": {
|
12 |
+
"0": "negative",
|
13 |
+
"1": "positive"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 512,
|
17 |
+
"label2id": {
|
18 |
+
"negative": "0",
|
19 |
+
"positive": "1"
|
20 |
+
},
|
21 |
+
"layer_norm_eps": 1e-12,
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "bert",
|
24 |
+
"num_attention_heads": 2,
|
25 |
+
"num_hidden_layers": 2,
|
26 |
+
"pad_token_id": 0,
|
27 |
+
"position_embedding_type": "absolute",
|
28 |
+
"problem_type": "single_label_classification",
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.38.2",
|
31 |
+
"type_vocab_size": 2,
|
32 |
+
"use_cache": true,
|
33 |
+
"vocab_size": 30522
|
34 |
+
}
|
run-10/checkpoint-288/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90544cd346f84f4589af99a78110877a58e704cb23b6f374ebdfba5b1f7654c5
|
3 |
+
size 17549312
|
run-10/checkpoint-288/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72c3fc2822ded408ee92a83ac988187d1edba9601e41f58ea728289e3b9381cb
|
3 |
+
size 35122746
|
run-10/checkpoint-288/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba3c2bb1d96a39a28ae1b66f234e351e3c13c700fc97347efd6308798b151790
|
3 |
+
size 14054
|
run-10/checkpoint-288/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c115d3f4c35a47d3adb59e1f87f96436fbc076b7e6a682ba8005683cfea2d74
|
3 |
+
size 1064
|
run-10/checkpoint-288/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-10/checkpoint-288/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-10/checkpoint-288/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
run-10/checkpoint-288/trainer_state.json
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8297455968688845,
|
3 |
+
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-288",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 288,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"grad_norm": 1.9170680046081543,
|
14 |
+
"learning_rate": 0.0005212663608648181,
|
15 |
+
"loss": 0.5641,
|
16 |
+
"step": 96
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.799412915851272,
|
21 |
+
"eval_f1": 0.8197009674582234,
|
22 |
+
"eval_loss": 0.47259676456451416,
|
23 |
+
"eval_precision": 0.744408945686901,
|
24 |
+
"eval_recall": 0.9119373776908023,
|
25 |
+
"eval_runtime": 30.8424,
|
26 |
+
"eval_samples_per_second": 33.136,
|
27 |
+
"eval_steps_per_second": 1.038,
|
28 |
+
"step": 96
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"grad_norm": 2.4848690032958984,
|
33 |
+
"learning_rate": 0.00026063318043240905,
|
34 |
+
"loss": 0.456,
|
35 |
+
"step": 192
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.824853228962818,
|
40 |
+
"eval_f1": 0.8406055209260909,
|
41 |
+
"eval_loss": 0.4349122643470764,
|
42 |
+
"eval_precision": 0.7712418300653595,
|
43 |
+
"eval_recall": 0.923679060665362,
|
44 |
+
"eval_runtime": 31.1515,
|
45 |
+
"eval_samples_per_second": 32.807,
|
46 |
+
"eval_steps_per_second": 1.027,
|
47 |
+
"step": 192
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.0,
|
51 |
+
"grad_norm": 4.3911333084106445,
|
52 |
+
"learning_rate": 0.0,
|
53 |
+
"loss": 0.4063,
|
54 |
+
"step": 288
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.8297455968688845,
|
59 |
+
"eval_f1": 0.8421052631578947,
|
60 |
+
"eval_loss": 0.42057597637176514,
|
61 |
+
"eval_precision": 0.7851099830795262,
|
62 |
+
"eval_recall": 0.9080234833659491,
|
63 |
+
"eval_runtime": 32.1366,
|
64 |
+
"eval_samples_per_second": 31.802,
|
65 |
+
"eval_steps_per_second": 0.996,
|
66 |
+
"step": 288
|
67 |
+
}
|
68 |
+
],
|
69 |
+
"logging_steps": 500,
|
70 |
+
"max_steps": 288,
|
71 |
+
"num_input_tokens_seen": 0,
|
72 |
+
"num_train_epochs": 3,
|
73 |
+
"save_steps": 500,
|
74 |
+
"total_flos": 707085591840.0,
|
75 |
+
"train_batch_size": 32,
|
76 |
+
"trial_name": null,
|
77 |
+
"trial_params": {
|
78 |
+
"alpha": 0.9610512963031814,
|
79 |
+
"learning_rate": 0.0007818995412972273,
|
80 |
+
"num_train_epochs": 3,
|
81 |
+
"temperature": 24
|
82 |
+
}
|
83 |
+
}
|
run-10/checkpoint-288/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c48cb920a64a7322ad1d6d741321a643be4ed10af970a24a154150a0def2990
|
3 |
+
size 4920
|
run-10/checkpoint-288/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-21/checkpoint-96/config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 128,
|
11 |
+
"id2label": {
|
12 |
+
"0": "negative",
|
13 |
+
"1": "positive"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 512,
|
17 |
+
"label2id": {
|
18 |
+
"negative": "0",
|
19 |
+
"positive": "1"
|
20 |
+
},
|
21 |
+
"layer_norm_eps": 1e-12,
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "bert",
|
24 |
+
"num_attention_heads": 2,
|
25 |
+
"num_hidden_layers": 2,
|
26 |
+
"pad_token_id": 0,
|
27 |
+
"position_embedding_type": "absolute",
|
28 |
+
"problem_type": "single_label_classification",
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.38.2",
|
31 |
+
"type_vocab_size": 2,
|
32 |
+
"use_cache": true,
|
33 |
+
"vocab_size": 30522
|
34 |
+
}
|
run-21/checkpoint-96/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0340ebc32cb2d6702dc3fa2be07246d93c8e0bf967f78c96478e03a388469de0
|
3 |
+
size 17549312
|
run-21/checkpoint-96/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c513239be26577cef37b2a5c5921b280d68af12a23bbcc48e367b3d5257fef7e
|
3 |
+
size 35122746
|
run-21/checkpoint-96/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24aa86019b8aea1c551cc1adaf38c4db2fc01de75a22af312230f6b592e0fd81
|
3 |
+
size 14054
|
run-21/checkpoint-96/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c973b1c012d03414b773f9d4a120a97ecb91fe35ce74c8cc2ee1c6e41f7efae6
|
3 |
+
size 1064
|
run-21/checkpoint-96/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-21/checkpoint-96/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-21/checkpoint-96/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
run-21/checkpoint-96/trainer_state.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.7886497064579256,
|
3 |
+
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-21/checkpoint-96",
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 96,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"grad_norm": 1.274248719215393,
|
14 |
+
"learning_rate": 0.00018849707699065689,
|
15 |
+
"loss": 0.5008,
|
16 |
+
"step": 96
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.7886497064579256,
|
21 |
+
"eval_f1": 0.807142857142857,
|
22 |
+
"eval_loss": 0.4340507984161377,
|
23 |
+
"eval_precision": 0.7422003284072249,
|
24 |
+
"eval_recall": 0.8845401174168297,
|
25 |
+
"eval_runtime": 30.5806,
|
26 |
+
"eval_samples_per_second": 33.42,
|
27 |
+
"eval_steps_per_second": 1.046,
|
28 |
+
"step": 96
|
29 |
+
}
|
30 |
+
],
|
31 |
+
"logging_steps": 500,
|
32 |
+
"max_steps": 480,
|
33 |
+
"num_input_tokens_seen": 0,
|
34 |
+
"num_train_epochs": 5,
|
35 |
+
"save_steps": 500,
|
36 |
+
"total_flos": 235695197280.0,
|
37 |
+
"train_batch_size": 32,
|
38 |
+
"trial_name": null,
|
39 |
+
"trial_params": {
|
40 |
+
"alpha": 0.7341174287180767,
|
41 |
+
"learning_rate": 0.0002356213462383211,
|
42 |
+
"num_train_epochs": 5,
|
43 |
+
"temperature": 30
|
44 |
+
}
|
45 |
+
}
|
run-21/checkpoint-96/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cda209f700b90b88269aa2223f5df9860e130b74cffee3dfa38a80b8c6373616
|
3 |
+
size 4920
|
run-21/checkpoint-96/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cda209f700b90b88269aa2223f5df9860e130b74cffee3dfa38a80b8c6373616
|
3 |
size 4920
|