xuancoblab2023 commited on
Commit
26480cb
·
verified ·
1 Parent(s): df6677d

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1710507175.af351b5edcca.3142.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f06198ee44361a23f2d4fd39293a6cc026d37caaed7434826721619e1347790
3
- size 5178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1aa6c609edc24d38dae0e285acfaaaf393dbce51c764813a6cf579d32050175
3
+ size 5712
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76220d7fcfb19b8bac2ae99022cae216a90b830bf00b3012d8b613a63d0bf947
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc44da7da9ee7483223f84fc107bea919ad244d54f5eedf49f42aaf1954cee3
3
  size 17549312
run-3/checkpoint-594/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36b096d092545db3f6929eb2356bdf184c04e49667b8811fe834be8ad526fae3
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc44da7da9ee7483223f84fc107bea919ad244d54f5eedf49f42aaf1954cee3
3
  size 17549312
run-3/checkpoint-594/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75219665ac76bc792388fc4fd95a2d7984f5600f651f2bb63a5d1eb4a184428b
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00bd81260407d2dcacc2c135676399912a16b5ea23b5442d6377ef454fc7c84
3
  size 35122746
run-3/checkpoint-594/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ca7ec72111a14ce7876f703e982520232c709c07938716ba856a9bac936336c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d4f642fea8f45d839698b4d161b45cf4f8c426e82e8780e36861baa6efe2aa
3
  size 1064
run-3/checkpoint-594/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5564356435643565,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-594",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,49 +10,49 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.8068264722824097,
14
- "learning_rate": 1.8960352341732743e-05,
15
- "loss": 0.5426,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.500990099009901,
21
- "eval_loss": 0.5363946557044983,
22
- "eval_runtime": 55.6857,
23
- "eval_samples_per_second": 9.069,
24
- "eval_steps_per_second": 0.287,
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 0.7480373978614807,
30
- "learning_rate": 1.580029361811062e-05,
31
- "loss": 0.5354,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.5564356435643565,
37
- "eval_loss": 0.5311887264251709,
38
- "eval_runtime": 55.3283,
39
- "eval_samples_per_second": 9.127,
40
- "eval_steps_per_second": 0.289,
41
  "step": 594
42
  }
43
  ],
44
  "logging_steps": 500,
45
- "max_steps": 2079,
46
  "num_input_tokens_seen": 0,
47
- "num_train_epochs": 7,
48
  "save_steps": 500,
49
  "total_flos": 1555686566280.0,
50
  "train_batch_size": 32,
51
  "trial_name": null,
52
  "trial_params": {
53
- "alpha": 0.7228353037734928,
54
- "learning_rate": 2.212041106535487e-05,
55
- "num_train_epochs": 7,
56
- "temperature": 7
57
  }
58
  }
 
1
  {
2
+ "best_metric": 0.6,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-594",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.9984288811683655,
14
+ "learning_rate": 9.995670829688553e-05,
15
+ "loss": 0.5706,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5128712871287129,
21
+ "eval_loss": 0.5624967217445374,
22
+ "eval_runtime": 13.6049,
23
+ "eval_samples_per_second": 37.119,
24
+ "eval_steps_per_second": 1.176,
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 0.9848873615264893,
30
+ "learning_rate": 4.9978354148442764e-05,
31
+ "loss": 0.5602,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.6,
37
+ "eval_loss": 0.5566068291664124,
38
+ "eval_runtime": 13.9891,
39
+ "eval_samples_per_second": 36.099,
40
+ "eval_steps_per_second": 1.144,
41
  "step": 594
42
  }
43
  ],
44
  "logging_steps": 500,
45
+ "max_steps": 891,
46
  "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 3,
48
  "save_steps": 500,
49
  "total_flos": 1555686566280.0,
50
  "train_batch_size": 32,
51
  "trial_name": null,
52
  "trial_params": {
53
+ "alpha": 0.7785816803005383,
54
+ "learning_rate": 0.0001499350624453283,
55
+ "num_train_epochs": 3,
56
+ "temperature": 16
57
  }
58
  }
run-3/checkpoint-594/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef26fd1ddbade3db0c49006d3a63755bd6dceb7ec335f1b7b6f7abcb501f62b6
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9decda4f5411e0ed12b24ed68c82ec0237bdb8018a369738f83b0ca5fb77d628
3
  size 4984