File size: 2,912 Bytes
77b7432
f720102
77b7432
f720102
77b7432
 
 
 
 
 
 
 
f720102
 
 
 
77b7432
 
 
f720102
 
 
 
 
 
 
 
 
77b7432
 
 
 
f720102
 
 
77b7432
 
 
f720102
77b7432
f720102
 
 
 
 
 
 
 
 
77b7432
 
f720102
 
 
 
77b7432
 
 
f720102
77b7432
f720102
 
 
 
 
 
 
 
 
77b7432
 
f720102
 
 
 
77b7432
 
 
f720102
 
 
 
 
 
 
 
 
 
77b7432
 
 
 
f720102
77b7432
f720102
77b7432
f720102
 
77b7432
 
f720102
 
 
 
 
77b7432
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
  "best_metric": 0.8287671232876712,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-768",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 768,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 6.338156223297119,
      "learning_rate": 0.00018913271286694248,
      "loss": 0.5602,
      "step": 192
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7974559686888454,
      "eval_f1": 0.8172992056487202,
      "eval_loss": 0.450337290763855,
      "eval_mcc": 0.6094644742211153,
      "eval_precision": 0.7443729903536977,
      "eval_recall": 0.9060665362035225,
      "eval_runtime": 66.9138,
      "eval_samples_per_second": 15.273,
      "eval_steps_per_second": 0.478,
      "step": 192
    },
    {
      "epoch": 2.0,
      "grad_norm": 7.966625690460205,
      "learning_rate": 0.00012608847524462832,
      "loss": 0.4665,
      "step": 384
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.821917808219178,
      "eval_f1": 0.8289473684210525,
      "eval_loss": 0.41706037521362305,
      "eval_mcc": 0.6460214094282304,
      "eval_precision": 0.7974683544303798,
      "eval_recall": 0.863013698630137,
      "eval_runtime": 66.4735,
      "eval_samples_per_second": 15.375,
      "eval_steps_per_second": 0.481,
      "step": 384
    },
    {
      "epoch": 3.0,
      "grad_norm": 6.679861545562744,
      "learning_rate": 6.304423762231416e-05,
      "loss": 0.4212,
      "step": 576
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8268101761252447,
      "eval_f1": 0.8389444949954504,
      "eval_loss": 0.42093411087989807,
      "eval_mcc": 0.6611696946007627,
      "eval_precision": 0.7840136054421769,
      "eval_recall": 0.9021526418786693,
      "eval_runtime": 66.3714,
      "eval_samples_per_second": 15.398,
      "eval_steps_per_second": 0.482,
      "step": 576
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.6309866905212402,
      "learning_rate": 0.0,
      "loss": 0.3946,
      "step": 768
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8287671232876712,
      "eval_f1": 0.8404740200546947,
      "eval_loss": 0.41515475511550903,
      "eval_mcc": 0.6647329775387241,
      "eval_precision": 0.7866894197952219,
      "eval_recall": 0.9021526418786693,
      "eval_runtime": 66.4487,
      "eval_samples_per_second": 15.38,
      "eval_steps_per_second": 0.482,
      "step": 768
    }
  ],
  "logging_steps": 500,
  "max_steps": 768,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 942780789120.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.9715047486461332,
    "learning_rate": 0.00025217695048925664,
    "num_train_epochs": 4,
    "per_device_train_batch_size": 16,
    "temperature": 28
  }
}