File size: 2,908 Bytes
f003420
850bdc8
f003420
850bdc8
f003420
 
 
 
 
 
 
 
850bdc8
 
 
 
f003420
 
 
850bdc8
 
 
 
 
 
 
 
 
f003420
 
 
 
850bdc8
 
 
f003420
 
 
850bdc8
 
 
 
 
 
 
 
 
 
f003420
 
 
850bdc8
 
 
 
f003420
 
 
850bdc8
 
 
 
 
 
 
 
 
 
f003420
 
 
850bdc8
 
 
 
f003420
 
 
850bdc8
 
 
 
 
 
 
 
 
 
f003420
 
 
 
850bdc8
f003420
850bdc8
f003420
850bdc8
 
f003420
 
850bdc8
 
 
 
 
f003420
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
  "best_metric": 0.8365949119373777,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-22/checkpoint-768",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 768,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 4.364559650421143,
      "learning_rate": 0.0002529924510479742,
      "loss": 0.5381,
      "step": 192
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7896281800391389,
      "eval_f1": 0.8071748878923767,
      "eval_loss": 0.45070555806159973,
      "eval_mcc": 0.5890946801852007,
      "eval_precision": 0.7450331125827815,
      "eval_recall": 0.8806262230919765,
      "eval_runtime": 67.3745,
      "eval_samples_per_second": 15.169,
      "eval_steps_per_second": 0.475,
      "step": 192
    },
    {
      "epoch": 2.0,
      "grad_norm": 9.345165252685547,
      "learning_rate": 0.0001686616340319828,
      "loss": 0.4554,
      "step": 384
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8043052837573386,
      "eval_f1": 0.8127340823970037,
      "eval_loss": 0.4277065694332123,
      "eval_mcc": 0.6110916014068072,
      "eval_precision": 0.7791741472172352,
      "eval_recall": 0.8493150684931506,
      "eval_runtime": 67.2822,
      "eval_samples_per_second": 15.19,
      "eval_steps_per_second": 0.476,
      "step": 384
    },
    {
      "epoch": 3.0,
      "grad_norm": 6.078808307647705,
      "learning_rate": 8.43308170159914e-05,
      "loss": 0.418,
      "step": 576
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8238747553816047,
      "eval_f1": 0.8387096774193549,
      "eval_loss": 0.4266253113746643,
      "eval_mcc": 0.6589952419988697,
      "eval_precision": 0.7735537190082644,
      "eval_recall": 0.9158512720156555,
      "eval_runtime": 67.2428,
      "eval_samples_per_second": 15.199,
      "eval_steps_per_second": 0.476,
      "step": 576
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.4011917114257812,
      "learning_rate": 0.0,
      "loss": 0.3997,
      "step": 768
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8365949119373777,
      "eval_f1": 0.8477666362807658,
      "eval_loss": 0.41222792863845825,
      "eval_mcc": 0.680559953194408,
      "eval_precision": 0.7935153583617748,
      "eval_recall": 0.9099804305283757,
      "eval_runtime": 67.3829,
      "eval_samples_per_second": 15.167,
      "eval_steps_per_second": 0.475,
      "step": 768
    }
  ],
  "logging_steps": 500,
  "max_steps": 768,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 942780789120.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.885980659186716,
    "learning_rate": 0.0003373232680639656,
    "num_train_epochs": 4,
    "per_device_train_batch_size": 16,
    "temperature": 44
  }
}