File size: 2,301 Bytes
e775df5
 
 
 
abbd8fc
e775df5
 
 
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
 
abbd8fc
 
 
e775df5
 
abbd8fc
 
 
 
 
 
e775df5
 
abbd8fc
 
 
 
 
 
e775df5
 
abbd8fc
e775df5
abbd8fc
e775df5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "global_step": 7300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.37,
      "learning_rate": 4.657534246575342e-05,
      "loss": 3.0871,
      "step": 500
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.3150684931506855e-05,
      "loss": 2.5357,
      "step": 1000
    },
    {
      "epoch": 4.11,
      "learning_rate": 3.9726027397260274e-05,
      "loss": 2.4276,
      "step": 1500
    },
    {
      "epoch": 5.48,
      "learning_rate": 3.63013698630137e-05,
      "loss": 2.3554,
      "step": 2000
    },
    {
      "epoch": 6.85,
      "learning_rate": 3.287671232876712e-05,
      "loss": 2.3018,
      "step": 2500
    },
    {
      "epoch": 8.22,
      "learning_rate": 2.945205479452055e-05,
      "loss": 2.2534,
      "step": 3000
    },
    {
      "epoch": 9.59,
      "learning_rate": 2.6027397260273973e-05,
      "loss": 2.2174,
      "step": 3500
    },
    {
      "epoch": 10.96,
      "learning_rate": 2.2602739726027396e-05,
      "loss": 2.1862,
      "step": 4000
    },
    {
      "epoch": 12.33,
      "learning_rate": 1.9178082191780822e-05,
      "loss": 2.1502,
      "step": 4500
    },
    {
      "epoch": 13.7,
      "learning_rate": 1.5753424657534248e-05,
      "loss": 2.1359,
      "step": 5000
    },
    {
      "epoch": 15.07,
      "learning_rate": 1.2328767123287671e-05,
      "loss": 2.1134,
      "step": 5500
    },
    {
      "epoch": 16.44,
      "learning_rate": 8.904109589041095e-06,
      "loss": 2.0959,
      "step": 6000
    },
    {
      "epoch": 17.81,
      "learning_rate": 5.479452054794521e-06,
      "loss": 2.0852,
      "step": 6500
    },
    {
      "epoch": 19.18,
      "learning_rate": 2.054794520547945e-06,
      "loss": 2.0761,
      "step": 7000
    },
    {
      "epoch": 20.0,
      "step": 7300,
      "total_flos": 1.903251161088e+16,
      "train_loss": 2.27839626364512,
      "train_runtime": 6672.7861,
      "train_samples_per_second": 5.458,
      "train_steps_per_second": 1.094
    }
  ],
  "max_steps": 7300,
  "num_train_epochs": 20,
  "total_flos": 1.903251161088e+16,
  "trial_name": null,
  "trial_params": null
}