suehyunpark commited on
Commit
c7ee5f6
·
verified ·
1 Parent(s): a4a7960

Upload after 3 epochs

Browse files
Files changed (1) hide show
  1. trainer_state.json +150 -29
trainer_state.json CHANGED
@@ -1,56 +1,177 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 3,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.3333333333333333,
13
- "grad_norm": 0.6868907380200201,
14
- "learning_rate": 1e-05,
15
- "loss": 0.1371,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 0.6666666666666666,
20
- "grad_norm": 0.6587169132820502,
21
- "learning_rate": 5e-06,
22
- "loss": 0.1343,
23
  "step": 2
24
  },
25
  {
26
- "epoch": 1.0,
27
- "grad_norm": 0.6223967331437048,
28
- "learning_rate": 0.0,
29
- "loss": 0.134,
30
  "step": 3
31
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  {
33
  "epoch": 1.0,
34
- "eval_loss": 0.09965373575687408,
35
- "eval_runtime": 2.9216,
36
- "eval_samples_per_second": 3.423,
37
- "eval_steps_per_second": 0.342,
38
- "step": 3
39
  },
40
  {
41
  "epoch": 1.0,
42
- "step": 3,
43
- "total_flos": 484851810304.0,
44
- "train_loss": 0.1351288358370463,
45
- "train_runtime": 199.4094,
46
- "train_samples_per_second": 0.863,
47
- "train_steps_per_second": 0.015
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
  ],
50
  "logging_steps": 1,
51
- "max_steps": 3,
52
  "num_input_tokens_seen": 0,
53
- "num_train_epochs": 1,
54
  "save_steps": 500,
55
  "stateful_callbacks": {
56
  "TrainerControl": {
@@ -64,8 +185,8 @@
64
  "attributes": {}
65
  }
66
  },
67
- "total_flos": 484851810304.0,
68
- "train_batch_size": 16,
69
  "trial_name": null,
70
  "trial_params": null
71
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.16666666666666666,
13
+ "grad_norm": 280.6432626406138,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.9518,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.3333333333333333,
20
+ "grad_norm": 268.34909690297195,
21
+ "learning_rate": 1e-05,
22
+ "loss": 2.8263,
23
  "step": 2
24
  },
25
  {
26
+ "epoch": 0.5,
27
+ "grad_norm": 50.75311462024632,
28
+ "learning_rate": 9.903926402016153e-06,
29
+ "loss": 1.1917,
30
  "step": 3
31
  },
32
+ {
33
+ "epoch": 0.6666666666666666,
34
+ "grad_norm": 22.75327219021691,
35
+ "learning_rate": 9.619397662556434e-06,
36
+ "loss": 0.8476,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.8333333333333334,
41
+ "grad_norm": 6.642460322802831,
42
+ "learning_rate": 9.157348061512728e-06,
43
+ "loss": 0.541,
44
+ "step": 5
45
+ },
46
  {
47
  "epoch": 1.0,
48
+ "grad_norm": 37.864469102940696,
49
+ "learning_rate": 8.535533905932739e-06,
50
+ "loss": 0.5717,
51
+ "step": 6
 
52
  },
53
  {
54
  "epoch": 1.0,
55
+ "eval_loss": 0.3336825966835022,
56
+ "eval_runtime": 1.0612,
57
+ "eval_samples_per_second": 9.423,
58
+ "eval_steps_per_second": 2.827,
59
+ "step": 6
60
+ },
61
+ {
62
+ "epoch": 1.1666666666666667,
63
+ "grad_norm": 8.475401484959521,
64
+ "learning_rate": 7.777851165098012e-06,
65
+ "loss": 0.3759,
66
+ "step": 7
67
+ },
68
+ {
69
+ "epoch": 1.3333333333333333,
70
+ "grad_norm": 2.4494815853787526,
71
+ "learning_rate": 6.913417161825449e-06,
72
+ "loss": 0.2862,
73
+ "step": 8
74
+ },
75
+ {
76
+ "epoch": 1.5,
77
+ "grad_norm": 3.7056738008000853,
78
+ "learning_rate": 5.975451610080643e-06,
79
+ "loss": 0.2859,
80
+ "step": 9
81
+ },
82
+ {
83
+ "epoch": 1.6666666666666665,
84
+ "grad_norm": 2.528639375054547,
85
+ "learning_rate": 5e-06,
86
+ "loss": 0.2525,
87
+ "step": 10
88
+ },
89
+ {
90
+ "epoch": 1.8333333333333335,
91
+ "grad_norm": 1.681034868123111,
92
+ "learning_rate": 4.02454838991936e-06,
93
+ "loss": 0.2549,
94
+ "step": 11
95
+ },
96
+ {
97
+ "epoch": 2.0,
98
+ "grad_norm": 1.9554282109403005,
99
+ "learning_rate": 3.0865828381745515e-06,
100
+ "loss": 0.2654,
101
+ "step": 12
102
+ },
103
+ {
104
+ "epoch": 2.0,
105
+ "eval_loss": 0.2001432627439499,
106
+ "eval_runtime": 1.1953,
107
+ "eval_samples_per_second": 8.366,
108
+ "eval_steps_per_second": 2.51,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 2.1666666666666665,
113
+ "grad_norm": 1.2686802309348975,
114
+ "learning_rate": 2.2221488349019903e-06,
115
+ "loss": 0.2087,
116
+ "step": 13
117
+ },
118
+ {
119
+ "epoch": 2.3333333333333335,
120
+ "grad_norm": 1.199423147704983,
121
+ "learning_rate": 1.4644660940672628e-06,
122
+ "loss": 0.2293,
123
+ "step": 14
124
+ },
125
+ {
126
+ "epoch": 2.5,
127
+ "grad_norm": 1.0458147461844773,
128
+ "learning_rate": 8.426519384872733e-07,
129
+ "loss": 0.2092,
130
+ "step": 15
131
+ },
132
+ {
133
+ "epoch": 2.6666666666666665,
134
+ "grad_norm": 0.9946553061597486,
135
+ "learning_rate": 3.8060233744356634e-07,
136
+ "loss": 0.1999,
137
+ "step": 16
138
+ },
139
+ {
140
+ "epoch": 2.8333333333333335,
141
+ "grad_norm": 0.8890781121405242,
142
+ "learning_rate": 9.607359798384785e-08,
143
+ "loss": 0.2357,
144
+ "step": 17
145
+ },
146
+ {
147
+ "epoch": 3.0,
148
+ "grad_norm": 0.9030643285074138,
149
+ "learning_rate": 0.0,
150
+ "loss": 0.1929,
151
+ "step": 18
152
+ },
153
+ {
154
+ "epoch": 3.0,
155
+ "eval_loss": 0.19034340977668762,
156
+ "eval_runtime": 0.9709,
157
+ "eval_samples_per_second": 10.3,
158
+ "eval_steps_per_second": 3.09,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 3.0,
163
+ "step": 18,
164
+ "total_flos": 2848171622400.0,
165
+ "train_loss": 0.6625954583287239,
166
+ "train_runtime": 666.9986,
167
+ "train_samples_per_second": 0.774,
168
+ "train_steps_per_second": 0.027
169
  }
170
  ],
171
  "logging_steps": 1,
172
+ "max_steps": 18,
173
  "num_input_tokens_seen": 0,
174
+ "num_train_epochs": 3,
175
  "save_steps": 500,
176
  "stateful_callbacks": {
177
  "TrainerControl": {
 
185
  "attributes": {}
186
  }
187
  },
188
+ "total_flos": 2848171622400.0,
189
+ "train_batch_size": 8,
190
  "trial_name": null,
191
  "trial_params": null
192
  }