louislu9911 commited on
Commit
249c1b4
·
verified ·
1 Parent(s): f65854a

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +168 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.8,
3
+ "train_loss": 0.2766976151615381,
4
+ "train_runtime": 2412.5227,
5
+ "train_samples_per_second": 127.714,
6
+ "train_steps_per_second": 0.013
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.8,
3
+ "train_loss": 0.2766976151615381,
4
+ "train_runtime": 2412.5227,
5
+ "train_samples_per_second": 127.714,
6
+ "train_steps_per_second": 0.013
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8827102803738318,
3
+ "best_model_checkpoint": "MoE-leaf-disease-convnextv2-base-22k-224/checkpoint-5",
4
+ "epoch": 12.8,
5
+ "eval_steps": 500,
6
+ "global_step": 32,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "eval_accuracy": 0.8808411214953271,
14
+ "eval_loss": 0.46136829257011414,
15
+ "eval_runtime": 39.2263,
16
+ "eval_samples_per_second": 54.555,
17
+ "eval_steps_per_second": 0.051,
18
+ "step": 2
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.8827102803738318,
23
+ "eval_loss": 0.34400519728660583,
24
+ "eval_runtime": 40.1116,
25
+ "eval_samples_per_second": 53.351,
26
+ "eval_steps_per_second": 0.05,
27
+ "step": 5
28
+ },
29
+ {
30
+ "epoch": 2.8,
31
+ "eval_accuracy": 0.8827102803738318,
32
+ "eval_loss": 0.344054251909256,
33
+ "eval_runtime": 37.365,
34
+ "eval_samples_per_second": 57.273,
35
+ "eval_steps_per_second": 0.054,
36
+ "step": 7
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "grad_norm": 0.02522450126707554,
41
+ "learning_rate": 3.928571428571429e-05,
42
+ "loss": 0.3155,
43
+ "step": 10
44
+ },
45
+ {
46
+ "epoch": 4.0,
47
+ "eval_accuracy": 0.8827102803738318,
48
+ "eval_loss": 0.3439185917377472,
49
+ "eval_runtime": 38.4322,
50
+ "eval_samples_per_second": 55.682,
51
+ "eval_steps_per_second": 0.052,
52
+ "step": 10
53
+ },
54
+ {
55
+ "epoch": 4.8,
56
+ "eval_accuracy": 0.8827102803738318,
57
+ "eval_loss": 0.34368354082107544,
58
+ "eval_runtime": 38.0602,
59
+ "eval_samples_per_second": 56.227,
60
+ "eval_steps_per_second": 0.053,
61
+ "step": 12
62
+ },
63
+ {
64
+ "epoch": 6.0,
65
+ "eval_accuracy": 0.8827102803738318,
66
+ "eval_loss": 0.343065083026886,
67
+ "eval_runtime": 38.3046,
68
+ "eval_samples_per_second": 55.868,
69
+ "eval_steps_per_second": 0.052,
70
+ "step": 15
71
+ },
72
+ {
73
+ "epoch": 6.8,
74
+ "eval_accuracy": 0.8827102803738318,
75
+ "eval_loss": 0.3426330089569092,
76
+ "eval_runtime": 38.0517,
77
+ "eval_samples_per_second": 56.239,
78
+ "eval_steps_per_second": 0.053,
79
+ "step": 17
80
+ },
81
+ {
82
+ "epoch": 8.0,
83
+ "grad_norm": 0.007871972396969795,
84
+ "learning_rate": 2.1428571428571428e-05,
85
+ "loss": 0.2577,
86
+ "step": 20
87
+ },
88
+ {
89
+ "epoch": 8.0,
90
+ "eval_accuracy": 0.8827102803738318,
91
+ "eval_loss": 0.3421165347099304,
92
+ "eval_runtime": 38.0514,
93
+ "eval_samples_per_second": 56.24,
94
+ "eval_steps_per_second": 0.053,
95
+ "step": 20
96
+ },
97
+ {
98
+ "epoch": 8.8,
99
+ "eval_accuracy": 0.8827102803738318,
100
+ "eval_loss": 0.3418828845024109,
101
+ "eval_runtime": 38.9753,
102
+ "eval_samples_per_second": 54.907,
103
+ "eval_steps_per_second": 0.051,
104
+ "step": 22
105
+ },
106
+ {
107
+ "epoch": 10.0,
108
+ "eval_accuracy": 0.8827102803738318,
109
+ "eval_loss": 0.3416748642921448,
110
+ "eval_runtime": 38.7299,
111
+ "eval_samples_per_second": 55.254,
112
+ "eval_steps_per_second": 0.052,
113
+ "step": 25
114
+ },
115
+ {
116
+ "epoch": 10.8,
117
+ "eval_accuracy": 0.8827102803738318,
118
+ "eval_loss": 0.3415946364402771,
119
+ "eval_runtime": 37.8063,
120
+ "eval_samples_per_second": 56.604,
121
+ "eval_steps_per_second": 0.053,
122
+ "step": 27
123
+ },
124
+ {
125
+ "epoch": 12.0,
126
+ "grad_norm": 0.01667422242462635,
127
+ "learning_rate": 3.5714285714285714e-06,
128
+ "loss": 0.2601,
129
+ "step": 30
130
+ },
131
+ {
132
+ "epoch": 12.0,
133
+ "eval_accuracy": 0.8827102803738318,
134
+ "eval_loss": 0.3415359854698181,
135
+ "eval_runtime": 38.519,
136
+ "eval_samples_per_second": 55.557,
137
+ "eval_steps_per_second": 0.052,
138
+ "step": 30
139
+ },
140
+ {
141
+ "epoch": 12.8,
142
+ "eval_accuracy": 0.8827102803738318,
143
+ "eval_loss": 0.34151995182037354,
144
+ "eval_runtime": 37.7802,
145
+ "eval_samples_per_second": 56.643,
146
+ "eval_steps_per_second": 0.053,
147
+ "step": 32
148
+ },
149
+ {
150
+ "epoch": 12.8,
151
+ "step": 32,
152
+ "total_flos": 0.0,
153
+ "train_loss": 0.2766976151615381,
154
+ "train_runtime": 2412.5227,
155
+ "train_samples_per_second": 127.714,
156
+ "train_steps_per_second": 0.013
157
+ }
158
+ ],
159
+ "logging_steps": 10,
160
+ "max_steps": 32,
161
+ "num_input_tokens_seen": 0,
162
+ "num_train_epochs": 16,
163
+ "save_steps": 500,
164
+ "total_flos": 0.0,
165
+ "train_batch_size": 2000,
166
+ "trial_name": null,
167
+ "trial_params": null
168
+ }