ribesstefano committed on
Commit
32f55e6
·
1 Parent(s): ba38217

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bf4104191ae6939313ef1a6fdecaec553787a027b37fcc98d548b1790f4285d
3
  size 1112334200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0892cb2e571b44d5942a997aa35ba80bcc592ce3bb5193d397e7316e056b6da0
3
  size 1112334200
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ea85e208a6daebb8bc69efdf1219ab403f97d9ba2a7aaf97f4393b3f6aba14a
3
  size 4999698
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a5d797ada3719b6ed3840e3ac9c82afabd94f3ba7665e77d016c8d3043ff1b
3
  size 4999698
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f79b92daf8dc6726d9f2cc5c6eab1fd252a5802172ec5a5525da916ac369cb0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:454eadf6d869c3ef671c7d303b75726820b52ef79f805284b0badaa30198f863
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b80b041a6a7fbae957f1842994caa209750d940772340214766874c83b0cd23c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc0873bf8f3cb7a7665b601ff76bd0007b3fd5c53fba8a3b9f7c798f7147db4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4875621890547264,
3
- "best_model_checkpoint": "/RuleBert-v0.0-k0/checkpoint-100",
4
- "epoch": 0.22123893805309736,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -53,6 +53,144 @@
53
  "eval_samples_per_second": 88.003,
54
  "eval_steps_per_second": 2.933,
55
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
  ],
58
  "logging_steps": 25,
@@ -60,7 +198,7 @@
60
  "num_input_tokens_seen": 0,
61
  "num_train_epochs": 18,
62
  "save_steps": 100,
63
- "total_flos": 210568219852800.0,
64
  "train_batch_size": 8,
65
  "trial_name": null,
66
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5050071530758227,
3
+ "best_model_checkpoint": "/RuleBert-v0.0-k0/checkpoint-400",
4
+ "epoch": 0.8849557522123894,
5
  "eval_steps": 50,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
53
  "eval_samples_per_second": 88.003,
54
  "eval_steps_per_second": 2.933,
55
  "step": 100
56
+ },
57
+ {
58
+ "epoch": 0.28,
59
+ "learning_rate": 1.9987954562051724e-05,
60
+ "loss": 0.3947,
61
+ "step": 125
62
+ },
63
+ {
64
+ "epoch": 0.33,
65
+ "learning_rate": 1.998265610184716e-05,
66
+ "loss": 0.3621,
67
+ "step": 150
68
+ },
69
+ {
70
+ "epoch": 0.33,
71
+ "eval_accuracy": 0.008333333333333333,
72
+ "eval_f1": 0.49451303155006865,
73
+ "eval_loss": 0.3777387738227844,
74
+ "eval_roc_auc": 0.6711924439197166,
75
+ "eval_runtime": 2.8618,
76
+ "eval_samples_per_second": 83.864,
77
+ "eval_steps_per_second": 2.795,
78
+ "step": 150
79
+ },
80
+ {
81
+ "epoch": 0.39,
82
+ "learning_rate": 1.997639549247016e-05,
83
+ "loss": 0.3516,
84
+ "step": 175
85
+ },
86
+ {
87
+ "epoch": 0.44,
88
+ "learning_rate": 1.9969173337331283e-05,
89
+ "loss": 0.3541,
90
+ "step": 200
91
+ },
92
+ {
93
+ "epoch": 0.44,
94
+ "eval_accuracy": 0.025,
95
+ "eval_f1": 0.4965277777777778,
96
+ "eval_loss": 0.3699280917644501,
97
+ "eval_roc_auc": 0.6713821892393321,
98
+ "eval_runtime": 2.8771,
99
+ "eval_samples_per_second": 83.417,
100
+ "eval_steps_per_second": 2.781,
101
+ "step": 200
102
+ },
103
+ {
104
+ "epoch": 0.5,
105
+ "learning_rate": 1.9960990332516875e-05,
106
+ "loss": 0.3512,
107
+ "step": 225
108
+ },
109
+ {
110
+ "epoch": 0.55,
111
+ "learning_rate": 1.995184726672197e-05,
112
+ "loss": 0.3322,
113
+ "step": 250
114
+ },
115
+ {
116
+ "epoch": 0.55,
117
+ "eval_accuracy": 0.0,
118
+ "eval_f1": 0.4925675675675676,
119
+ "eval_loss": 0.36883607506752014,
120
+ "eval_roc_auc": 0.6711713611064261,
121
+ "eval_runtime": 2.8206,
122
+ "eval_samples_per_second": 85.088,
123
+ "eval_steps_per_second": 2.836,
124
+ "step": 250
125
+ },
126
+ {
127
+ "epoch": 0.61,
128
+ "learning_rate": 1.9941745021174284e-05,
129
+ "loss": 0.3399,
130
+ "step": 275
131
+ },
132
+ {
133
+ "epoch": 0.66,
134
+ "learning_rate": 1.9930684569549265e-05,
135
+ "loss": 0.3276,
136
+ "step": 300
137
+ },
138
+ {
139
+ "epoch": 0.66,
140
+ "eval_accuracy": 0.008333333333333333,
141
+ "eval_f1": 0.49861495844875353,
142
+ "eval_loss": 0.36757946014404297,
143
+ "eval_roc_auc": 0.6724995783437342,
144
+ "eval_runtime": 2.7494,
145
+ "eval_samples_per_second": 87.293,
146
+ "eval_steps_per_second": 2.91,
147
+ "step": 300
148
+ },
149
+ {
150
+ "epoch": 0.72,
151
+ "learning_rate": 1.991866697787626e-05,
152
+ "loss": 0.3467,
153
+ "step": 325
154
+ },
155
+ {
156
+ "epoch": 0.77,
157
+ "learning_rate": 1.990569340443577e-05,
158
+ "loss": 0.3078,
159
+ "step": 350
160
+ },
161
+ {
162
+ "epoch": 0.77,
163
+ "eval_accuracy": 0.016666666666666666,
164
+ "eval_f1": 0.5012285012285013,
165
+ "eval_loss": 0.367951363325119,
166
+ "eval_roc_auc": 0.6728632568729971,
167
+ "eval_runtime": 2.7213,
168
+ "eval_samples_per_second": 88.192,
169
+ "eval_steps_per_second": 2.94,
170
+ "step": 350
171
+ },
172
+ {
173
+ "epoch": 0.83,
174
+ "learning_rate": 1.989176509964781e-05,
175
+ "loss": 0.3211,
176
+ "step": 375
177
+ },
178
+ {
179
+ "epoch": 0.88,
180
+ "learning_rate": 1.9876883405951378e-05,
181
+ "loss": 0.3293,
182
+ "step": 400
183
+ },
184
+ {
185
+ "epoch": 0.88,
186
+ "eval_accuracy": 0.0375,
187
+ "eval_f1": 0.5050071530758227,
188
+ "eval_loss": 0.36856791377067566,
189
+ "eval_roc_auc": 0.6734061393152301,
190
+ "eval_runtime": 2.704,
191
+ "eval_samples_per_second": 88.757,
192
+ "eval_steps_per_second": 2.959,
193
+ "step": 400
194
  }
195
  ],
196
  "logging_steps": 25,
 
198
  "num_input_tokens_seen": 0,
199
  "num_train_epochs": 18,
200
  "save_steps": 100,
201
+ "total_flos": 842272879411200.0,
202
  "train_batch_size": 8,
203
  "trial_name": null,
204
  "trial_params": null