somaia02 committed
Commit d51c8eb · 1 Parent(s): 23348c6

Training in progress, step 5000, checkpoint

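The files below follow the standard transformers Trainer checkpoint layout, which is what resume_from_checkpoint consumes. A minimal, hypothetical sketch of resuming from this directory; the function, model and dataset arguments are placeholders rather than the author's actual script, and the numeric argument values are simply copied from trainer_state.json in this commit:

from transformers import Trainer, TrainingArguments

def resume_from_last_checkpoint(model, train_ds, eval_ds, ckpt_dir="last-checkpoint"):
    # Values mirror trainer_state.json in this commit; everything else is illustrative.
    args = TrainingArguments(
        output_dir="bart_lora_outputs",
        num_train_epochs=10,
        logging_steps=10,
        evaluation_strategy="steps",
        eval_steps=100,
        save_steps=500,
    )
    trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)
    # resume_from_checkpoint reloads optimizer.pt, scheduler.pt and rng_state.pth and
    # restores global_step, so training continues from step 5000 instead of restarting.
    return trainer.train(resume_from_checkpoint=ckpt_dir)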
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9197a884173d9c36c93a016c2f30534c3f05c90874cb72eed644075307e2fce3
+ oid sha256:065b0553ba7b625058bbc54e3b3e27c491e8ccc08fd588730cb0ecf41b13d909
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3eb84df5445e2433de304b15e77a1175375dd2d8c25dc61c78681c28414293e5
+ oid sha256:07c15daf481efa3729e5a025f1cd3f5b2786cecfaaa7fdaaaf1ae1c82d0daee5
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:75f78ad885bfca269044336e349d57c368367d27fdbb75021a699360ecf7bb2b
+ oid sha256:c099333e451994d42ef6a1a4186a67f46e72967ddbe49de900a735186c809291
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48a8d4a8a8e577d9dbdb51f074c9b711b4edce95beedab354e7f35ce094aeb0d
+ oid sha256:cd435f2d2df02dd69f00b4ffef1b8f08ed9d2c925d3492fd0d5a1484e32202ff
  size 1064
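Each pointer diff above is a Git LFS pointer (version / oid sha256 / size), so a locally downloaded file can be checked against the new pointer. A small sketch, assuming a local clone with the LFS objects pulled; the path, hash and size are taken from the adapter_model.safetensors entry above:

import hashlib
from pathlib import Path

def matches_lfs_pointer(path, expected_sha256, expected_size):
    # Compare the on-disk file against the oid/size recorded in the LFS pointer.
    data = Path(path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_sha256

print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "065b0553ba7b625058bbc54e3b3e27c491e8ccc08fd588730cb0ecf41b13d909",
    5323528,
))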
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.41941431164741516,
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-4500",
- "epoch": 7.3409461663947795,
+ "best_metric": 0.4176868200302124,
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-5000",
+ "epoch": 8.156606851549755,
  "eval_steps": 100,
- "global_step": 4500,
+ "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3067,13 +3067,353 @@
  "eval_samples_per_second": 91.356,
  "eval_steps_per_second": 11.468,
  "step": 4500
+ },
+ {
+ "epoch": 7.36,
+ "learning_rate": 0.0002877442273534636,
+ "loss": 0.4006,
+ "step": 4510
+ },
+ {
+ "epoch": 7.37,
+ "learning_rate": 0.00028596802841918297,
+ "loss": 0.4024,
+ "step": 4520
+ },
+ {
+ "epoch": 7.39,
+ "learning_rate": 0.0002841918294849023,
+ "loss": 0.3959,
+ "step": 4530
+ },
+ {
+ "epoch": 7.41,
+ "learning_rate": 0.0002824156305506217,
+ "loss": 0.3917,
+ "step": 4540
+ },
+ {
+ "epoch": 7.42,
+ "learning_rate": 0.00028063943161634104,
+ "loss": 0.3994,
+ "step": 4550
+ },
+ {
+ "epoch": 7.44,
+ "learning_rate": 0.0002788632326820604,
+ "loss": 0.4246,
+ "step": 4560
+ },
+ {
+ "epoch": 7.46,
+ "learning_rate": 0.00027708703374777977,
+ "loss": 0.3905,
+ "step": 4570
+ },
+ {
+ "epoch": 7.47,
+ "learning_rate": 0.0002753108348134991,
+ "loss": 0.4076,
+ "step": 4580
+ },
+ {
+ "epoch": 7.49,
+ "learning_rate": 0.00027353463587921845,
+ "loss": 0.3924,
+ "step": 4590
+ },
+ {
+ "epoch": 7.5,
+ "learning_rate": 0.00027175843694493784,
+ "loss": 0.3863,
+ "step": 4600
+ },
+ {
+ "epoch": 7.5,
+ "eval_loss": 0.4166851341724396,
+ "eval_runtime": 12.731,
+ "eval_samples_per_second": 91.98,
+ "eval_steps_per_second": 11.547,
+ "step": 4600
+ },
+ {
+ "epoch": 7.52,
+ "learning_rate": 0.0002699822380106572,
+ "loss": 0.3949,
+ "step": 4610
+ },
+ {
+ "epoch": 7.54,
+ "learning_rate": 0.0002682060390763766,
+ "loss": 0.3966,
+ "step": 4620
+ },
+ {
+ "epoch": 7.55,
+ "learning_rate": 0.0002664298401420959,
+ "loss": 0.3899,
+ "step": 4630
+ },
+ {
+ "epoch": 7.57,
+ "learning_rate": 0.00026465364120781525,
+ "loss": 0.41,
+ "step": 4640
+ },
+ {
+ "epoch": 7.59,
+ "learning_rate": 0.00026287744227353465,
+ "loss": 0.3993,
+ "step": 4650
+ },
+ {
+ "epoch": 7.6,
+ "learning_rate": 0.000261101243339254,
+ "loss": 0.4216,
+ "step": 4660
+ },
+ {
+ "epoch": 7.62,
+ "learning_rate": 0.0002593250444049733,
+ "loss": 0.4144,
+ "step": 4670
+ },
+ {
+ "epoch": 7.63,
+ "learning_rate": 0.0002575488454706927,
+ "loss": 0.4117,
+ "step": 4680
+ },
+ {
+ "epoch": 7.65,
+ "learning_rate": 0.00025577264653641206,
+ "loss": 0.4288,
+ "step": 4690
+ },
+ {
+ "epoch": 7.67,
+ "learning_rate": 0.0002539964476021314,
+ "loss": 0.3837,
+ "step": 4700
+ },
+ {
+ "epoch": 7.67,
+ "eval_loss": 0.4177791476249695,
+ "eval_runtime": 12.7781,
+ "eval_samples_per_second": 91.642,
+ "eval_steps_per_second": 11.504,
+ "step": 4700
+ },
+ {
+ "epoch": 7.68,
+ "learning_rate": 0.0002522202486678508,
+ "loss": 0.3894,
+ "step": 4710
+ },
+ {
+ "epoch": 7.7,
+ "learning_rate": 0.00025044404973357013,
+ "loss": 0.4109,
+ "step": 4720
+ },
+ {
+ "epoch": 7.72,
+ "learning_rate": 0.0002486678507992895,
+ "loss": 0.4211,
+ "step": 4730
+ },
+ {
+ "epoch": 7.73,
+ "learning_rate": 0.0002468916518650089,
+ "loss": 0.3984,
+ "step": 4740
+ },
+ {
+ "epoch": 7.75,
+ "learning_rate": 0.00024511545293072826,
+ "loss": 0.3988,
+ "step": 4750
+ },
+ {
+ "epoch": 7.77,
+ "learning_rate": 0.0002433392539964476,
+ "loss": 0.4185,
+ "step": 4760
+ },
+ {
+ "epoch": 7.78,
+ "learning_rate": 0.00024156305506216696,
+ "loss": 0.3986,
+ "step": 4770
+ },
+ {
+ "epoch": 7.8,
+ "learning_rate": 0.00023978685612788633,
+ "loss": 0.4035,
+ "step": 4780
+ },
+ {
+ "epoch": 7.81,
+ "learning_rate": 0.00023801065719360567,
+ "loss": 0.4028,
+ "step": 4790
+ },
+ {
+ "epoch": 7.83,
+ "learning_rate": 0.00023623445825932503,
+ "loss": 0.3998,
+ "step": 4800
+ },
+ {
+ "epoch": 7.83,
+ "eval_loss": 0.42066851258277893,
+ "eval_runtime": 12.801,
+ "eval_samples_per_second": 91.477,
+ "eval_steps_per_second": 11.483,
+ "step": 4800
+ },
+ {
+ "epoch": 7.85,
+ "learning_rate": 0.00023445825932504443,
+ "loss": 0.4137,
+ "step": 4810
+ },
+ {
+ "epoch": 7.86,
+ "learning_rate": 0.0002326820603907638,
+ "loss": 0.3972,
+ "step": 4820
+ },
+ {
+ "epoch": 7.88,
+ "learning_rate": 0.00023090586145648313,
+ "loss": 0.4158,
+ "step": 4830
+ },
+ {
+ "epoch": 7.9,
+ "learning_rate": 0.0002291296625222025,
+ "loss": 0.3972,
+ "step": 4840
+ },
+ {
+ "epoch": 7.91,
+ "learning_rate": 0.00022735346358792187,
+ "loss": 0.4246,
+ "step": 4850
+ },
+ {
+ "epoch": 7.93,
+ "learning_rate": 0.0002255772646536412,
+ "loss": 0.3866,
+ "step": 4860
+ },
+ {
+ "epoch": 7.94,
+ "learning_rate": 0.00022380106571936057,
+ "loss": 0.4359,
+ "step": 4870
+ },
+ {
+ "epoch": 7.96,
+ "learning_rate": 0.00022202486678507994,
+ "loss": 0.386,
+ "step": 4880
+ },
+ {
+ "epoch": 7.98,
+ "learning_rate": 0.0002202486678507993,
+ "loss": 0.38,
+ "step": 4890
+ },
+ {
+ "epoch": 7.99,
+ "learning_rate": 0.00021847246891651864,
+ "loss": 0.3936,
+ "step": 4900
+ },
+ {
+ "epoch": 7.99,
+ "eval_loss": 0.41456684470176697,
+ "eval_runtime": 12.7936,
+ "eval_samples_per_second": 91.53,
+ "eval_steps_per_second": 11.49,
+ "step": 4900
+ },
+ {
+ "epoch": 8.01,
+ "learning_rate": 0.000216696269982238,
+ "loss": 0.3634,
+ "step": 4910
+ },
+ {
+ "epoch": 8.03,
+ "learning_rate": 0.00021492007104795738,
+ "loss": 0.3883,
+ "step": 4920
+ },
+ {
+ "epoch": 8.04,
+ "learning_rate": 0.00021314387211367671,
+ "loss": 0.3864,
+ "step": 4930
+ },
+ {
+ "epoch": 8.06,
+ "learning_rate": 0.00021136767317939608,
+ "loss": 0.3848,
+ "step": 4940
+ },
+ {
+ "epoch": 8.08,
+ "learning_rate": 0.00020959147424511545,
+ "loss": 0.4164,
+ "step": 4950
+ },
+ {
+ "epoch": 8.09,
+ "learning_rate": 0.00020781527531083484,
+ "loss": 0.3963,
+ "step": 4960
+ },
+ {
+ "epoch": 8.11,
+ "learning_rate": 0.00020603907637655418,
+ "loss": 0.3873,
+ "step": 4970
+ },
+ {
+ "epoch": 8.12,
+ "learning_rate": 0.00020426287744227355,
+ "loss": 0.4306,
+ "step": 4980
+ },
+ {
+ "epoch": 8.14,
+ "learning_rate": 0.0002024866785079929,
+ "loss": 0.3966,
+ "step": 4990
+ },
+ {
+ "epoch": 8.16,
+ "learning_rate": 0.00020071047957371228,
+ "loss": 0.3672,
+ "step": 5000
+ },
+ {
+ "epoch": 8.16,
+ "eval_loss": 0.4176868200302124,
+ "eval_runtime": 12.7243,
+ "eval_samples_per_second": 92.029,
+ "eval_steps_per_second": 11.553,
+ "step": 5000
  }
  ],
  "logging_steps": 10,
  "max_steps": 6130,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 8507970531753984.0,
+ "total_flos": 9451155609649152.0,
  "trial_name": null,
  "trial_params": null
  }
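The entries added above live in the log_history list of trainer_state.json, so the eval trajectory for this checkpoint can be summarised directly from the file. A short sketch, assuming a local clone of this repo:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])                      # 0.4176868200302124 at checkpoint-5000
print("best_model_checkpoint:", state["best_model_checkpoint"])

# Keep only the evaluation records (the ones with "eval_loss") added in this commit's range.
evals = [e for e in state["log_history"] if "eval_loss" in e and e["step"] >= 4600]
for e in evals:
    print(e["step"], e["eval_loss"])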