Pranay17 committed · verified
Commit dd8378a · 1 Parent(s): d487a8a

Training in progress, step 3000, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a3f3c155fe4ab7a572e2a5fa77a282892e0e22cc6fc61a8c53fab449042d36d5
+ oid sha256:77e3105355c2df4b040acd4c2944a96c6e4176ce252181e7699f9ea948f127e2
  size 42002584
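Only the LFS pointer changes here: the adapter was re-saved at step 3000, so the `oid` (a SHA-256 of the blob's contents) is new while the size stays at 42002584 bytes. A pulled blob can be checked against its pointer by re-hashing it; a minimal Python sketch, assuming the file has actually been fetched via git-lfs rather than left as the pointer stub:

```python
# Sketch only: recompute the SHA-256 and byte size that a git-lfs pointer records.
import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "77e3105355c2df4b040acd4c2944a96c6e4176ce252181e7699f9ea948f127e2",
    42002584,
))
```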
last-checkpoint/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26b4cc6752f251b6fb91be7b0760735f433091c5b90914b97085bbbb5e101cea
+ size 251710672
last-checkpoint/global_step3000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be4ad9578cfb99fd2b3d6f4d885e5682fcfae1e17bb18e6eb202fe75572e5cb0
+ size 47955328
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step2500
+ global_step3000
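The two files added under `global_step3000/` are DeepSpeed ZeRO shards (the rank-0 bf16 optimizer partition, ~252 MB, and the rank-0 module states, ~48 MB), and the `latest` file now names that step as the tag to resume from. If a consolidated fp32 state dict is wanted instead of resuming, DeepSpeed's `zero_to_fp32` helpers can merge the shards; a minimal sketch, where the paths and output filename are assumptions and the helper's behavior can vary across DeepSpeed versions:

```python
# Sketch only: assumes deepspeed is installed and that last-checkpoint/ holds
# the global_step3000/ shards plus the `latest` tag file shown in this commit.
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

ckpt_dir = "last-checkpoint"
tag = open(f"{ckpt_dir}/latest").read().strip()  # "global_step3000"

# Merge the partitioned ZeRO optimizer/model states into one fp32 state dict.
state_dict = get_fp32_state_dict_from_zero_checkpoint(ckpt_dir, tag=tag)
torch.save(state_dict, "fp32_consolidated.pt")  # hypothetical output name
```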
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f01a4ea3836466236461a7e0a4041dbb7858ed9853ebc8a640dc7aeab392d3fa
+ oid sha256:b552361bcc18d8148b831b255d9e181d4beeb2d1ba3aee8779f232860eb665c7
  size 14244
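`rng_state.pth` is the small (14244-byte) file the Trainer rewrites at each checkpoint so that a resume replays the same random data order and dropout masks. A heavily hedged sketch of restoring it by hand; the key names ("python", "numpy", "cpu", "cuda") are assumptions about the usual layout of this file:

```python
# Sketch only: the dict layout below is an assumption; recent torch versions
# may also require weights_only=False when loading this pickle.
import random
import numpy as np
import torch

rng = torch.load("last-checkpoint/rng_state.pth")

random.setstate(rng["python"])
np.random.set_state(rng["numpy"])
torch.random.set_rng_state(rng["cpu"])

cuda_state = rng.get("cuda")
if torch.cuda.is_available() and cuda_state is not None:
    # May be a single state or a per-device list, depending on how it was saved.
    if isinstance(cuda_state, (list, tuple)):
        torch.cuda.set_rng_state_all(list(cuda_state))
    else:
        torch.cuda.set_rng_state(cuda_state)
```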
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 14.245014245014245,
+ "epoch": 17.094017094017094,
  "eval_steps": 1000,
- "global_step": 2500,
+ "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -364,6 +364,76 @@
  "learning_rate": 7.517517517517519e-05,
  "loss": 0.0593,
  "step": 2500
+ },
+ {
+ "epoch": 14.52991452991453,
+ "grad_norm": 0.1685921549797058,
+ "learning_rate": 7.267267267267268e-05,
+ "loss": 0.0579,
+ "step": 2550
+ },
+ {
+ "epoch": 14.814814814814815,
+ "grad_norm": 0.32586222887039185,
+ "learning_rate": 7.017017017017016e-05,
+ "loss": 0.0533,
+ "step": 2600
+ },
+ {
+ "epoch": 15.0997150997151,
+ "grad_norm": 0.6495370864868164,
+ "learning_rate": 6.766766766766767e-05,
+ "loss": 0.0575,
+ "step": 2650
+ },
+ {
+ "epoch": 15.384615384615385,
+ "grad_norm": 0.10936163365840912,
+ "learning_rate": 6.516516516516516e-05,
+ "loss": 0.0539,
+ "step": 2700
+ },
+ {
+ "epoch": 15.66951566951567,
+ "grad_norm": 0.09928351640701294,
+ "learning_rate": 6.266266266266266e-05,
+ "loss": 0.0573,
+ "step": 2750
+ },
+ {
+ "epoch": 15.954415954415955,
+ "grad_norm": 0.07429605722427368,
+ "learning_rate": 6.016016016016016e-05,
+ "loss": 0.0541,
+ "step": 2800
+ },
+ {
+ "epoch": 16.23931623931624,
+ "grad_norm": 0.0647626668214798,
+ "learning_rate": 5.765765765765766e-05,
+ "loss": 0.0546,
+ "step": 2850
+ },
+ {
+ "epoch": 16.524216524216524,
+ "grad_norm": 0.06490299850702286,
+ "learning_rate": 5.515515515515516e-05,
+ "loss": 0.0537,
+ "step": 2900
+ },
+ {
+ "epoch": 16.80911680911681,
+ "grad_norm": 0.07492049783468246,
+ "learning_rate": 5.2652652652652655e-05,
+ "loss": 0.0567,
+ "step": 2950
+ },
+ {
+ "epoch": 17.094017094017094,
+ "grad_norm": 0.18874266743659973,
+ "learning_rate": 5.015015015015015e-05,
+ "loss": 0.0534,
+ "step": 3000
  }
  ],
  "logging_steps": 50,
@@ -383,7 +453,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.806026140306637e+16,
+ "total_flos": 8.179327598749286e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null