fats-fme commited on
Commit
dfe12e7
·
verified ·
1 Parent(s): dc23ae7

Training in progress, step 62, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6f62f3a4fb689c7ba8051e72971e6b48b8d501ffe4ec2fa67f6313b0ba71ec
3
  size 63592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4eb702a9b7c7ed07411895e8720380a30a37aa8dad1015b0b0e066676a1375
3
  size 63592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf715e2a13c84448a3ff9e9018304d4beccc087840b59ab29bbea95f48c66616
3
  size 136814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50168ad172490840ac22034001357964cdea60508d432babf332609d11d2e9e7
3
  size 136814
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f0769ac9f6e5a775e3172601984970ae5aeb6570d2e59f38a58d84854e22367
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf3e83c439bade2aaf5867209403de77e1402fb65d95d31520aa5a0b87ec2d8
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b69f085dcd7d3c6e07acfbd354d973bf19c16d6283ab3f9fe7eafe320fa66a1
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af96b89048af36316b3357414b80a223756ae79236a43f63f970ee1e48e15f72
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2997d603631ca8d1209b6aeca1ac3d249bd00d50d014e5ffdd28c52cc649ef27
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1730f6bb92ab61ebae50615ec73091cf922a05ddb1e2aee31af293953da82b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.768,
5
  "eval_steps": 16,
6
- "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -375,6 +375,104 @@
375
  "eval_samples_per_second": 173.197,
376
  "eval_steps_per_second": 44.116,
377
  "step": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  }
379
  ],
380
  "logging_steps": 1,
@@ -389,12 +487,12 @@
389
  "should_evaluate": false,
390
  "should_log": false,
391
  "should_save": true,
392
- "should_training_stop": false
393
  },
394
  "attributes": {}
395
  }
396
  },
397
- "total_flos": 1437471866880.0,
398
  "train_batch_size": 2,
399
  "trial_name": null,
400
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.992,
5
  "eval_steps": 16,
6
+ "global_step": 62,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
375
  "eval_samples_per_second": 173.197,
376
  "eval_steps_per_second": 44.116,
377
  "step": 48
378
+ },
379
+ {
380
+ "epoch": 0.784,
381
+ "grad_norm": 1.6119565963745117,
382
+ "learning_rate": 4.3667994193637796e-05,
383
+ "loss": 88.609,
384
+ "step": 49
385
+ },
386
+ {
387
+ "epoch": 0.8,
388
+ "grad_norm": 1.591171145439148,
389
+ "learning_rate": 3.7651019814126654e-05,
390
+ "loss": 88.6227,
391
+ "step": 50
392
+ },
393
+ {
394
+ "epoch": 0.816,
395
+ "grad_norm": 1.7413899898529053,
396
+ "learning_rate": 3.198272622290804e-05,
397
+ "loss": 88.6242,
398
+ "step": 51
399
+ },
400
+ {
401
+ "epoch": 0.832,
402
+ "grad_norm": 1.8746490478515625,
403
+ "learning_rate": 2.669481281701739e-05,
404
+ "loss": 88.5348,
405
+ "step": 52
406
+ },
407
+ {
408
+ "epoch": 0.848,
409
+ "grad_norm": 1.946791172027588,
410
+ "learning_rate": 2.181685175319702e-05,
411
+ "loss": 88.5242,
412
+ "step": 53
413
+ },
414
+ {
415
+ "epoch": 0.864,
416
+ "grad_norm": 1.9902241230010986,
417
+ "learning_rate": 1.7376122568400532e-05,
418
+ "loss": 88.6423,
419
+ "step": 54
420
+ },
421
+ {
422
+ "epoch": 0.88,
423
+ "grad_norm": 1.9461379051208496,
424
+ "learning_rate": 1.339745962155613e-05,
425
+ "loss": 88.6454,
426
+ "step": 55
427
+ },
428
+ {
429
+ "epoch": 0.896,
430
+ "grad_norm": 2.044666290283203,
431
+ "learning_rate": 9.903113209758096e-06,
432
+ "loss": 88.5542,
433
+ "step": 56
434
+ },
435
+ {
436
+ "epoch": 0.912,
437
+ "grad_norm": 1.9682707786560059,
438
+ "learning_rate": 6.9126251355795864e-06,
439
+ "loss": 88.6255,
440
+ "step": 57
441
+ },
442
+ {
443
+ "epoch": 0.928,
444
+ "grad_norm": 2.2248926162719727,
445
+ "learning_rate": 4.442719421385922e-06,
446
+ "loss": 88.5276,
447
+ "step": 58
448
+ },
449
+ {
450
+ "epoch": 0.944,
451
+ "grad_norm": 2.1064934730529785,
452
+ "learning_rate": 2.5072087818176382e-06,
453
+ "loss": 88.6175,
454
+ "step": 59
455
+ },
456
+ {
457
+ "epoch": 0.96,
458
+ "grad_norm": 2.4718239307403564,
459
+ "learning_rate": 1.1169173774871478e-06,
460
+ "loss": 88.5009,
461
+ "step": 60
462
+ },
463
+ {
464
+ "epoch": 0.976,
465
+ "grad_norm": 2.4647696018218994,
466
+ "learning_rate": 2.7962028188198706e-07,
467
+ "loss": 88.6283,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 0.992,
472
+ "grad_norm": 3.2278006076812744,
473
+ "learning_rate": 0.0,
474
+ "loss": 88.4972,
475
+ "step": 62
476
  }
477
  ],
478
  "logging_steps": 1,
 
487
  "should_evaluate": false,
488
  "should_log": false,
489
  "should_save": true,
490
+ "should_training_stop": true
491
  },
492
  "attributes": {}
493
  }
494
  },
495
+ "total_flos": 1856734494720.0,
496
  "train_batch_size": 2,
497
  "trial_name": null,
498
  "trial_params": null