somaia02 commited on
Commit
01cd828
·
1 Parent(s): 52b79d2

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a8a809056b973f3df74a8e0401d2f6b3f48855ca88f92c074a282b2fc872456
3
  size 5323528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c53e59c2bd584a2174d8768ace772b5d5796aa08166aa25302904cb3a665ff7
3
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cace089f6f62f3f0c39575883a3ef27ad996c274acf20a6fd9d2d6ee12a92918
3
  size 10707706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed95370d354bc8f635abaa3992f7d5d462b1bcb514428235550ba0f48b08b85
3
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e4a99a8f1604eb88b6b6efd9bc6ced6e284090d6037a758d0561d10a951f004
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5f9b3cb2653c573d5d70e945ac9e78ff4b3999c649bc26690d113787889e70
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c82bedf0a611f290596df2fde142fbda2afa059d93dc846b92ee4f876380a79
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74be72e979aeba041a40f5740c89e95223a2d1671e242dd571b2005ebd09a8c1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5469470024108887,
3
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-500",
4
- "epoch": 0.8156606851549756,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -347,13 +347,353 @@
347
  "eval_samples_per_second": 190.861,
348
  "eval_steps_per_second": 23.96,
349
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  }
351
  ],
352
  "logging_steps": 10,
353
  "max_steps": 6130,
354
  "num_train_epochs": 10,
355
  "save_steps": 500,
356
- "total_flos": 939361765588992.0,
357
  "trial_name": null,
358
  "trial_params": null
359
  }
 
1
  {
2
+ "best_metric": 0.48665139079093933,
3
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-1000",
4
+ "epoch": 1.631321370309951,
5
  "eval_steps": 100,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
347
  "eval_samples_per_second": 190.861,
348
  "eval_steps_per_second": 23.96,
349
  "step": 500
350
+ },
351
+ {
352
+ "epoch": 0.83,
353
+ "learning_rate": 0.0009982238010657195,
354
+ "loss": 0.6725,
355
+ "step": 510
356
+ },
357
+ {
358
+ "epoch": 0.85,
359
+ "learning_rate": 0.0009964476021314388,
360
+ "loss": 0.6612,
361
+ "step": 520
362
+ },
363
+ {
364
+ "epoch": 0.86,
365
+ "learning_rate": 0.000994671403197158,
366
+ "loss": 0.6202,
367
+ "step": 530
368
+ },
369
+ {
370
+ "epoch": 0.88,
371
+ "learning_rate": 0.0009928952042628776,
372
+ "loss": 0.6085,
373
+ "step": 540
374
+ },
375
+ {
376
+ "epoch": 0.9,
377
+ "learning_rate": 0.0009911190053285969,
378
+ "loss": 0.6023,
379
+ "step": 550
380
+ },
381
+ {
382
+ "epoch": 0.91,
383
+ "learning_rate": 0.0009893428063943162,
384
+ "loss": 0.6209,
385
+ "step": 560
386
+ },
387
+ {
388
+ "epoch": 0.93,
389
+ "learning_rate": 0.0009875666074600357,
390
+ "loss": 0.6128,
391
+ "step": 570
392
+ },
393
+ {
394
+ "epoch": 0.95,
395
+ "learning_rate": 0.000985790408525755,
396
+ "loss": 0.5971,
397
+ "step": 580
398
+ },
399
+ {
400
+ "epoch": 0.96,
401
+ "learning_rate": 0.0009840142095914742,
402
+ "loss": 0.6028,
403
+ "step": 590
404
+ },
405
+ {
406
+ "epoch": 0.98,
407
+ "learning_rate": 0.0009822380106571937,
408
+ "loss": 0.6105,
409
+ "step": 600
410
+ },
411
+ {
412
+ "epoch": 0.98,
413
+ "eval_loss": 0.527148425579071,
414
+ "eval_runtime": 6.0982,
415
+ "eval_samples_per_second": 192.024,
416
+ "eval_steps_per_second": 24.105,
417
+ "step": 600
418
+ },
419
+ {
420
+ "epoch": 1.0,
421
+ "learning_rate": 0.000980461811722913,
422
+ "loss": 0.6101,
423
+ "step": 610
424
+ },
425
+ {
426
+ "epoch": 1.01,
427
+ "learning_rate": 0.0009786856127886323,
428
+ "loss": 0.5349,
429
+ "step": 620
430
+ },
431
+ {
432
+ "epoch": 1.03,
433
+ "learning_rate": 0.0009769094138543518,
434
+ "loss": 0.5903,
435
+ "step": 630
436
+ },
437
+ {
438
+ "epoch": 1.04,
439
+ "learning_rate": 0.0009751332149200711,
440
+ "loss": 0.6033,
441
+ "step": 640
442
+ },
443
+ {
444
+ "epoch": 1.06,
445
+ "learning_rate": 0.0009733570159857904,
446
+ "loss": 0.544,
447
+ "step": 650
448
+ },
449
+ {
450
+ "epoch": 1.08,
451
+ "learning_rate": 0.0009715808170515098,
452
+ "loss": 0.5582,
453
+ "step": 660
454
+ },
455
+ {
456
+ "epoch": 1.09,
457
+ "learning_rate": 0.0009698046181172292,
458
+ "loss": 0.5488,
459
+ "step": 670
460
+ },
461
+ {
462
+ "epoch": 1.11,
463
+ "learning_rate": 0.0009680284191829485,
464
+ "loss": 0.5799,
465
+ "step": 680
466
+ },
467
+ {
468
+ "epoch": 1.13,
469
+ "learning_rate": 0.0009662522202486678,
470
+ "loss": 0.5857,
471
+ "step": 690
472
+ },
473
+ {
474
+ "epoch": 1.14,
475
+ "learning_rate": 0.0009644760213143872,
476
+ "loss": 0.5857,
477
+ "step": 700
478
+ },
479
+ {
480
+ "epoch": 1.14,
481
+ "eval_loss": 0.5239382982254028,
482
+ "eval_runtime": 6.2471,
483
+ "eval_samples_per_second": 187.448,
484
+ "eval_steps_per_second": 23.531,
485
+ "step": 700
486
+ },
487
+ {
488
+ "epoch": 1.16,
489
+ "learning_rate": 0.0009626998223801065,
490
+ "loss": 0.5289,
491
+ "step": 710
492
+ },
493
+ {
494
+ "epoch": 1.17,
495
+ "learning_rate": 0.0009609236234458259,
496
+ "loss": 0.6082,
497
+ "step": 720
498
+ },
499
+ {
500
+ "epoch": 1.19,
501
+ "learning_rate": 0.0009591474245115453,
502
+ "loss": 0.5837,
503
+ "step": 730
504
+ },
505
+ {
506
+ "epoch": 1.21,
507
+ "learning_rate": 0.0009573712255772646,
508
+ "loss": 0.5571,
509
+ "step": 740
510
+ },
511
+ {
512
+ "epoch": 1.22,
513
+ "learning_rate": 0.000955595026642984,
514
+ "loss": 0.5948,
515
+ "step": 750
516
+ },
517
+ {
518
+ "epoch": 1.24,
519
+ "learning_rate": 0.0009538188277087034,
520
+ "loss": 0.5455,
521
+ "step": 760
522
+ },
523
+ {
524
+ "epoch": 1.26,
525
+ "learning_rate": 0.0009520426287744227,
526
+ "loss": 0.5858,
527
+ "step": 770
528
+ },
529
+ {
530
+ "epoch": 1.27,
531
+ "learning_rate": 0.0009502664298401421,
532
+ "loss": 0.5289,
533
+ "step": 780
534
+ },
535
+ {
536
+ "epoch": 1.29,
537
+ "learning_rate": 0.0009484902309058615,
538
+ "loss": 0.6011,
539
+ "step": 790
540
+ },
541
+ {
542
+ "epoch": 1.31,
543
+ "learning_rate": 0.0009467140319715807,
544
+ "loss": 0.5841,
545
+ "step": 800
546
+ },
547
+ {
548
+ "epoch": 1.31,
549
+ "eval_loss": 0.5005862712860107,
550
+ "eval_runtime": 5.9494,
551
+ "eval_samples_per_second": 196.827,
552
+ "eval_steps_per_second": 24.708,
553
+ "step": 800
554
+ },
555
+ {
556
+ "epoch": 1.32,
557
+ "learning_rate": 0.0009449378330373001,
558
+ "loss": 0.5403,
559
+ "step": 810
560
+ },
561
+ {
562
+ "epoch": 1.34,
563
+ "learning_rate": 0.0009431616341030196,
564
+ "loss": 0.558,
565
+ "step": 820
566
+ },
567
+ {
568
+ "epoch": 1.35,
569
+ "learning_rate": 0.0009413854351687389,
570
+ "loss": 0.5435,
571
+ "step": 830
572
+ },
573
+ {
574
+ "epoch": 1.37,
575
+ "learning_rate": 0.0009396092362344583,
576
+ "loss": 0.5341,
577
+ "step": 840
578
+ },
579
+ {
580
+ "epoch": 1.39,
581
+ "learning_rate": 0.0009378330373001777,
582
+ "loss": 0.5398,
583
+ "step": 850
584
+ },
585
+ {
586
+ "epoch": 1.4,
587
+ "learning_rate": 0.000936056838365897,
588
+ "loss": 0.5548,
589
+ "step": 860
590
+ },
591
+ {
592
+ "epoch": 1.42,
593
+ "learning_rate": 0.0009342806394316164,
594
+ "loss": 0.5438,
595
+ "step": 870
596
+ },
597
+ {
598
+ "epoch": 1.44,
599
+ "learning_rate": 0.0009325044404973358,
600
+ "loss": 0.5845,
601
+ "step": 880
602
+ },
603
+ {
604
+ "epoch": 1.45,
605
+ "learning_rate": 0.0009307282415630552,
606
+ "loss": 0.5146,
607
+ "step": 890
608
+ },
609
+ {
610
+ "epoch": 1.47,
611
+ "learning_rate": 0.0009289520426287745,
612
+ "loss": 0.5274,
613
+ "step": 900
614
+ },
615
+ {
616
+ "epoch": 1.47,
617
+ "eval_loss": 0.49163827300071716,
618
+ "eval_runtime": 6.0349,
619
+ "eval_samples_per_second": 194.04,
620
+ "eval_steps_per_second": 24.359,
621
+ "step": 900
622
+ },
623
+ {
624
+ "epoch": 1.48,
625
+ "learning_rate": 0.0009271758436944939,
626
+ "loss": 0.5999,
627
+ "step": 910
628
+ },
629
+ {
630
+ "epoch": 1.5,
631
+ "learning_rate": 0.0009253996447602132,
632
+ "loss": 0.5798,
633
+ "step": 920
634
+ },
635
+ {
636
+ "epoch": 1.52,
637
+ "learning_rate": 0.0009236234458259325,
638
+ "loss": 0.5465,
639
+ "step": 930
640
+ },
641
+ {
642
+ "epoch": 1.53,
643
+ "learning_rate": 0.0009218472468916519,
644
+ "loss": 0.5752,
645
+ "step": 940
646
+ },
647
+ {
648
+ "epoch": 1.55,
649
+ "learning_rate": 0.0009200710479573713,
650
+ "loss": 0.5544,
651
+ "step": 950
652
+ },
653
+ {
654
+ "epoch": 1.57,
655
+ "learning_rate": 0.0009182948490230906,
656
+ "loss": 0.5576,
657
+ "step": 960
658
+ },
659
+ {
660
+ "epoch": 1.58,
661
+ "learning_rate": 0.00091651865008881,
662
+ "loss": 0.5351,
663
+ "step": 970
664
+ },
665
+ {
666
+ "epoch": 1.6,
667
+ "learning_rate": 0.0009147424511545294,
668
+ "loss": 0.5584,
669
+ "step": 980
670
+ },
671
+ {
672
+ "epoch": 1.62,
673
+ "learning_rate": 0.0009129662522202487,
674
+ "loss": 0.5191,
675
+ "step": 990
676
+ },
677
+ {
678
+ "epoch": 1.63,
679
+ "learning_rate": 0.0009111900532859681,
680
+ "loss": 0.5304,
681
+ "step": 1000
682
+ },
683
+ {
684
+ "epoch": 1.63,
685
+ "eval_loss": 0.48665139079093933,
686
+ "eval_runtime": 5.92,
687
+ "eval_samples_per_second": 197.803,
688
+ "eval_steps_per_second": 24.831,
689
+ "step": 1000
690
  }
691
  ],
692
  "logging_steps": 10,
693
  "max_steps": 6130,
694
  "num_train_epochs": 10,
695
  "save_steps": 500,
696
+ "total_flos": 1883435087757312.0,
697
  "trial_name": null,
698
  "trial_params": null
699
  }