error577 committed on
Commit 37ad8f5 · verified · 1 Parent(s): e4e7fa4

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aba9407221c1f891085692796698f7a341f8458f3b8a9dbe7e6741501bb3713a
+ oid sha256:78753ab537273541f6cd45d76e5c1871d220cbd5009e6b6482631c3c20a93b0b
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:832eaca613f0bbac9047e6faee88be5b5faa5cb1cac23ff7616bd970e065616e
+ oid sha256:b5d1a9cd9ded4881a65505bdf5c118594bd0f7af5436648ab7f5f7c4a3fd4217
  size 164465012
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e8ce40886c9babbebea3e7d91df4df2be439bdea45ab11f237bc0e4cdaf7a98b
+ oid sha256:37553bbc12e5a59786b13569ace17196557ba6326aba9cbcada11c4ecab2bcdf
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc4359ecb90a50bbea116b9a12b6a99effac889cc2d40f6093f2c443d61fb593
+ oid sha256:acaca55091cad13358c11632689bee2ba722202048435717d3d6f988abffbf55
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.39342138171195984,
- "best_model_checkpoint": "miner_id_24/checkpoint-250",
- "epoch": 0.5941014215998303,
+ "best_metric": 0.3837679922580719,
+ "best_model_checkpoint": "miner_id_24/checkpoint-400",
+ "epoch": 0.6789730532569489,
  "eval_steps": 50,
- "global_step": 350,
+ "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
  "eval_samples_per_second": 2.943,
  "eval_steps_per_second": 2.943,
  "step": 350
+ },
+ {
+ "epoch": 0.5957988542329726,
+ "grad_norm": 0.4255245625972748,
+ "learning_rate": 0.00022042073441788358,
+ "loss": 1.1602,
+ "step": 351
+ },
+ {
+ "epoch": 0.597496286866115,
+ "grad_norm": 1.3056901693344116,
+ "learning_rate": 0.00022000009848385105,
+ "loss": 0.9796,
+ "step": 352
+ },
+ {
+ "epoch": 0.5991937194992574,
+ "grad_norm": 0.21184608340263367,
+ "learning_rate": 0.0002195787576496039,
+ "loss": 0.6262,
+ "step": 353
+ },
+ {
+ "epoch": 0.6008911521323997,
+ "grad_norm": 0.24157075583934784,
+ "learning_rate": 0.00021915671615803966,
+ "loss": 0.8544,
+ "step": 354
+ },
+ {
+ "epoch": 0.6025885847655421,
+ "grad_norm": 0.20036348700523376,
+ "learning_rate": 0.00021873397825911153,
+ "loss": 0.6267,
+ "step": 355
+ },
+ {
+ "epoch": 0.6042860173986845,
+ "grad_norm": 0.20190277695655823,
+ "learning_rate": 0.00021831054820978544,
+ "loss": 0.6421,
+ "step": 356
+ },
+ {
+ "epoch": 0.6059834500318269,
+ "grad_norm": 0.2218364179134369,
+ "learning_rate": 0.00021788643027399724,
+ "loss": 0.7318,
+ "step": 357
+ },
+ {
+ "epoch": 0.6076808826649692,
+ "grad_norm": 0.2376060038805008,
+ "learning_rate": 0.00021746162872260985,
+ "loss": 0.8077,
+ "step": 358
+ },
+ {
+ "epoch": 0.6093783152981116,
+ "grad_norm": 0.19997930526733398,
+ "learning_rate": 0.0002170361478333702,
+ "loss": 0.6109,
+ "step": 359
+ },
+ {
+ "epoch": 0.611075747931254,
+ "grad_norm": 0.25473812222480774,
+ "learning_rate": 0.0002166099918908661,
+ "loss": 0.7628,
+ "step": 360
+ },
+ {
+ "epoch": 0.6127731805643963,
+ "grad_norm": 0.16967284679412842,
+ "learning_rate": 0.00021618316518648317,
+ "loss": 0.3639,
+ "step": 361
+ },
+ {
+ "epoch": 0.6144706131975387,
+ "grad_norm": 0.2045927196741104,
+ "learning_rate": 0.0002157556720183616,
+ "loss": 0.5688,
+ "step": 362
+ },
+ {
+ "epoch": 0.6161680458306811,
+ "grad_norm": 0.30063769221305847,
+ "learning_rate": 0.00021532751669135284,
+ "loss": 0.8787,
+ "step": 363
+ },
+ {
+ "epoch": 0.6178654784638234,
+ "grad_norm": 0.22912342846393585,
+ "learning_rate": 0.00021489870351697622,
+ "loss": 0.5724,
+ "step": 364
+ },
+ {
+ "epoch": 0.6195629110969658,
+ "grad_norm": 0.1712283343076706,
+ "learning_rate": 0.00021446923681337575,
+ "loss": 0.3771,
+ "step": 365
+ },
+ {
+ "epoch": 0.6212603437301082,
+ "grad_norm": 0.22983159124851227,
+ "learning_rate": 0.00021403912090527623,
+ "loss": 0.6274,
+ "step": 366
+ },
+ {
+ "epoch": 0.6229577763632506,
+ "grad_norm": 0.2075144350528717,
+ "learning_rate": 0.00021360836012394025,
+ "loss": 0.5276,
+ "step": 367
+ },
+ {
+ "epoch": 0.6246552089963929,
+ "grad_norm": 0.1731417328119278,
+ "learning_rate": 0.00021317695880712398,
+ "loss": 0.314,
+ "step": 368
+ },
+ {
+ "epoch": 0.6263526416295353,
+ "grad_norm": 0.1949385702610016,
+ "learning_rate": 0.0002127449212990339,
+ "loss": 0.4633,
+ "step": 369
+ },
+ {
+ "epoch": 0.6280500742626777,
+ "grad_norm": 0.20679379999637604,
+ "learning_rate": 0.00021231225195028297,
+ "loss": 0.4547,
+ "step": 370
+ },
+ {
+ "epoch": 0.62974750689582,
+ "grad_norm": 0.1853644698858261,
+ "learning_rate": 0.00021187895511784666,
+ "loss": 0.3758,
+ "step": 371
+ },
+ {
+ "epoch": 0.6314449395289624,
+ "grad_norm": 0.2339005470275879,
+ "learning_rate": 0.00021144503516501927,
+ "loss": 0.4251,
+ "step": 372
+ },
+ {
+ "epoch": 0.6331423721621048,
+ "grad_norm": 0.12531976401805878,
+ "learning_rate": 0.00021101049646137003,
+ "loss": 0.1716,
+ "step": 373
+ },
+ {
+ "epoch": 0.6348398047952472,
+ "grad_norm": 0.12999360263347626,
+ "learning_rate": 0.00021057534338269872,
+ "loss": 0.2032,
+ "step": 374
+ },
+ {
+ "epoch": 0.6365372374283895,
+ "grad_norm": 0.1318761557340622,
+ "learning_rate": 0.00021013958031099205,
+ "loss": 0.1968,
+ "step": 375
+ },
+ {
+ "epoch": 0.6382346700615319,
+ "grad_norm": 0.06346186250448227,
+ "learning_rate": 0.00020970321163437934,
+ "loss": 0.05,
+ "step": 376
+ },
+ {
+ "epoch": 0.6399321026946743,
+ "grad_norm": 0.12222940474748611,
+ "learning_rate": 0.00020926624174708827,
+ "loss": 0.1675,
+ "step": 377
+ },
+ {
+ "epoch": 0.6416295353278166,
+ "grad_norm": 0.12370602786540985,
+ "learning_rate": 0.0002088286750494008,
+ "loss": 0.0724,
+ "step": 378
+ },
+ {
+ "epoch": 0.643326967960959,
+ "grad_norm": 0.0037796611431986094,
+ "learning_rate": 0.00020839051594760872,
+ "loss": 0.0002,
+ "step": 379
+ },
+ {
+ "epoch": 0.6450244005941014,
+ "grad_norm": 0.08905645459890366,
+ "learning_rate": 0.00020795176885396926,
+ "loss": 0.0305,
+ "step": 380
+ },
+ {
+ "epoch": 0.6467218332272437,
+ "grad_norm": 0.11877533793449402,
+ "learning_rate": 0.00020751243818666087,
+ "loss": 0.1818,
+ "step": 381
+ },
+ {
+ "epoch": 0.6484192658603861,
+ "grad_norm": 0.0044077117927372456,
+ "learning_rate": 0.00020707252836973844,
+ "loss": 0.0001,
+ "step": 382
+ },
+ {
+ "epoch": 0.6501166984935285,
+ "grad_norm": 0.0014336027670651674,
+ "learning_rate": 0.00020663204383308898,
+ "loss": 0.0001,
+ "step": 383
+ },
+ {
+ "epoch": 0.6518141311266709,
+ "grad_norm": 0.025872627273201942,
+ "learning_rate": 0.0002061909890123868,
+ "loss": 0.0031,
+ "step": 384
+ },
+ {
+ "epoch": 0.6535115637598132,
+ "grad_norm": 0.007007645908743143,
+ "learning_rate": 0.0002057493683490491,
+ "loss": 0.0002,
+ "step": 385
+ },
+ {
+ "epoch": 0.6552089963929556,
+ "grad_norm": 0.01107161957770586,
+ "learning_rate": 0.0002053071862901911,
+ "loss": 0.0003,
+ "step": 386
+ },
+ {
+ "epoch": 0.6569064290260981,
+ "grad_norm": 0.014977892860770226,
+ "learning_rate": 0.00020486444728858117,
+ "loss": 0.0003,
+ "step": 387
+ },
+ {
+ "epoch": 0.6586038616592405,
+ "grad_norm": 0.011495725251734257,
+ "learning_rate": 0.00020442115580259613,
+ "loss": 0.0002,
+ "step": 388
+ },
+ {
+ "epoch": 0.6603012942923828,
+ "grad_norm": 0.0011856303317472339,
+ "learning_rate": 0.00020397731629617636,
+ "loss": 0.0001,
+ "step": 389
+ },
+ {
+ "epoch": 0.6619987269255252,
+ "grad_norm": 0.000914178614038974,
+ "learning_rate": 0.00020353293323878074,
+ "loss": 0.0,
+ "step": 390
+ },
+ {
+ "epoch": 0.6636961595586676,
+ "grad_norm": 0.01322512049227953,
+ "learning_rate": 0.00020308801110534178,
+ "loss": 0.0003,
+ "step": 391
+ },
+ {
+ "epoch": 0.6653935921918099,
+ "grad_norm": 0.07302884012460709,
+ "learning_rate": 0.00020264255437622036,
+ "loss": 0.0008,
+ "step": 392
+ },
+ {
+ "epoch": 0.6670910248249523,
+ "grad_norm": 0.002658440498635173,
+ "learning_rate": 0.00020219656753716074,
+ "loss": 0.0001,
+ "step": 393
+ },
+ {
+ "epoch": 0.6687884574580947,
+ "grad_norm": 0.0038816186133772135,
+ "learning_rate": 0.00020175005507924558,
+ "loss": 0.0001,
+ "step": 394
+ },
+ {
+ "epoch": 0.670485890091237,
+ "grad_norm": 0.0019201135728508234,
+ "learning_rate": 0.00020130302149885031,
+ "loss": 0.0,
+ "step": 395
+ },
+ {
+ "epoch": 0.6721833227243794,
+ "grad_norm": 0.005636914633214474,
+ "learning_rate": 0.00020085547129759806,
+ "loss": 0.0002,
+ "step": 396
+ },
+ {
+ "epoch": 0.6738807553575218,
+ "grad_norm": 0.003746110713109374,
+ "learning_rate": 0.00020040740898231448,
+ "loss": 0.0001,
+ "step": 397
+ },
+ {
+ "epoch": 0.6755781879906642,
+ "grad_norm": 0.004497275687754154,
+ "learning_rate": 0.0001999588390649821,
+ "loss": 0.0001,
+ "step": 398
+ },
+ {
+ "epoch": 0.6772756206238065,
+ "grad_norm": 0.004322202410548925,
+ "learning_rate": 0.00019950976606269497,
+ "loss": 0.0001,
+ "step": 399
+ },
+ {
+ "epoch": 0.6789730532569489,
+ "grad_norm": 0.025630857795476913,
+ "learning_rate": 0.00019906019449761325,
+ "loss": 0.0003,
+ "step": 400
+ },
+ {
+ "epoch": 0.6789730532569489,
+ "eval_loss": 0.3837679922580719,
+ "eval_runtime": 65.6798,
+ "eval_samples_per_second": 2.938,
+ "eval_steps_per_second": 2.938,
+ "step": 400
  }
  ],
  "logging_steps": 1,
@@ -2535,7 +2893,7 @@
  "early_stopping_threshold": 0.0
  },
  "attributes": {
- "early_stopping_patience_counter": 2
+ "early_stopping_patience_counter": 0
  }
  },
  "TrainerControl": {
@@ -2549,7 +2907,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.2671983590244352e+17,
+ "total_flos": 2.5934376577794048e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null