somaia02 committed
Commit 4673e12 · 1 Parent(s): eab3aef

Training in progress, step 4500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9835ad6bb2b08ae316f9f11b0e8d1d3ebd81d56c95ce6836b93250c69a0da6c1
+ oid sha256:9197a884173d9c36c93a016c2f30534c3f05c90874cb72eed644075307e2fce3
  size 5323528
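The file above is the Git LFS pointer for the updated LoRA adapter weights. Purely as an illustrative sketch (not part of this commit), a checkpoint like this is usually attached back onto its base model with PEFT; the base model name below is an assumption, since the commit does not record it.

    # Hypothetical sketch: load the committed LoRA adapter onto a BART base model.
    # Assumes the repo is cloned locally and last-checkpoint/ also holds the
    # (unchanged) adapter_config.json; "facebook/bart-base" is an assumed base model.
    from transformers import AutoModelForSeq2SeqLM
    from peft import PeftModel

    base = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")
    model = PeftModel.from_pretrained(base, "last-checkpoint")  # reads adapter_model.safetensors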
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d58d2d3f9f8d82b76b4b01cc178bb373ee5e5c36fe33fa652a1d1f1d22485e65
+ oid sha256:3eb84df5445e2433de304b15e77a1175375dd2d8c25dc61c78681c28414293e5
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c5ed3dd25bda0e52429ee4870ac2ba7e5a4f0851368f27ee87285fe0a5714834
+ oid sha256:75f78ad885bfca269044336e349d57c368367d27fdbb75021a699360ecf7bb2b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6f019f73cf3f7accb65e9b564bf8a83e5db913dbd19c9517ca220494d620a381
+ oid sha256:48a8d4a8a8e577d9dbdb51f074c9b711b4edce95beedab354e7f35ce094aeb0d
  size 1064
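Each of the four files above is stored through Git LFS, so the diff only shows the pointer (spec version, sha256 oid, byte size) rather than the binary payload. A minimal sketch, assuming the files were pulled locally, for checking that a downloaded file matches its pointer, using the new adapter oid from this commit:

    # Compare a local file's sha256 against the oid recorded in its LFS pointer.
    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    expected = "9197a884173d9c36c93a016c2f30534c3f05c90874cb72eed644075307e2fce3"
    print(sha256_of("last-checkpoint/adapter_model.safetensors") == expected)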
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.42139747738838196,
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-4000",
- "epoch": 6.525285481239804,
+ "best_metric": 0.41941431164741516,
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-4500",
+ "epoch": 7.3409461663947795,
  "eval_steps": 100,
- "global_step": 4000,
+ "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2727,13 +2727,353 @@
  "eval_samples_per_second": 88.699,
  "eval_steps_per_second": 11.135,
  "step": 4000
+ },
+ {
+ "epoch": 6.54,
+ "learning_rate": 0.0003765541740674956,
+ "loss": 0.4198,
+ "step": 4010
+ },
+ {
+ "epoch": 6.56,
+ "learning_rate": 0.0003747779751332149,
+ "loss": 0.3908,
+ "step": 4020
+ },
+ {
+ "epoch": 6.57,
+ "learning_rate": 0.00037300177619893426,
+ "loss": 0.4296,
+ "step": 4030
+ },
+ {
+ "epoch": 6.59,
+ "learning_rate": 0.00037122557726465365,
+ "loss": 0.4183,
+ "step": 4040
+ },
+ {
+ "epoch": 6.61,
+ "learning_rate": 0.000369449378330373,
+ "loss": 0.4371,
+ "step": 4050
+ },
+ {
+ "epoch": 6.62,
+ "learning_rate": 0.00036767317939609233,
+ "loss": 0.3934,
+ "step": 4060
+ },
+ {
+ "epoch": 6.64,
+ "learning_rate": 0.0003658969804618117,
+ "loss": 0.4581,
+ "step": 4070
+ },
+ {
+ "epoch": 6.66,
+ "learning_rate": 0.00036412078152753106,
+ "loss": 0.3908,
+ "step": 4080
+ },
+ {
+ "epoch": 6.67,
+ "learning_rate": 0.0003623445825932504,
+ "loss": 0.4237,
+ "step": 4090
+ },
+ {
+ "epoch": 6.69,
+ "learning_rate": 0.00036056838365896985,
+ "loss": 0.3975,
+ "step": 4100
+ },
+ {
+ "epoch": 6.69,
+ "eval_loss": 0.4162620007991791,
+ "eval_runtime": 12.9992,
+ "eval_samples_per_second": 90.082,
+ "eval_steps_per_second": 11.308,
+ "step": 4100
+ },
+ {
+ "epoch": 6.7,
+ "learning_rate": 0.0003587921847246892,
+ "loss": 0.4152,
+ "step": 4110
+ },
+ {
+ "epoch": 6.72,
+ "learning_rate": 0.0003570159857904086,
+ "loss": 0.4353,
+ "step": 4120
+ },
+ {
+ "epoch": 6.74,
+ "learning_rate": 0.0003552397868561279,
+ "loss": 0.3991,
+ "step": 4130
+ },
+ {
+ "epoch": 6.75,
+ "learning_rate": 0.00035346358792184726,
+ "loss": 0.4094,
+ "step": 4140
+ },
+ {
+ "epoch": 6.77,
+ "learning_rate": 0.00035168738898756665,
+ "loss": 0.4099,
+ "step": 4150
+ },
+ {
+ "epoch": 6.79,
+ "learning_rate": 0.000349911190053286,
+ "loss": 0.4107,
+ "step": 4160
+ },
+ {
+ "epoch": 6.8,
+ "learning_rate": 0.00034813499111900533,
+ "loss": 0.433,
+ "step": 4170
+ },
+ {
+ "epoch": 6.82,
+ "learning_rate": 0.0003463587921847247,
+ "loss": 0.4267,
+ "step": 4180
+ },
+ {
+ "epoch": 6.84,
+ "learning_rate": 0.00034458259325044407,
+ "loss": 0.4373,
+ "step": 4190
+ },
+ {
+ "epoch": 6.85,
+ "learning_rate": 0.0003428063943161634,
+ "loss": 0.4053,
+ "step": 4200
+ },
+ {
+ "epoch": 6.85,
+ "eval_loss": 0.4194377064704895,
+ "eval_runtime": 13.0635,
+ "eval_samples_per_second": 89.639,
+ "eval_steps_per_second": 11.253,
+ "step": 4200
+ },
+ {
+ "epoch": 6.87,
+ "learning_rate": 0.0003410301953818828,
+ "loss": 0.4298,
+ "step": 4210
+ },
+ {
+ "epoch": 6.88,
+ "learning_rate": 0.00033925399644760214,
+ "loss": 0.4074,
+ "step": 4220
+ },
+ {
+ "epoch": 6.9,
+ "learning_rate": 0.0003374777975133215,
+ "loss": 0.423,
+ "step": 4230
+ },
+ {
+ "epoch": 6.92,
+ "learning_rate": 0.00033570159857904087,
+ "loss": 0.4292,
+ "step": 4240
+ },
+ {
+ "epoch": 6.93,
+ "learning_rate": 0.0003339253996447602,
+ "loss": 0.4068,
+ "step": 4250
+ },
+ {
+ "epoch": 6.95,
+ "learning_rate": 0.0003321492007104796,
+ "loss": 0.41,
+ "step": 4260
+ },
+ {
+ "epoch": 6.97,
+ "learning_rate": 0.00033037300177619894,
+ "loss": 0.4429,
+ "step": 4270
+ },
+ {
+ "epoch": 6.98,
+ "learning_rate": 0.0003285968028419183,
+ "loss": 0.4352,
+ "step": 4280
+ },
+ {
+ "epoch": 7.0,
+ "learning_rate": 0.0003268206039076377,
+ "loss": 0.4314,
+ "step": 4290
+ },
+ {
+ "epoch": 7.01,
+ "learning_rate": 0.000325044404973357,
+ "loss": 0.3817,
+ "step": 4300
+ },
+ {
+ "epoch": 7.01,
+ "eval_loss": 0.42648157477378845,
+ "eval_runtime": 12.8627,
+ "eval_samples_per_second": 91.039,
+ "eval_steps_per_second": 11.428,
+ "step": 4300
+ },
+ {
+ "epoch": 7.03,
+ "learning_rate": 0.00032326820603907635,
+ "loss": 0.4002,
+ "step": 4310
+ },
+ {
+ "epoch": 7.05,
+ "learning_rate": 0.00032149200710479575,
+ "loss": 0.397,
+ "step": 4320
+ },
+ {
+ "epoch": 7.06,
+ "learning_rate": 0.0003197158081705151,
+ "loss": 0.4064,
+ "step": 4330
+ },
+ {
+ "epoch": 7.08,
+ "learning_rate": 0.0003179396092362344,
+ "loss": 0.4314,
+ "step": 4340
+ },
+ {
+ "epoch": 7.1,
+ "learning_rate": 0.0003161634103019538,
+ "loss": 0.4149,
+ "step": 4350
+ },
+ {
+ "epoch": 7.11,
+ "learning_rate": 0.00031438721136767316,
+ "loss": 0.3956,
+ "step": 4360
+ },
+ {
+ "epoch": 7.13,
+ "learning_rate": 0.00031261101243339255,
+ "loss": 0.4218,
+ "step": 4370
+ },
+ {
+ "epoch": 7.15,
+ "learning_rate": 0.0003108348134991119,
+ "loss": 0.3959,
+ "step": 4380
+ },
+ {
+ "epoch": 7.16,
+ "learning_rate": 0.00030905861456483123,
+ "loss": 0.4071,
+ "step": 4390
+ },
+ {
+ "epoch": 7.18,
+ "learning_rate": 0.0003072824156305506,
+ "loss": 0.3927,
+ "step": 4400
+ },
+ {
+ "epoch": 7.18,
+ "eval_loss": 0.41820862889289856,
+ "eval_runtime": 12.8919,
+ "eval_samples_per_second": 90.832,
+ "eval_steps_per_second": 11.403,
+ "step": 4400
+ },
+ {
+ "epoch": 7.19,
+ "learning_rate": 0.00030550621669627,
+ "loss": 0.4069,
+ "step": 4410
+ },
+ {
+ "epoch": 7.21,
+ "learning_rate": 0.00030373001776198936,
+ "loss": 0.403,
+ "step": 4420
+ },
+ {
+ "epoch": 7.23,
+ "learning_rate": 0.00030195381882770875,
+ "loss": 0.402,
+ "step": 4430
+ },
+ {
+ "epoch": 7.24,
+ "learning_rate": 0.0003001776198934281,
+ "loss": 0.4277,
+ "step": 4440
+ },
+ {
+ "epoch": 7.26,
+ "learning_rate": 0.00029840142095914743,
+ "loss": 0.4067,
+ "step": 4450
+ },
+ {
+ "epoch": 7.28,
+ "learning_rate": 0.0002966252220248668,
+ "loss": 0.3873,
+ "step": 4460
+ },
+ {
+ "epoch": 7.29,
+ "learning_rate": 0.00029484902309058616,
+ "loss": 0.3955,
+ "step": 4470
+ },
+ {
+ "epoch": 7.31,
+ "learning_rate": 0.00029307282415630555,
+ "loss": 0.3845,
+ "step": 4480
+ },
+ {
+ "epoch": 7.32,
+ "learning_rate": 0.0002912966252220249,
+ "loss": 0.4149,
+ "step": 4490
+ },
+ {
+ "epoch": 7.34,
+ "learning_rate": 0.00028952042628774423,
+ "loss": 0.3996,
+ "step": 4500
+ },
+ {
+ "epoch": 7.34,
+ "eval_loss": 0.41941431164741516,
+ "eval_runtime": 12.818,
+ "eval_samples_per_second": 91.356,
+ "eval_steps_per_second": 11.468,
+ "step": 4500
  }
  ],
  "logging_steps": 10,
  "max_steps": 6130,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 7559248409395200.0,
+ "total_flos": 8507970531753984.0,
  "trial_name": null,
  "trial_params": null
  }
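The trainer_state.json diff appends the log entries for steps 4010 through 4500 and moves the best checkpoint from bart_lora_outputs\checkpoint-4000 (best_metric 0.42139747738838196) to checkpoint-4500 (best_metric 0.41941431164741516). A small sketch, assuming the checkpoint directory is available locally, for pulling the evaluation curve and best-checkpoint info out of this file:

    # Read the checkpoint's trainer_state.json and list the logged eval losses.
    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
    for step, loss in evals[-5:]:
        print(f"step {step}: eval_loss {loss:.4f}")
    print("best:", state["best_metric"], "at", state["best_model_checkpoint"])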