Commit 2460e8b (verified)
warmestman committed
1 Parent(s): 1eae3c8

Training in progress, step 4000, checkpoint

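To inspect or reuse exactly this checkpoint state, the commit hash above can be passed as a revision when downloading from the Hub. A minimal sketch, assuming the repository id matches the checkpoint path recorded in trainer_state.json below (warmestman/whisper-large-v3-mn-cv-fleurs):

# Sketch: pin the download to this exact commit rather than the moving main branch.
# Repo id is taken from trainer_state.json below; the short hash should resolve,
# otherwise substitute the full 40-character commit hash.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="warmestman/whisper-large-v3-mn-cv-fleurs",
    revision="2460e8b",                    # the commit shown above
    allow_patterns=["last-checkpoint/*"],  # only the files touched in this commit
)
print("checkpoint files downloaded to:", local_path)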
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:374ed8949342602163ff342ab341b7f2396c86df086fc325532b8011637eade8
+ oid sha256:b0dc78eb3736d5a634642f33b04366ae8bc82d44f79e35262912a8f79a297da9
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:afa46570533cc904339e6f587cf429f834efd131ac812971f1bbe438d6109678
+ oid sha256:8dd0b804f4d8175687f3626e929c7766181c67e2d08bc33bc20168248f4a9ba9
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:401fa0df85ea07bc908110453d152ebafc1f512885a204e1dfe675e1f118de6f
+ oid sha256:1b9a68cbcd5de279cabaa6560612707ffd9ca1a64ece09207391eab36fc73a58
  size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:80e397e2074aa93e5dab4d712d0187ee9f61d0160362667daac2e2f5af3c9baa
+ oid sha256:4b7b6ce3ff2951e767c721be76510187471b655709a639fc1d52c719fdc5af80
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88b9dc452a973f46fe6cf13a85f3a4e2b3a33ff360bd385917df50f48d4a73a2
+ oid sha256:34d49fca63da670319ba76874c77a485016cecfb604305dc12d82741a53f00d7
  size 1064
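The entries above are Git LFS pointer files: each replaced pointer records only the new blob's oid sha256 and size, while the actual tensors live in LFS storage. A quick way to check that a locally downloaded shard matches its pointer is to hash it and compare, as in this sketch (the path and expected values come from the first pointer above; adjust them for other files):

# Sketch: verify a downloaded checkpoint shard against its LFS pointer.
import hashlib

expected_oid = "b0dc78eb3736d5a634642f33b04366ae8bc82d44f79e35262912a8f79a297da9"
expected_size = 4993448880
path = "last-checkpoint/model-00001-of-00002.safetensors"

h = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size} != {expected_size}"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")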
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 38.2982234200539,
- "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-2000",
- "epoch": 17.964071856287426,
+ "best_metric": 38.127715747208626,
+ "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-4000",
+ "epoch": 23.952095808383234,
  "eval_steps": 1000,
- "global_step": 3000,
+ "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -754,6 +754,255 @@
  "eval_steps_per_second": 0.092,
  "eval_wer": 38.54023431054397,
  "step": 3000
+ },
+ {
+ "epoch": 18.11,
+ "learning_rate": 7.342105263157895e-05,
+ "loss": 0.0094,
+ "step": 3025
+ },
+ {
+ "epoch": 18.26,
+ "learning_rate": 7.315789473684212e-05,
+ "loss": 0.0085,
+ "step": 3050
+ },
+ {
+ "epoch": 18.41,
+ "learning_rate": 7.289473684210527e-05,
+ "loss": 0.0089,
+ "step": 3075
+ },
+ {
+ "epoch": 18.56,
+ "learning_rate": 7.263157894736843e-05,
+ "loss": 0.0101,
+ "step": 3100
+ },
+ {
+ "epoch": 18.71,
+ "learning_rate": 7.236842105263159e-05,
+ "loss": 0.0091,
+ "step": 3125
+ },
+ {
+ "epoch": 18.86,
+ "learning_rate": 7.210526315789474e-05,
+ "loss": 0.0106,
+ "step": 3150
+ },
+ {
+ "epoch": 19.01,
+ "learning_rate": 7.18421052631579e-05,
+ "loss": 0.01,
+ "step": 3175
+ },
+ {
+ "epoch": 19.16,
+ "learning_rate": 7.157894736842105e-05,
+ "loss": 0.0067,
+ "step": 3200
+ },
+ {
+ "epoch": 19.31,
+ "learning_rate": 7.131578947368421e-05,
+ "loss": 0.0077,
+ "step": 3225
+ },
+ {
+ "epoch": 19.46,
+ "learning_rate": 7.105263157894737e-05,
+ "loss": 0.0072,
+ "step": 3250
+ },
+ {
+ "epoch": 19.61,
+ "learning_rate": 7.078947368421052e-05,
+ "loss": 0.0066,
+ "step": 3275
+ },
+ {
+ "epoch": 19.76,
+ "learning_rate": 7.052631578947368e-05,
+ "loss": 0.0073,
+ "step": 3300
+ },
+ {
+ "epoch": 19.91,
+ "learning_rate": 7.026315789473684e-05,
+ "loss": 0.0079,
+ "step": 3325
+ },
+ {
+ "epoch": 20.06,
+ "learning_rate": 7e-05,
+ "loss": 0.0101,
+ "step": 3350
+ },
+ {
+ "epoch": 20.21,
+ "learning_rate": 6.973684210526315e-05,
+ "loss": 0.0075,
+ "step": 3375
+ },
+ {
+ "epoch": 20.36,
+ "learning_rate": 6.947368421052632e-05,
+ "loss": 0.0096,
+ "step": 3400
+ },
+ {
+ "epoch": 20.51,
+ "learning_rate": 6.921052631578948e-05,
+ "loss": 0.0095,
+ "step": 3425
+ },
+ {
+ "epoch": 20.66,
+ "learning_rate": 6.894736842105263e-05,
+ "loss": 0.0105,
+ "step": 3450
+ },
+ {
+ "epoch": 20.81,
+ "learning_rate": 6.868421052631579e-05,
+ "loss": 0.0112,
+ "step": 3475
+ },
+ {
+ "epoch": 20.96,
+ "learning_rate": 6.842105263157895e-05,
+ "loss": 0.0116,
+ "step": 3500
+ },
+ {
+ "epoch": 21.11,
+ "learning_rate": 6.81578947368421e-05,
+ "loss": 0.0088,
+ "step": 3525
+ },
+ {
+ "epoch": 21.26,
+ "learning_rate": 6.789473684210527e-05,
+ "loss": 0.01,
+ "step": 3550
+ },
+ {
+ "epoch": 21.41,
+ "learning_rate": 6.763157894736843e-05,
+ "loss": 0.01,
+ "step": 3575
+ },
+ {
+ "epoch": 21.56,
+ "learning_rate": 6.736842105263159e-05,
+ "loss": 0.0083,
+ "step": 3600
+ },
+ {
+ "epoch": 21.71,
+ "learning_rate": 6.710526315789474e-05,
+ "loss": 0.0088,
+ "step": 3625
+ },
+ {
+ "epoch": 21.86,
+ "learning_rate": 6.68421052631579e-05,
+ "loss": 0.0086,
+ "step": 3650
+ },
+ {
+ "epoch": 22.01,
+ "learning_rate": 6.657894736842106e-05,
+ "loss": 0.0078,
+ "step": 3675
+ },
+ {
+ "epoch": 22.16,
+ "learning_rate": 6.631578947368421e-05,
+ "loss": 0.0061,
+ "step": 3700
+ },
+ {
+ "epoch": 22.31,
+ "learning_rate": 6.605263157894738e-05,
+ "loss": 0.0073,
+ "step": 3725
+ },
+ {
+ "epoch": 22.46,
+ "learning_rate": 6.578947368421054e-05,
+ "loss": 0.0071,
+ "step": 3750
+ },
+ {
+ "epoch": 22.6,
+ "learning_rate": 6.55263157894737e-05,
+ "loss": 0.0085,
+ "step": 3775
+ },
+ {
+ "epoch": 22.75,
+ "learning_rate": 6.526315789473685e-05,
+ "loss": 0.0082,
+ "step": 3800
+ },
+ {
+ "epoch": 22.9,
+ "learning_rate": 6.500000000000001e-05,
+ "loss": 0.0098,
+ "step": 3825
+ },
+ {
+ "epoch": 23.05,
+ "learning_rate": 6.473684210526316e-05,
+ "loss": 0.0087,
+ "step": 3850
+ },
+ {
+ "epoch": 23.2,
+ "learning_rate": 6.447368421052632e-05,
+ "loss": 0.0087,
+ "step": 3875
+ },
+ {
+ "epoch": 23.35,
+ "learning_rate": 6.421052631578948e-05,
+ "loss": 0.0076,
+ "step": 3900
+ },
+ {
+ "epoch": 23.5,
+ "learning_rate": 6.394736842105263e-05,
+ "loss": 0.0077,
+ "step": 3925
+ },
+ {
+ "epoch": 23.65,
+ "learning_rate": 6.368421052631579e-05,
+ "loss": 0.0081,
+ "step": 3950
+ },
+ {
+ "epoch": 23.8,
+ "learning_rate": 6.342105263157895e-05,
+ "loss": 0.0083,
+ "step": 3975
+ },
+ {
+ "epoch": 23.95,
+ "learning_rate": 6.31578947368421e-05,
+ "loss": 0.0091,
+ "step": 4000
+ },
+ {
+ "epoch": 23.95,
+ "eval_loss": 0.5618667602539062,
+ "eval_runtime": 582.8261,
+ "eval_samples_per_second": 0.719,
+ "eval_steps_per_second": 0.091,
+ "eval_wer": 38.127715747208626,
+ "step": 4000
  }
  ],
  "logging_steps": 25,
@@ -761,7 +1010,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 1000,
- "total_flos": 1.6290664630124544e+20,
+ "total_flos": 2.1720546423668736e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null