Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

.DS_Store +0 -0
config.json +1 -1
generation_config.json +1 -1
model.safetensors +1 -1
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +0 -0
trainer_state.json +493 -0
training_args.bin +1 -1
training_metrics.json +327 -327
vocab.json +0 -0

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

config.json CHANGED Viewed

@@ -43,7 +43,7 @@
   "scale_embedding": false,
   "task": "transcribe",
   "torch_dtype": "float32",
-  "transformers_version": "4.53.3",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51866

   "scale_embedding": false,
   "task": "transcribe",
   "torch_dtype": "float32",
+  "transformers_version": "4.54.0",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51866

generation_config.json CHANGED Viewed

@@ -247,6 +247,6 @@
     "transcribe": 50360,
     "translate": 50359
   },
-  "transformers_version": "4.53.3",
   "use_cache": false
 }

     "transcribe": 50360,
     "translate": 50359
   },
+  "transformers_version": "4.54.0",
   "use_cache": false
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a25a3c539d1fae4eb5f2793fd614b4895804b5b9ef60dcbda634350bad1f3771
 size 3235581408

 version https://git-lfs.github.com/spec/v1
+oid sha256:86b46c1643177ac43c7cee4227284e161af1fead0db26ca6160f4f9ac071731c
 size 3235581408

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7db07e6b8a1f18ca3c4fc22fc7bddf31df0b16f50a0c280f4b9099acce2d5b34
+size 1375357387

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b60c94b0982aceab90e3687055c9f5adce5e7bf5c684b99a918022ee83c7c4
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4b5f405af12756483019edea5c184ab91054cbaa03d9d45c3675b74e3fc8557
+size 1465

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,493 @@

+{
+  "best_global_step": 3000,
+  "best_metric": 0.7510406970977783,
+  "best_model_checkpoint": "whisper-turbo-oliver/checkpoint-3000",
+  "epoch": 2.3041474654377883,
+  "eval_steps": 1000,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03840245775729647,
+      "grad_norm": 9.333383560180664,
+      "learning_rate": 2.4500000000000003e-06,
+      "loss": 2.084,
+      "step": 50
+    },
+    {
+      "epoch": 0.07680491551459294,
+      "grad_norm": 1.5071637630462646,
+      "learning_rate": 4.950000000000001e-06,
+      "loss": 0.9651,
+      "step": 100
+    },
+    {
+      "epoch": 0.1152073732718894,
+      "grad_norm": 1.1114453077316284,
+      "learning_rate": 7.45e-06,
+      "loss": 0.8516,
+      "step": 150
+    },
+    {
+      "epoch": 0.15360983102918588,
+      "grad_norm": 1.903541922569275,
+      "learning_rate": 9.950000000000001e-06,
+      "loss": 0.8221,
+      "step": 200
+    },
+    {
+      "epoch": 0.19201228878648233,
+      "grad_norm": 1.2109239101409912,
+      "learning_rate": 1.2450000000000001e-05,
+      "loss": 0.8073,
+      "step": 250
+    },
+    {
+      "epoch": 0.2304147465437788,
+      "grad_norm": 1.016984224319458,
+      "learning_rate": 1.4950000000000001e-05,
+      "loss": 0.7959,
+      "step": 300
+    },
+    {
+      "epoch": 0.26881720430107525,
+      "grad_norm": 1.1783643960952759,
+      "learning_rate": 1.745e-05,
+      "loss": 0.7902,
+      "step": 350
+    },
+    {
+      "epoch": 0.30721966205837176,
+      "grad_norm": 0.6817651391029358,
+      "learning_rate": 1.995e-05,
+      "loss": 0.7854,
+      "step": 400
+    },
+    {
+      "epoch": 0.3456221198156682,
+      "grad_norm": 0.7436923980712891,
+      "learning_rate": 2.245e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.38402457757296465,
+      "grad_norm": 0.6642886996269226,
+      "learning_rate": 2.495e-05,
+      "loss": 0.7775,
+      "step": 500
+    },
+    {
+      "epoch": 0.42242703533026116,
+      "grad_norm": 0.831900954246521,
+      "learning_rate": 2.7450000000000003e-05,
+      "loss": 0.7761,
+      "step": 550
+    },
+    {
+      "epoch": 0.4608294930875576,
+      "grad_norm": 0.5995694994926453,
+      "learning_rate": 2.995e-05,
+      "loss": 0.7752,
+      "step": 600
+    },
+    {
+      "epoch": 0.49923195084485406,
+      "grad_norm": 0.9126259684562683,
+      "learning_rate": 3.245e-05,
+      "loss": 0.7747,
+      "step": 650
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "grad_norm": 0.6514723300933838,
+      "learning_rate": 3.495e-05,
+      "loss": 0.7724,
+      "step": 700
+    },
+    {
+      "epoch": 0.576036866359447,
+      "grad_norm": 1.5725030899047852,
+      "learning_rate": 3.745e-05,
+      "loss": 0.7697,
+      "step": 750
+    },
+    {
+      "epoch": 0.6144393241167435,
+      "grad_norm": 0.5431535840034485,
+      "learning_rate": 3.995e-05,
+      "loss": 0.7688,
+      "step": 800
+    },
+    {
+      "epoch": 0.65284178187404,
+      "grad_norm": 1.0956439971923828,
+      "learning_rate": 4.245e-05,
+      "loss": 0.7692,
+      "step": 850
+    },
+    {
+      "epoch": 0.6912442396313364,
+      "grad_norm": 0.5050705671310425,
+      "learning_rate": 4.495e-05,
+      "loss": 0.77,
+      "step": 900
+    },
+    {
+      "epoch": 0.7296466973886329,
+      "grad_norm": 0.7762904763221741,
+      "learning_rate": 4.745e-05,
+      "loss": 0.7673,
+      "step": 950
+    },
+    {
+      "epoch": 0.7680491551459293,
+      "grad_norm": 1.0077502727508545,
+      "learning_rate": 4.995e-05,
+      "loss": 0.7676,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7680491551459293,
+      "eval_cer": 0.00500196505770124,
+      "eval_loss": 0.7688098549842834,
+      "eval_runtime": 599.3052,
+      "eval_samples_per_second": 2.835,
+      "eval_steps_per_second": 0.045,
+      "eval_wer": 0.012253144756566513,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 0.40111133456230164,
+      "learning_rate": 4.9997949843345384e-05,
+      "loss": 0.7684,
+      "step": 1050
+    },
+    {
+      "epoch": 0.8448540706605223,
+      "grad_norm": 0.8781216144561768,
+      "learning_rate": 4.999163151231201e-05,
+      "loss": 0.7673,
+      "step": 1100
+    },
+    {
+      "epoch": 0.8832565284178188,
+      "grad_norm": 0.8268694281578064,
+      "learning_rate": 4.998104523202588e-05,
+      "loss": 0.765,
+      "step": 1150
+    },
+    {
+      "epoch": 0.9216589861751152,
+      "grad_norm": 0.518210232257843,
+      "learning_rate": 4.996619281036046e-05,
+      "loss": 0.764,
+      "step": 1200
+    },
+    {
+      "epoch": 0.9600614439324117,
+      "grad_norm": 0.25512072443962097,
+      "learning_rate": 4.9947076783740046e-05,
+      "loss": 0.7625,
+      "step": 1250
+    },
+    {
+      "epoch": 0.9984639016897081,
+      "grad_norm": 0.4513356387615204,
+      "learning_rate": 4.9923700416706686e-05,
+      "loss": 0.7609,
+      "step": 1300
+    },
+    {
+      "epoch": 1.0368663594470047,
+      "grad_norm": 0.2668992877006531,
+      "learning_rate": 4.989606770136262e-05,
+      "loss": 0.7583,
+      "step": 1350
+    },
+    {
+      "epoch": 1.075268817204301,
+      "grad_norm": 0.28391626477241516,
+      "learning_rate": 4.986418335668855e-05,
+      "loss": 0.7562,
+      "step": 1400
+    },
+    {
+      "epoch": 1.1136712749615976,
+      "grad_norm": 0.5757988691329956,
+      "learning_rate": 4.982805282773775e-05,
+      "loss": 0.7565,
+      "step": 1450
+    },
+    {
+      "epoch": 1.1520737327188941,
+      "grad_norm": 0.3012969195842743,
+      "learning_rate": 4.978768228470618e-05,
+      "loss": 0.756,
+      "step": 1500
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 0.366794228553772,
+      "learning_rate": 4.974307862187881e-05,
+      "loss": 0.7566,
+      "step": 1550
+    },
+    {
+      "epoch": 1.228878648233487,
+      "grad_norm": 0.4574221968650818,
+      "learning_rate": 4.969424945645218e-05,
+      "loss": 0.7567,
+      "step": 1600
+    },
+    {
+      "epoch": 1.2672811059907834,
+      "grad_norm": 0.507939338684082,
+      "learning_rate": 4.964120312723362e-05,
+      "loss": 0.7575,
+      "step": 1650
+    },
+    {
+      "epoch": 1.30568356374808,
+      "grad_norm": 0.23039250075817108,
+      "learning_rate": 4.958394869321719e-05,
+      "loss": 0.7574,
+      "step": 1700
+    },
+    {
+      "epoch": 1.3440860215053765,
+      "grad_norm": 0.5227251648902893,
+      "learning_rate": 4.952249593203659e-05,
+      "loss": 0.7557,
+      "step": 1750
+    },
+    {
+      "epoch": 1.3824884792626728,
+      "grad_norm": 0.5544179081916809,
+      "learning_rate": 4.945685533829544e-05,
+      "loss": 0.7547,
+      "step": 1800
+    },
+    {
+      "epoch": 1.4208909370199692,
+      "grad_norm": 0.29488006234169006,
+      "learning_rate": 4.938703812177501e-05,
+      "loss": 0.7557,
+      "step": 1850
+    },
+    {
+      "epoch": 1.4592933947772657,
+      "grad_norm": 0.593917191028595,
+      "learning_rate": 4.9313056205519894e-05,
+      "loss": 0.756,
+      "step": 1900
+    },
+    {
+      "epoch": 1.4976958525345623,
+      "grad_norm": 0.2776937782764435,
+      "learning_rate": 4.923492222380186e-05,
+      "loss": 0.7564,
+      "step": 1950
+    },
+    {
+      "epoch": 1.5360983102918588,
+      "grad_norm": 0.4567047357559204,
+      "learning_rate": 4.915264951996219e-05,
+      "loss": 0.7549,
+      "step": 2000
+    },
+    {
+      "epoch": 1.5360983102918588,
+      "eval_cer": 0.003501375540390868,
+      "eval_loss": 0.7553083896636963,
+      "eval_runtime": 597.0408,
+      "eval_samples_per_second": 2.846,
+      "eval_steps_per_second": 0.045,
+      "eval_wer": 0.008212214038975428,
+      "step": 2000
+    },
+    {
+      "epoch": 1.5745007680491552,
+      "grad_norm": 0.3459112048149109,
+      "learning_rate": 4.906625214413303e-05,
+      "loss": 0.7549,
+      "step": 2050
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 0.8464981913566589,
+      "learning_rate": 4.897574485083792e-05,
+      "loss": 0.7554,
+      "step": 2100
+    },
+    {
+      "epoch": 1.651305683563748,
+      "grad_norm": 0.11875268071889877,
+      "learning_rate": 4.888114309647211e-05,
+      "loss": 0.754,
+      "step": 2150
+    },
+    {
+      "epoch": 1.6897081413210446,
+      "grad_norm": 0.21423658728599548,
+      "learning_rate": 4.878246303666302e-05,
+      "loss": 0.7549,
+      "step": 2200
+    },
+    {
+      "epoch": 1.728110599078341,
+      "grad_norm": 0.3542362451553345,
+      "learning_rate": 4.86797215235112e-05,
+      "loss": 0.7544,
+      "step": 2250
+    },
+    {
+      "epoch": 1.7665130568356375,
+      "grad_norm": 0.6518175005912781,
+      "learning_rate": 4.8572936102712464e-05,
+      "loss": 0.755,
+      "step": 2300
+    },
+    {
+      "epoch": 1.8049155145929339,
+      "grad_norm": 0.37601879239082336,
+      "learning_rate": 4.846212501056149e-05,
+      "loss": 0.7546,
+      "step": 2350
+    },
+    {
+      "epoch": 1.8433179723502304,
+      "grad_norm": 0.8121321201324463,
+      "learning_rate": 4.834730717083754e-05,
+      "loss": 0.7559,
+      "step": 2400
+    },
+    {
+      "epoch": 1.881720430107527,
+      "grad_norm": 0.33677735924720764,
+      "learning_rate": 4.822850219157272e-05,
+      "loss": 0.7552,
+      "step": 2450
+    },
+    {
+      "epoch": 1.9201228878648233,
+      "grad_norm": 0.30549371242523193,
+      "learning_rate": 4.810573036170345e-05,
+      "loss": 0.7538,
+      "step": 2500
+    },
+    {
+      "epoch": 1.9585253456221197,
+      "grad_norm": 0.39071545004844666,
+      "learning_rate": 4.797901264760557e-05,
+      "loss": 0.7535,
+      "step": 2550
+    },
+    {
+      "epoch": 1.9969278033794162,
+      "grad_norm": 0.28000298142433167,
+      "learning_rate": 4.784837068951387e-05,
+      "loss": 0.7533,
+      "step": 2600
+    },
+    {
+      "epoch": 2.035330261136713,
+      "grad_norm": 0.21816645562648773,
+      "learning_rate": 4.7713826797826386e-05,
+      "loss": 0.751,
+      "step": 2650
+    },
+    {
+      "epoch": 2.0737327188940093,
+      "grad_norm": 0.21055564284324646,
+      "learning_rate": 4.7575403949294456e-05,
+      "loss": 0.7515,
+      "step": 2700
+    },
+    {
+      "epoch": 2.1121351766513055,
+      "grad_norm": 0.4243466556072235,
+      "learning_rate": 4.743312578309875e-05,
+      "loss": 0.7511,
+      "step": 2750
+    },
+    {
+      "epoch": 2.150537634408602,
+      "grad_norm": 0.1919689178466797,
+      "learning_rate": 4.7287016596812354e-05,
+      "loss": 0.75,
+      "step": 2800
+    },
+    {
+      "epoch": 2.1889400921658986,
+      "grad_norm": 0.16988199949264526,
+      "learning_rate": 4.713710134225132e-05,
+      "loss": 0.7504,
+      "step": 2850
+    },
+    {
+      "epoch": 2.227342549923195,
+      "grad_norm": 0.21615995466709137,
+      "learning_rate": 4.698340562121354e-05,
+      "loss": 0.75,
+      "step": 2900
+    },
+    {
+      "epoch": 2.2657450076804917,
+      "grad_norm": 0.15211135149002075,
+      "learning_rate": 4.682595568110655e-05,
+      "loss": 0.7499,
+      "step": 2950
+    },
+    {
+      "epoch": 2.3041474654377883,
+      "grad_norm": 0.17576761543750763,
+      "learning_rate": 4.6664778410465194e-05,
+      "loss": 0.7496,
+      "step": 3000
+    },
+    {
+      "epoch": 2.3041474654377883,
+      "eval_cer": 0.0034299188967094217,
+      "eval_loss": 0.7510406970977783,
+      "eval_runtime": 597.6926,
+      "eval_samples_per_second": 2.843,
+      "eval_steps_per_second": 0.045,
+      "eval_wer": 0.007951508831388907,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 13020,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 10,
+        "early_stopping_threshold": 0.001
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.54506878182359e+20,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f4dbf9ed7fa0e275be5d3b4b8168d7d5abb7913b855c67a0016c884b482c4fb
 size 5905

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7abc171d9a2eb02755d24fd8f922bffab8345d133f4d98839e62e96887ddf1f
 size 5905

training_metrics.json CHANGED Viewed

@@ -1,1060 +1,1060 @@
 [
   {
-    "loss": 3.5837,
-    "grad_norm": 31.927167892456055,
     "learning_rate": 4.800000000000001e-07,
     "epoch": 0.009600614439324117,
     "step": 25
   },
   {
-    "loss": 2.9494,
-    "grad_norm": 13.517321586608887,
     "learning_rate": 9.800000000000001e-07,
     "epoch": 0.019201228878648235,
     "step": 50
   },
   {
-    "loss": 2.2297,
-    "grad_norm": 11.44046401977539,
     "learning_rate": 1.48e-06,
     "epoch": 0.02880184331797235,
     "step": 75
   },
   {
-    "loss": 1.7016,
-    "grad_norm": 2.807133674621582,
     "learning_rate": 1.98e-06,
     "epoch": 0.03840245775729647,
     "step": 100
   },
   {
-    "loss": 1.5778,
-    "grad_norm": 1.8624107837677002,
     "learning_rate": 2.4800000000000004e-06,
     "epoch": 0.04800307219662058,
     "step": 125
   },
   {
-    "loss": 1.5518,
-    "grad_norm": 1.7535284757614136,
     "learning_rate": 2.9800000000000003e-06,
     "epoch": 0.0576036866359447,
     "step": 150
   },
   {
-    "loss": 1.5253,
-    "grad_norm": 1.7349804639816284,
     "learning_rate": 3.48e-06,
     "epoch": 0.06720430107526881,
     "step": 175
   },
   {
-    "loss": 1.5081,
-    "grad_norm": 1.990842580795288,
     "learning_rate": 3.980000000000001e-06,
     "epoch": 0.07680491551459294,
     "step": 200
   },
   {
-    "loss": 1.5003,
-    "grad_norm": 1.1042917966842651,
     "learning_rate": 4.48e-06,
     "epoch": 0.08640552995391705,
     "step": 225
   },
   {
-    "loss": 1.4981,
-    "grad_norm": 1.5932936668395996,
     "learning_rate": 4.980000000000001e-06,
     "epoch": 0.09600614439324116,
     "step": 250
   },
   {
-    "loss": 1.4839,
-    "grad_norm": 1.271634578704834,
     "learning_rate": 5.480000000000001e-06,
     "epoch": 0.10560675883256529,
     "step": 275
   },
   {
-    "loss": 1.4802,
-    "grad_norm": 1.1124892234802246,
     "learning_rate": 5.98e-06,
     "epoch": 0.1152073732718894,
     "step": 300
   },
   {
-    "loss": 1.4779,
-    "grad_norm": 1.7024626731872559,
     "learning_rate": 6.480000000000001e-06,
     "epoch": 0.12480798771121351,
     "step": 325
   },
   {
-    "loss": 1.4741,
-    "grad_norm": 1.404011607170105,
     "learning_rate": 6.98e-06,
     "epoch": 0.13440860215053763,
     "step": 350
   },
   {
-    "loss": 1.4686,
-    "grad_norm": 1.750034213066101,
     "learning_rate": 7.48e-06,
     "epoch": 0.14400921658986174,
     "step": 375
   },
   {
-    "loss": 1.4683,
-    "grad_norm": 0.7955018877983093,
     "learning_rate": 7.980000000000002e-06,
     "epoch": 0.15360983102918588,
     "step": 400
   },
   {
-    "loss": 1.463,
-    "grad_norm": 1.526512622833252,
     "learning_rate": 8.48e-06,
     "epoch": 0.16321044546851,
     "step": 425
   },
   {
-    "loss": 1.4609,
-    "grad_norm": 0.947767436504364,
     "learning_rate": 8.98e-06,
     "epoch": 0.1728110599078341,
     "step": 450
   },
   {
-    "loss": 1.4615,
-    "grad_norm": 1.0896203517913818,
     "learning_rate": 9.48e-06,
     "epoch": 0.18241167434715821,
     "step": 475
   },
   {
-    "loss": 1.4612,
-    "grad_norm": 0.834524929523468,
     "learning_rate": 9.980000000000001e-06,
     "epoch": 0.19201228878648233,
     "step": 500
   },
   {
-    "eval_loss": 1.4578877687454224,
-    "eval_wer": 0.019150391367125163,
-    "eval_cer": 0.005926353711189946,
-    "eval_runtime": 180.69,
-    "eval_samples_per_second": 9.408,
-    "eval_steps_per_second": 0.149,
     "epoch": 0.19201228878648233,
     "step": 500
   },
   {
-    "loss": 1.4572,
-    "grad_norm": 1.6100836992263794,
     "learning_rate": 9.990602975724355e-06,
     "epoch": 0.20161290322580644,
     "step": 525
   },
   {
-    "loss": 1.4557,
-    "grad_norm": 1.5937349796295166,
     "learning_rate": 9.980814408770558e-06,
     "epoch": 0.21121351766513058,
     "step": 550
   },
   {
-    "loss": 1.4566,
-    "grad_norm": 0.7914408445358276,
     "learning_rate": 9.971025841816759e-06,
     "epoch": 0.2208141321044547,
     "step": 575
   },
   {
-    "loss": 1.4515,
-    "grad_norm": 0.9205504059791565,
     "learning_rate": 9.96123727486296e-06,
     "epoch": 0.2304147465437788,
     "step": 600
   },
   {
-    "loss": 1.4522,
-    "grad_norm": 0.724983811378479,
     "learning_rate": 9.951448707909162e-06,
     "epoch": 0.24001536098310292,
     "step": 625
   },
   {
-    "loss": 1.4504,
-    "grad_norm": 0.6097744107246399,
     "learning_rate": 9.941660140955365e-06,
     "epoch": 0.24961597542242703,
     "step": 650
   },
   {
-    "loss": 1.4507,
-    "grad_norm": 0.7213621139526367,
     "learning_rate": 9.931871574001566e-06,
     "epoch": 0.25921658986175117,
     "step": 675
   },
   {
-    "loss": 1.4479,
-    "grad_norm": 0.8606914281845093,
     "learning_rate": 9.92208300704777e-06,
     "epoch": 0.26881720430107525,
     "step": 700
   },
   {
-    "loss": 1.4465,
-    "grad_norm": 0.7317363619804382,
     "learning_rate": 9.91229444009397e-06,
     "epoch": 0.2784178187403994,
     "step": 725
   },
   {
-    "loss": 1.4436,
-    "grad_norm": 0.7534998059272766,
     "learning_rate": 9.902505873140174e-06,
     "epoch": 0.2880184331797235,
     "step": 750
   },
   {
-    "loss": 1.4461,
-    "grad_norm": 0.6188538074493408,
     "learning_rate": 9.892717306186375e-06,
     "epoch": 0.2976190476190476,
     "step": 775
   },
   {
-    "loss": 1.4422,
-    "grad_norm": 0.6928197145462036,
     "learning_rate": 9.882928739232577e-06,
     "epoch": 0.30721966205837176,
     "step": 800
   },
   {
-    "loss": 1.4439,
-    "grad_norm": 0.9215940833091736,
     "learning_rate": 9.87314017227878e-06,
     "epoch": 0.31682027649769584,
     "step": 825
   },
   {
-    "loss": 1.4443,
-    "grad_norm": 0.9667465090751648,
     "learning_rate": 9.863351605324981e-06,
     "epoch": 0.32642089093702,
     "step": 850
   },
   {
-    "loss": 1.4424,
-    "grad_norm": 0.7931649088859558,
     "learning_rate": 9.853563038371182e-06,
     "epoch": 0.33602150537634407,
     "step": 875
   },
   {
-    "loss": 1.4402,
-    "grad_norm": 0.45881208777427673,
     "learning_rate": 9.843774471417386e-06,
     "epoch": 0.3456221198156682,
     "step": 900
   },
   {
-    "loss": 1.4425,
-    "grad_norm": 2.3223390579223633,
     "learning_rate": 9.833985904463587e-06,
     "epoch": 0.35522273425499235,
     "step": 925
   },
   {
-    "loss": 1.4409,
-    "grad_norm": 0.7167399525642395,
     "learning_rate": 9.82419733750979e-06,
     "epoch": 0.36482334869431643,
     "step": 950
   },
   {
-    "loss": 1.4385,
-    "grad_norm": 0.45477813482284546,
     "learning_rate": 9.814408770555991e-06,
     "epoch": 0.37442396313364057,
     "step": 975
   },
   {
-    "loss": 1.4399,
-    "grad_norm": 0.6906208992004395,
     "learning_rate": 9.804620203602193e-06,
     "epoch": 0.38402457757296465,
     "step": 1000
   },
   {
-    "eval_loss": 1.4414963722229004,
-    "eval_wer": 0.006839425488258986,
-    "eval_cer": 0.0029700519759095784,
-    "eval_runtime": 180.7728,
-    "eval_samples_per_second": 9.404,
-    "eval_steps_per_second": 0.149,
     "epoch": 0.38402457757296465,
     "step": 1000
   },
   {
-    "loss": 1.4404,
-    "grad_norm": 0.612010657787323,
     "learning_rate": 9.794831636648396e-06,
     "epoch": 0.3936251920122888,
     "step": 1025
   },
   {
-    "loss": 1.437,
-    "grad_norm": 0.6493867635726929,
     "learning_rate": 9.785043069694597e-06,
     "epoch": 0.4032258064516129,
     "step": 1050
   },
   {
-    "loss": 1.4372,
-    "grad_norm": 0.5084114074707031,
     "learning_rate": 9.7752545027408e-06,
     "epoch": 0.412826420890937,
     "step": 1075
   },
   {
-    "loss": 1.4365,
-    "grad_norm": 0.32950568199157715,
     "learning_rate": 9.765465935787002e-06,
     "epoch": 0.42242703533026116,
     "step": 1100
   },
   {
-    "loss": 1.4378,
-    "grad_norm": 0.5013596415519714,
     "learning_rate": 9.755677368833205e-06,
     "epoch": 0.43202764976958524,
     "step": 1125
   },
   {
-    "loss": 1.4356,
-    "grad_norm": 0.5923940539360046,
     "learning_rate": 9.745888801879405e-06,
     "epoch": 0.4416282642089094,
     "step": 1150
   },
   {
-    "loss": 1.4365,
-    "grad_norm": 0.7093729972839355,
     "learning_rate": 9.736100234925608e-06,
     "epoch": 0.45122887864823347,
     "step": 1175
   },
   {
-    "loss": 1.4339,
-    "grad_norm": 0.45401209592819214,
     "learning_rate": 9.726311667971809e-06,
     "epoch": 0.4608294930875576,
     "step": 1200
   },
   {
-    "loss": 1.4365,
-    "grad_norm": 0.8377461433410645,
     "learning_rate": 9.716523101018012e-06,
     "epoch": 0.47043010752688175,
     "step": 1225
   },
   {
-    "loss": 1.4354,
-    "grad_norm": 0.5953696370124817,
     "learning_rate": 9.706734534064213e-06,
     "epoch": 0.48003072196620583,
     "step": 1250
   },
   {
-    "loss": 1.4356,
-    "grad_norm": 0.464432030916214,
     "learning_rate": 9.696945967110417e-06,
     "epoch": 0.48963133640552997,
     "step": 1275
   },
   {
-    "loss": 1.4326,
-    "grad_norm": 0.6318503022193909,
     "learning_rate": 9.687157400156618e-06,
     "epoch": 0.49923195084485406,
     "step": 1300
   },
   {
-    "loss": 1.434,
-    "grad_norm": 1.390724778175354,
     "learning_rate": 9.677368833202821e-06,
     "epoch": 0.5088325652841782,
     "step": 1325
   },
   {
-    "loss": 1.4337,
-    "grad_norm": 0.4549782872200012,
     "learning_rate": 9.66758026624902e-06,
     "epoch": 0.5184331797235023,
     "step": 1350
   },
   {
-    "loss": 1.4345,
-    "grad_norm": 0.607494592666626,
     "learning_rate": 9.657791699295224e-06,
     "epoch": 0.5280337941628265,
     "step": 1375
   },
   {
-    "loss": 1.4345,
-    "grad_norm": 0.49749523401260376,
     "learning_rate": 9.648003132341425e-06,
     "epoch": 0.5376344086021505,
     "step": 1400
   },
   {
-    "loss": 1.4324,
-    "grad_norm": 0.5609452724456787,
     "learning_rate": 9.638214565387628e-06,
     "epoch": 0.5472350230414746,
     "step": 1425
   },
   {
-    "loss": 1.4338,
-    "grad_norm": 0.7919191718101501,
     "learning_rate": 9.62842599843383e-06,
     "epoch": 0.5568356374807988,
     "step": 1450
   },
   {
-    "loss": 1.435,
-    "grad_norm": 0.4627026617527008,
     "learning_rate": 9.618637431480033e-06,
     "epoch": 0.5664362519201229,
     "step": 1475
   },
   {
-    "loss": 1.4338,
-    "grad_norm": 0.7627348303794861,
     "learning_rate": 9.608848864526234e-06,
     "epoch": 0.576036866359447,
     "step": 1500
   },
   {
-    "eval_loss": 1.4339057207107544,
-    "eval_wer": 0.005319553157534767,
-    "eval_cer": 0.002406292110111927,
-    "eval_runtime": 180.3166,
-    "eval_samples_per_second": 9.428,
-    "eval_steps_per_second": 0.15,
     "epoch": 0.576036866359447,
     "step": 1500
   },
   {
-    "loss": 1.432,
-    "grad_norm": 0.4532851576805115,
     "learning_rate": 9.599060297572435e-06,
     "epoch": 0.5856374807987711,
     "step": 1525
   },
   {
-    "loss": 1.4332,
-    "grad_norm": 0.40782368183135986,
     "learning_rate": 9.589271730618639e-06,
     "epoch": 0.5952380952380952,
     "step": 1550
   },
   {
-    "loss": 1.4322,
-    "grad_norm": 5.594751358032227,
     "learning_rate": 9.57948316366484e-06,
     "epoch": 0.6048387096774194,
     "step": 1575
   },
   {
-    "loss": 1.4314,
-    "grad_norm": 0.521115243434906,
     "learning_rate": 9.569694596711043e-06,
     "epoch": 0.6144393241167435,
     "step": 1600
   },
   {
-    "loss": 1.432,
-    "grad_norm": 0.9263116717338562,
     "learning_rate": 9.559906029757244e-06,
     "epoch": 0.6240399385560675,
     "step": 1625
   },
   {
-    "loss": 1.4312,
-    "grad_norm": 0.483038991689682,
     "learning_rate": 9.550117462803446e-06,
     "epoch": 0.6336405529953917,
     "step": 1650
   },
   {
-    "loss": 1.4317,
-    "grad_norm": 0.5169259905815125,
     "learning_rate": 9.540328895849649e-06,
     "epoch": 0.6432411674347158,
     "step": 1675
   },
   {
-    "loss": 1.4319,
-    "grad_norm": 0.6301758885383606,
     "learning_rate": 9.53054032889585e-06,
     "epoch": 0.65284178187404,
     "step": 1700
   },
   {
-    "loss": 1.4309,
-    "grad_norm": 0.43223053216934204,
     "learning_rate": 9.520751761942052e-06,
     "epoch": 0.6624423963133641,
     "step": 1725
   },
   {
-    "loss": 1.4302,
-    "grad_norm": 0.42416876554489136,
     "learning_rate": 9.510963194988255e-06,
     "epoch": 0.6720430107526881,
     "step": 1750
   },
   {
-    "loss": 1.4305,
-    "grad_norm": 0.3732769787311554,
     "learning_rate": 9.501174628034456e-06,
     "epoch": 0.6816436251920123,
     "step": 1775
   },
   {
-    "loss": 1.4293,
-    "grad_norm": 0.3774888813495636,
     "learning_rate": 9.49138606108066e-06,
     "epoch": 0.6912442396313364,
     "step": 1800
   },
   {
-    "loss": 1.4295,
-    "grad_norm": 0.34194567799568176,
     "learning_rate": 9.48159749412686e-06,
     "epoch": 0.7008448540706606,
     "step": 1825
   },
   {
-    "loss": 1.4285,
-    "grad_norm": 0.40550151467323303,
     "learning_rate": 9.471808927173064e-06,
     "epoch": 0.7104454685099847,
     "step": 1850
   },
   {
-    "loss": 1.4283,
-    "grad_norm": 0.46141359210014343,
     "learning_rate": 9.462020360219265e-06,
     "epoch": 0.7200460829493087,
     "step": 1875
   },
   {
-    "loss": 1.4275,
-    "grad_norm": 0.3962096571922302,
     "learning_rate": 9.452231793265466e-06,
     "epoch": 0.7296466973886329,
     "step": 1900
   },
   {
-    "loss": 1.4286,
-    "grad_norm": 0.28840935230255127,
     "learning_rate": 9.442443226311668e-06,
     "epoch": 0.739247311827957,
     "step": 1925
   },
   {
-    "loss": 1.4287,
-    "grad_norm": 1.2259435653686523,
     "learning_rate": 9.432654659357871e-06,
     "epoch": 0.7488479262672811,
     "step": 1950
   },
   {
-    "loss": 1.4301,
-    "grad_norm": 0.49282413721084595,
     "learning_rate": 9.422866092404072e-06,
     "epoch": 0.7584485407066052,
     "step": 1975
   },
   {
-    "loss": 1.4298,
-    "grad_norm": 0.6675013303756714,
     "learning_rate": 9.413077525450275e-06,
     "epoch": 0.7680491551459293,
     "step": 2000
   },
   {
-    "eval_loss": 1.4311903715133667,
-    "eval_wer": 0.006079489322896876,
-    "eval_cer": 0.002708797403954569,
-    "eval_runtime": 181.4592,
-    "eval_samples_per_second": 9.368,
-    "eval_steps_per_second": 0.149,
     "epoch": 0.7680491551459293,
     "step": 2000
   },
   {
-    "loss": 1.4305,
-    "grad_norm": 0.7503907084465027,
     "learning_rate": 9.403288958496477e-06,
     "epoch": 0.7776497695852534,
     "step": 2025
   },
   {
-    "loss": 1.43,
-    "grad_norm": 0.40446972846984863,
     "learning_rate": 9.39350039154268e-06,
     "epoch": 0.7872503840245776,
     "step": 2050
   },
   {
-    "loss": 1.4286,
-    "grad_norm": 0.6334962248802185,
     "learning_rate": 9.383711824588881e-06,
     "epoch": 0.7968509984639017,
     "step": 2075
   },
   {
-    "loss": 1.4281,
-    "grad_norm": 0.4710507094860077,
     "learning_rate": 9.373923257635083e-06,
     "epoch": 0.8064516129032258,
     "step": 2100
   },
   {
-    "loss": 1.4285,
-    "grad_norm": 0.47485098242759705,
     "learning_rate": 9.364134690681284e-06,
     "epoch": 0.8160522273425499,
     "step": 2125
   },
   {
-    "loss": 1.4271,
-    "grad_norm": 0.2644312381744385,
     "learning_rate": 9.354346123727487e-06,
     "epoch": 0.825652841781874,
     "step": 2150
   },
   {
-    "loss": 1.4286,
-    "grad_norm": 0.28581735491752625,
     "learning_rate": 9.344557556773688e-06,
     "epoch": 0.8352534562211982,
     "step": 2175
   },
   {
-    "loss": 1.4281,
-    "grad_norm": 0.34762442111968994,
     "learning_rate": 9.334768989819891e-06,
     "epoch": 0.8448540706605223,
     "step": 2200
   },
   {
-    "loss": 1.4272,
-    "grad_norm": 0.4322021007537842,
     "learning_rate": 9.324980422866093e-06,
     "epoch": 0.8544546850998463,
     "step": 2225
   },
   {
-    "loss": 1.4265,
-    "grad_norm": 0.5740979909896851,
     "learning_rate": 9.315191855912296e-06,
     "epoch": 0.8640552995391705,
     "step": 2250
   },
   {
-    "loss": 1.4262,
-    "grad_norm": 0.41522952914237976,
     "learning_rate": 9.305403288958497e-06,
     "epoch": 0.8736559139784946,
     "step": 2275
   },
   {
-    "loss": 1.4283,
-    "grad_norm": 0.6065871715545654,
     "learning_rate": 9.295614722004699e-06,
     "epoch": 0.8832565284178188,
     "step": 2300
   },
   {
-    "loss": 1.4266,
-    "grad_norm": 0.7368581295013428,
     "learning_rate": 9.285826155050902e-06,
     "epoch": 0.8928571428571429,
     "step": 2325
   },
   {
-    "loss": 1.4286,
-    "grad_norm": 0.4163774251937866,
     "learning_rate": 9.276037588097103e-06,
     "epoch": 0.9024577572964669,
     "step": 2350
   },
   {
-    "loss": 1.4267,
-    "grad_norm": 0.5644450187683105,
     "learning_rate": 9.266249021143305e-06,
     "epoch": 0.9120583717357911,
     "step": 2375
   },
   {
-    "loss": 1.4274,
-    "grad_norm": 0.2744387686252594,
     "learning_rate": 9.256460454189508e-06,
     "epoch": 0.9216589861751152,
     "step": 2400
   },
   {
-    "loss": 1.4271,
-    "grad_norm": 0.42348650097846985,
     "learning_rate": 9.246671887235709e-06,
     "epoch": 0.9312596006144394,
     "step": 2425
   },
   {
-    "loss": 1.4259,
-    "grad_norm": 0.4279247522354126,
     "learning_rate": 9.236883320281912e-06,
     "epoch": 0.9408602150537635,
     "step": 2450
   },
   {
-    "loss": 1.4259,
-    "grad_norm": 0.3883613348007202,
     "learning_rate": 9.227094753328114e-06,
     "epoch": 0.9504608294930875,
     "step": 2475
   },
   {
-    "loss": 1.4257,
-    "grad_norm": 0.6730173230171204,
     "learning_rate": 9.217306186374315e-06,
     "epoch": 0.9600614439324117,
     "step": 2500
   },
   {
-    "eval_loss": 1.4273574352264404,
-    "eval_wer": 0.00471160422524508,
-    "eval_cer": 0.0021862882600445508,
-    "eval_runtime": 180.2256,
-    "eval_samples_per_second": 9.433,
-    "eval_steps_per_second": 0.15,
     "epoch": 0.9600614439324117,
     "step": 2500
   },
   {
-    "loss": 1.4259,
-    "grad_norm": 0.392243355512619,
     "learning_rate": 9.207517619420518e-06,
     "epoch": 0.9696620583717358,
     "step": 2525
   },
   {
-    "loss": 1.4267,
-    "grad_norm": 0.32680612802505493,
     "learning_rate": 9.19772905246672e-06,
     "epoch": 0.9792626728110599,
     "step": 2550
   },
   {
-    "loss": 1.4251,
-    "grad_norm": 0.4720834195613861,
     "learning_rate": 9.187940485512922e-06,
     "epoch": 0.988863287250384,
     "step": 2575
   },
   {
-    "loss": 1.4259,
-    "grad_norm": 0.5003166794776917,
     "learning_rate": 9.178151918559124e-06,
     "epoch": 0.9984639016897081,
     "step": 2600
   },
   {
-    "loss": 1.4236,
-    "grad_norm": 0.3816595673561096,
     "learning_rate": 9.168363351605327e-06,
     "epoch": 1.0080645161290323,
     "step": 2625
   },
   {
-    "loss": 1.424,
-    "grad_norm": 0.5128536224365234,
     "learning_rate": 9.158574784651528e-06,
     "epoch": 1.0176651305683564,
     "step": 2650
   },
   {
-    "loss": 1.4237,
-    "grad_norm": 0.46583881974220276,
     "learning_rate": 9.14878621769773e-06,
     "epoch": 1.0272657450076805,
     "step": 2675
   },
   {
-    "loss": 1.4242,
-    "grad_norm": 0.5510522127151489,
     "learning_rate": 9.138997650743931e-06,
     "epoch": 1.0368663594470047,
     "step": 2700
   },
   {
-    "loss": 1.4256,
-    "grad_norm": 1.018068790435791,
     "learning_rate": 9.129209083790134e-06,
     "epoch": 1.0464669738863288,
     "step": 2725
   },
   {
-    "loss": 1.424,
-    "grad_norm": 0.2575235664844513,
     "learning_rate": 9.119420516836336e-06,
     "epoch": 1.0560675883256527,
     "step": 2750
   },
   {
-    "loss": 1.4247,
-    "grad_norm": 0.2963893711566925,
     "learning_rate": 9.109631949882539e-06,
     "epoch": 1.0656682027649769,
     "step": 2775
   },
   {
-    "loss": 1.4243,
-    "grad_norm": 0.48700642585754395,
     "learning_rate": 9.09984338292874e-06,
     "epoch": 1.075268817204301,
     "step": 2800
   },
   {
-    "loss": 1.4239,
-    "grad_norm": 0.30403944849967957,
     "learning_rate": 9.090054815974943e-06,
     "epoch": 1.0848694316436251,
     "step": 2825
   },
   {
-    "loss": 1.424,
-    "grad_norm": 0.4571227431297302,
     "learning_rate": 9.080266249021143e-06,
     "epoch": 1.0944700460829493,
     "step": 2850
   },
   {
-    "loss": 1.4244,
-    "grad_norm": 0.3448657691478729,
     "learning_rate": 9.070477682067346e-06,
     "epoch": 1.1040706605222734,
     "step": 2875
   },
   {
-    "loss": 1.4243,
-    "grad_norm": 0.7213252186775208,
     "learning_rate": 9.060689115113547e-06,
     "epoch": 1.1136712749615976,
     "step": 2900
   },
   {
-    "loss": 1.4234,
-    "grad_norm": 0.3630754053592682,
     "learning_rate": 9.05090054815975e-06,
     "epoch": 1.1232718894009217,
     "step": 2925
   },
   {
-    "loss": 1.4237,
-    "grad_norm": 0.2607744038105011,
     "learning_rate": 9.041111981205952e-06,
     "epoch": 1.1328725038402458,
     "step": 2950
   },
   {
-    "loss": 1.4255,
-    "grad_norm": 1.6808569431304932,
     "learning_rate": 9.031323414252155e-06,
     "epoch": 1.14247311827957,
     "step": 2975
   },
   {
-    "loss": 1.424,
-    "grad_norm": 0.5143694281578064,
     "learning_rate": 9.021534847298356e-06,
     "epoch": 1.1520737327188941,
     "step": 3000
   },
   {
-    "eval_loss": 1.426444411277771,
-    "eval_wer": 0.004559616992172657,
-    "eval_cer": 0.002296290185078239,
-    "eval_runtime": 181.4246,
-    "eval_samples_per_second": 9.37,
-    "eval_steps_per_second": 0.149,
     "epoch": 1.1520737327188941,
     "step": 3000
   },
   {
-    "loss": 1.4241,
-    "grad_norm": 0.4142398238182068,
     "learning_rate": 9.01174628034456e-06,
     "epoch": 1.161674347158218,
     "step": 3025
   },
   {
-    "loss": 1.4238,
-    "grad_norm": 0.46893906593322754,
     "learning_rate": 9.00195771339076e-06,
     "epoch": 1.1712749615975422,
     "step": 3050
   },
   {
-    "loss": 1.4233,
-    "grad_norm": 0.2761977016925812,
     "learning_rate": 8.992169146436962e-06,
     "epoch": 1.1808755760368663,
     "step": 3075
   },
   {
-    "loss": 1.4225,
-    "grad_norm": 0.3853098154067993,
     "learning_rate": 8.982380579483163e-06,
     "epoch": 1.1904761904761905,
     "step": 3100
   },
   {
-    "loss": 1.4228,
-    "grad_norm": 0.23497086763381958,
     "learning_rate": 8.972592012529366e-06,
     "epoch": 1.2000768049155146,
     "step": 3125
   },
   {
-    "loss": 1.4225,
-    "grad_norm": 0.3737936019897461,
     "learning_rate": 8.962803445575568e-06,
     "epoch": 1.2096774193548387,
     "step": 3150
   },
   {
-    "loss": 1.4241,
-    "grad_norm": 0.7541052103042603,
     "learning_rate": 8.953014878621771e-06,
     "epoch": 1.2192780337941629,
     "step": 3175
   },
   {
-    "loss": 1.4237,
-    "grad_norm": 0.4566754102706909,
     "learning_rate": 8.943226311667972e-06,
     "epoch": 1.228878648233487,
     "step": 3200
   },
   {
-    "loss": 1.423,
-    "grad_norm": 0.31264716386795044,
     "learning_rate": 8.933437744714175e-06,
     "epoch": 1.238479262672811,
     "step": 3225
   },
   {
-    "loss": 1.4226,
-    "grad_norm": 0.38216665387153625,
     "learning_rate": 8.923649177760377e-06,
     "epoch": 1.248079877112135,
     "step": 3250
   },
   {
-    "loss": 1.4222,
-    "grad_norm": 0.2926384508609772,
     "learning_rate": 8.913860610806578e-06,
     "epoch": 1.2576804915514592,
     "step": 3275
   },
   {
-    "loss": 1.4231,
-    "grad_norm": 0.2229822278022766,
     "learning_rate": 8.904072043852781e-06,
     "epoch": 1.2672811059907834,
     "step": 3300
   },
   {
-    "loss": 1.4234,
-    "grad_norm": 0.3581763803958893,
     "learning_rate": 8.894283476898983e-06,
     "epoch": 1.2768817204301075,
     "step": 3325
   },
   {
-    "loss": 1.4231,
-    "grad_norm": 0.21746738255023956,
     "learning_rate": 8.884494909945186e-06,
     "epoch": 1.2864823348694316,
     "step": 3350
   },
   {
-    "loss": 1.4233,
-    "grad_norm": 0.2930072844028473,
     "learning_rate": 8.874706342991387e-06,
     "epoch": 1.2960829493087558,
     "step": 3375
   },
   {
-    "loss": 1.4241,
-    "grad_norm": 0.34126031398773193,
     "learning_rate": 8.864917776037588e-06,
     "epoch": 1.30568356374808,
     "step": 3400
   },
   {
-    "loss": 1.4236,
-    "grad_norm": 0.6360740065574646,
     "learning_rate": 8.85512920908379e-06,
     "epoch": 1.315284178187404,
     "step": 3425
   },
   {
-    "loss": 1.4225,
-    "grad_norm": 0.4061238467693329,
     "learning_rate": 8.845340642129993e-06,
     "epoch": 1.3248847926267282,
     "step": 3450
   },
   {
-    "loss": 1.4227,
-    "grad_norm": 0.2641013264656067,
     "learning_rate": 8.835552075176194e-06,
     "epoch": 1.3344854070660523,
     "step": 3475
   },
   {
-    "loss": 1.423,
-    "grad_norm": 0.49870121479034424,
     "learning_rate": 8.825763508222397e-06,
     "epoch": 1.3440860215053765,
     "step": 3500
   },
   {
-    "eval_loss": 1.4249826669692993,
-    "eval_wer": 0.004559616992172657,
-    "eval_cer": 0.002351291147595083,
-    "eval_runtime": 180.6281,
-    "eval_samples_per_second": 9.412,
-    "eval_steps_per_second": 0.149,
     "epoch": 1.3440860215053765,
     "step": 3500
   },
   {
-    "train_runtime": 3219.4834,
-    "train_samples_per_second": 517.474,
-    "train_steps_per_second": 8.088,
-    "total_flos": 3.8181358092681216e+20,
-    "train_loss": 1.471987566266741,
     "epoch": 1.3440860215053765,
     "step": 3500
   }

 [
   {
+    "loss": 3.3642,
+    "grad_norm": 27.656972885131836,
     "learning_rate": 4.800000000000001e-07,
     "epoch": 0.009600614439324117,
     "step": 25
   },
   {
+    "loss": 2.8525,
+    "grad_norm": 11.508564949035645,
     "learning_rate": 9.800000000000001e-07,
     "epoch": 0.019201228878648235,
     "step": 50
   },
   {
+    "loss": 2.3063,
+    "grad_norm": 8.78321361541748,
     "learning_rate": 1.48e-06,
     "epoch": 0.02880184331797235,
     "step": 75
   },
   {
+    "loss": 1.8411,
+    "grad_norm": 2.794045925140381,
     "learning_rate": 1.98e-06,
     "epoch": 0.03840245775729647,
     "step": 100
   },
   {
+    "loss": 1.6453,
+    "grad_norm": 2.6623477935791016,
     "learning_rate": 2.4800000000000004e-06,
     "epoch": 0.04800307219662058,
     "step": 125
   },
   {
+    "loss": 1.6035,
+    "grad_norm": 1.9311230182647705,
     "learning_rate": 2.9800000000000003e-06,
     "epoch": 0.0576036866359447,
     "step": 150
   },
   {
+    "loss": 1.5725,
+    "grad_norm": 1.5370234251022339,
     "learning_rate": 3.48e-06,
     "epoch": 0.06720430107526881,
     "step": 175
   },
   {
+    "loss": 1.5626,
+    "grad_norm": 2.0659360885620117,
     "learning_rate": 3.980000000000001e-06,
     "epoch": 0.07680491551459294,
     "step": 200
   },
   {
+    "loss": 1.5467,
+    "grad_norm": 2.592463254928589,
     "learning_rate": 4.48e-06,
     "epoch": 0.08640552995391705,
     "step": 225
   },
   {
+    "loss": 1.5359,
+    "grad_norm": 1.7290267944335938,
     "learning_rate": 4.980000000000001e-06,
     "epoch": 0.09600614439324116,
     "step": 250
   },
   {
+    "loss": 1.5246,
+    "grad_norm": 1.2997428178787231,
     "learning_rate": 5.480000000000001e-06,
     "epoch": 0.10560675883256529,
     "step": 275
   },
   {
+    "loss": 1.5175,
+    "grad_norm": 1.8068249225616455,
     "learning_rate": 5.98e-06,
     "epoch": 0.1152073732718894,
     "step": 300
   },
   {
+    "loss": 1.5085,
+    "grad_norm": 2.0698606967926025,
     "learning_rate": 6.480000000000001e-06,
     "epoch": 0.12480798771121351,
     "step": 325
   },
   {
+    "loss": 1.5069,
+    "grad_norm": 1.5639880895614624,
     "learning_rate": 6.98e-06,
     "epoch": 0.13440860215053763,
     "step": 350
   },
   {
+    "loss": 1.5035,
+    "grad_norm": 2.6863772869110107,
     "learning_rate": 7.48e-06,
     "epoch": 0.14400921658986174,
     "step": 375
   },
   {
+    "loss": 1.4968,
+    "grad_norm": 2.0388848781585693,
     "learning_rate": 7.980000000000002e-06,
     "epoch": 0.15360983102918588,
     "step": 400
   },
   {
+    "loss": 1.4922,
+    "grad_norm": 1.2673157453536987,
     "learning_rate": 8.48e-06,
     "epoch": 0.16321044546851,
     "step": 425
   },
   {
+    "loss": 1.4871,
+    "grad_norm": 1.3405632972717285,
     "learning_rate": 8.98e-06,
     "epoch": 0.1728110599078341,
     "step": 450
   },
   {
+    "loss": 1.4895,
+    "grad_norm": 1.068382978439331,
     "learning_rate": 9.48e-06,
     "epoch": 0.18241167434715821,
     "step": 475
   },
   {
+    "loss": 1.4854,
+    "grad_norm": 1.6716822385787964,
     "learning_rate": 9.980000000000001e-06,
     "epoch": 0.19201228878648233,
     "step": 500
   },
   {
+    "eval_loss": 1.481889247894287,
+    "eval_wer": 0.03017662777813987,
+    "eval_cer": 0.011480700751485703,
+    "eval_runtime": 603.2619,
+    "eval_samples_per_second": 2.816,
+    "eval_steps_per_second": 0.045,
     "epoch": 0.19201228878648233,
     "step": 500
   },
   {
+    "loss": 1.4798,
+    "grad_norm": 2.015155553817749,
     "learning_rate": 9.990602975724355e-06,
     "epoch": 0.20161290322580644,
     "step": 525
   },
   {
+    "loss": 1.4766,
+    "grad_norm": 1.353633999824524,
     "learning_rate": 9.980814408770558e-06,
     "epoch": 0.21121351766513058,
     "step": 550
   },
   {
+    "loss": 1.4719,
+    "grad_norm": 1.360000729560852,
     "learning_rate": 9.971025841816759e-06,
     "epoch": 0.2208141321044547,
     "step": 575
   },
   {
+    "loss": 1.4706,
+    "grad_norm": 0.8802452087402344,
     "learning_rate": 9.96123727486296e-06,
     "epoch": 0.2304147465437788,
     "step": 600
   },
   {
+    "loss": 1.4712,
+    "grad_norm": 1.2904715538024902,
     "learning_rate": 9.951448707909162e-06,
     "epoch": 0.24001536098310292,
     "step": 625
   },
   {
+    "loss": 1.4692,
+    "grad_norm": 1.1254265308380127,
     "learning_rate": 9.941660140955365e-06,
     "epoch": 0.24961597542242703,
     "step": 650
   },
   {
+    "loss": 1.4663,
+    "grad_norm": 1.4376908540725708,
     "learning_rate": 9.931871574001566e-06,
     "epoch": 0.25921658986175117,
     "step": 675
   },
   {
+    "loss": 1.4647,
+    "grad_norm": 0.7454094886779785,
     "learning_rate": 9.92208300704777e-06,
     "epoch": 0.26881720430107525,
     "step": 700
   },
   {
+    "loss": 1.4621,
+    "grad_norm": 1.176084280014038,
     "learning_rate": 9.91229444009397e-06,
     "epoch": 0.2784178187403994,
     "step": 725
   },
   {
+    "loss": 1.4617,
+    "grad_norm": 1.0089287757873535,
     "learning_rate": 9.902505873140174e-06,
     "epoch": 0.2880184331797235,
     "step": 750
   },
   {
+    "loss": 1.4586,
+    "grad_norm": 1.1119129657745361,
     "learning_rate": 9.892717306186375e-06,
     "epoch": 0.2976190476190476,
     "step": 775
   },
   {
+    "loss": 1.4633,
+    "grad_norm": 0.7650630474090576,
     "learning_rate": 9.882928739232577e-06,
     "epoch": 0.30721966205837176,
     "step": 800
   },
   {
+    "loss": 1.4617,
+    "grad_norm": 0.9932444095611572,
     "learning_rate": 9.87314017227878e-06,
     "epoch": 0.31682027649769584,
     "step": 825
   },
   {
+    "loss": 1.4573,
+    "grad_norm": 0.85480135679245,
     "learning_rate": 9.863351605324981e-06,
     "epoch": 0.32642089093702,
     "step": 850
   },
   {
+    "loss": 1.4536,
+    "grad_norm": 1.326219081878662,
     "learning_rate": 9.853563038371182e-06,
     "epoch": 0.33602150537634407,
     "step": 875
   },
   {
+    "loss": 1.4558,
+    "grad_norm": 0.8132762908935547,
     "learning_rate": 9.843774471417386e-06,
     "epoch": 0.3456221198156682,
     "step": 900
   },
   {
+    "loss": 1.4533,
+    "grad_norm": 1.0124117136001587,
     "learning_rate": 9.833985904463587e-06,
     "epoch": 0.35522273425499235,
     "step": 925
   },
   {
+    "loss": 1.4516,
+    "grad_norm": 0.9896714091300964,
     "learning_rate": 9.82419733750979e-06,
     "epoch": 0.36482334869431643,
     "step": 950
   },
   {
+    "loss": 1.4514,
+    "grad_norm": 1.136326789855957,
     "learning_rate": 9.814408770555991e-06,
     "epoch": 0.37442396313364057,
     "step": 975
   },
   {
+    "loss": 1.4518,
+    "grad_norm": 0.647108793258667,
     "learning_rate": 9.804620203602193e-06,
     "epoch": 0.38402457757296465,
     "step": 1000
   },
   {
+    "eval_loss": 1.4519319534301758,
+    "eval_wer": 0.017727954115883463,
+    "eval_cer": 0.006859837793418843,
+    "eval_runtime": 602.9922,
+    "eval_samples_per_second": 2.818,
+    "eval_steps_per_second": 0.045,
     "epoch": 0.38402457757296465,
     "step": 1000
   },
   {
+    "loss": 1.4497,
+    "grad_norm": 1.0084048509597778,
     "learning_rate": 9.794831636648396e-06,
     "epoch": 0.3936251920122888,
     "step": 1025
   },
   {
+    "loss": 1.4504,
+    "grad_norm": 0.9754135012626648,
     "learning_rate": 9.785043069694597e-06,
     "epoch": 0.4032258064516129,
     "step": 1050
   },
   {
+    "loss": 1.4507,
+    "grad_norm": 0.6452074646949768,
     "learning_rate": 9.7752545027408e-06,
     "epoch": 0.412826420890937,
     "step": 1075
   },
   {
+    "loss": 1.4488,
+    "grad_norm": 0.8794124126434326,
     "learning_rate": 9.765465935787002e-06,
     "epoch": 0.42242703533026116,
     "step": 1100
   },
   {
+    "loss": 1.4472,
+    "grad_norm": 0.8075922727584839,
     "learning_rate": 9.755677368833205e-06,
     "epoch": 0.43202764976958524,
     "step": 1125
   },
   {
+    "loss": 1.4488,
+    "grad_norm": 0.9166923761367798,
     "learning_rate": 9.745888801879405e-06,
     "epoch": 0.4416282642089094,
     "step": 1150
   },
   {
+    "loss": 1.4479,
+    "grad_norm": 0.8185281753540039,
     "learning_rate": 9.736100234925608e-06,
     "epoch": 0.45122887864823347,
     "step": 1175
   },
   {
+    "loss": 1.4468,
+    "grad_norm": 0.8979498147964478,
     "learning_rate": 9.726311667971809e-06,
     "epoch": 0.4608294930875576,
     "step": 1200
   },
   {
+    "loss": 1.4457,
+    "grad_norm": 0.8547759056091309,
     "learning_rate": 9.716523101018012e-06,
     "epoch": 0.47043010752688175,
     "step": 1225
   },
   {
+    "loss": 1.4483,
+    "grad_norm": 0.816839873790741,
     "learning_rate": 9.706734534064213e-06,
     "epoch": 0.48003072196620583,
     "step": 1250
   },
   {
+    "loss": 1.4457,
+    "grad_norm": 1.0802863836288452,
     "learning_rate": 9.696945967110417e-06,
     "epoch": 0.48963133640552997,
     "step": 1275
   },
   {
+    "loss": 1.4455,
+    "grad_norm": 0.9443736672401428,
     "learning_rate": 9.687157400156618e-06,
     "epoch": 0.49923195084485406,
     "step": 1300
   },
   {
+    "loss": 1.4455,
+    "grad_norm": 0.9043710827827454,
     "learning_rate": 9.677368833202821e-06,
     "epoch": 0.5088325652841782,
     "step": 1325
   },
   {
+    "loss": 1.4434,
+    "grad_norm": 0.3099987208843231,
     "learning_rate": 9.66758026624902e-06,
     "epoch": 0.5184331797235023,
     "step": 1350
   },
   {
+    "loss": 1.4432,
+    "grad_norm": 1.402285099029541,
     "learning_rate": 9.657791699295224e-06,
     "epoch": 0.5280337941628265,
     "step": 1375
   },
   {
+    "loss": 1.4446,
+    "grad_norm": 0.7573044896125793,
     "learning_rate": 9.648003132341425e-06,
     "epoch": 0.5376344086021505,
     "step": 1400
   },
   {
+    "loss": 1.4414,
+    "grad_norm": 1.2929362058639526,
     "learning_rate": 9.638214565387628e-06,
     "epoch": 0.5472350230414746,
     "step": 1425
   },
   {
+    "loss": 1.4392,
+    "grad_norm": 0.8310167193412781,
     "learning_rate": 9.62842599843383e-06,
     "epoch": 0.5568356374807988,
     "step": 1450
   },
   {
+    "loss": 1.4426,
+    "grad_norm": 0.710106611251831,
     "learning_rate": 9.618637431480033e-06,
     "epoch": 0.5664362519201229,
     "step": 1475
   },
   {
+    "loss": 1.4434,
+    "grad_norm": 1.0039565563201904,
     "learning_rate": 9.608848864526234e-06,
     "epoch": 0.576036866359447,
     "step": 1500
   },
   {
+    "eval_loss": 1.4412317276000977,
+    "eval_wer": 0.014925373134328358,
+    "eval_cer": 0.00557361820715281,
+    "eval_runtime": 604.8563,
+    "eval_samples_per_second": 2.809,
+    "eval_steps_per_second": 0.045,
     "epoch": 0.576036866359447,
     "step": 1500
   },
   {
+    "loss": 1.4416,
+    "grad_norm": 0.5386250019073486,
     "learning_rate": 9.599060297572435e-06,
     "epoch": 0.5856374807987711,
     "step": 1525
   },
   {
+    "loss": 1.4405,
+    "grad_norm": 0.6138635277748108,
     "learning_rate": 9.589271730618639e-06,
     "epoch": 0.5952380952380952,
     "step": 1550
   },
   {
+    "loss": 1.4401,
+    "grad_norm": 0.6281514167785645,
     "learning_rate": 9.57948316366484e-06,
     "epoch": 0.6048387096774194,
     "step": 1575
   },
   {
+    "loss": 1.4399,
+    "grad_norm": 0.7416335344314575,
     "learning_rate": 9.569694596711043e-06,
     "epoch": 0.6144393241167435,
     "step": 1600
   },
   {
+    "loss": 1.4389,
+    "grad_norm": 0.6534410119056702,
     "learning_rate": 9.559906029757244e-06,
     "epoch": 0.6240399385560675,
     "step": 1625
   },
   {
+    "loss": 1.4421,
+    "grad_norm": 0.7885088920593262,
     "learning_rate": 9.550117462803446e-06,
     "epoch": 0.6336405529953917,
     "step": 1650
   },
   {
+    "loss": 1.4392,
+    "grad_norm": 0.5605193972587585,
     "learning_rate": 9.540328895849649e-06,
     "epoch": 0.6432411674347158,
     "step": 1675
   },
   {
+    "loss": 1.4374,
+    "grad_norm": 0.5420004725456238,
     "learning_rate": 9.53054032889585e-06,
     "epoch": 0.65284178187404,
     "step": 1700
   },
   {
+    "loss": 1.4398,
+    "grad_norm": 0.5770916938781738,
     "learning_rate": 9.520751761942052e-06,
     "epoch": 0.6624423963133641,
     "step": 1725
   },
   {
+    "loss": 1.4394,
+    "grad_norm": 0.6007382869720459,
     "learning_rate": 9.510963194988255e-06,
     "epoch": 0.6720430107526881,
     "step": 1750
   },
   {
+    "loss": 1.4386,
+    "grad_norm": 0.6682799458503723,
     "learning_rate": 9.501174628034456e-06,
     "epoch": 0.6816436251920123,
     "step": 1775
   },
   {
+    "loss": 1.4387,
+    "grad_norm": 0.5475255846977234,
     "learning_rate": 9.49138606108066e-06,
     "epoch": 0.6912442396313364,
     "step": 1800
   },
   {
+    "loss": 1.4379,
+    "grad_norm": 0.8577839136123657,
     "learning_rate": 9.48159749412686e-06,
     "epoch": 0.7008448540706606,
     "step": 1825
   },
   {
+    "loss": 1.4383,
+    "grad_norm": 0.5306459069252014,
     "learning_rate": 9.471808927173064e-06,
     "epoch": 0.7104454685099847,
     "step": 1850
   },
   {
+    "loss": 1.438,
+    "grad_norm": 0.7568140625953674,
     "learning_rate": 9.462020360219265e-06,
     "epoch": 0.7200460829493087,
     "step": 1875
   },
   {
+    "loss": 1.4376,
+    "grad_norm": 0.561225414276123,
     "learning_rate": 9.452231793265466e-06,
     "epoch": 0.7296466973886329,
     "step": 1900
   },
   {
+    "loss": 1.4367,
+    "grad_norm": 0.8214055299758911,
     "learning_rate": 9.442443226311668e-06,
     "epoch": 0.739247311827957,
     "step": 1925
   },
   {
+    "loss": 1.4355,
+    "grad_norm": 0.40683963894844055,
     "learning_rate": 9.432654659357871e-06,
     "epoch": 0.7488479262672811,
     "step": 1950
   },
   {
+    "loss": 1.4364,
+    "grad_norm": 0.7378409504890442,
     "learning_rate": 9.422866092404072e-06,
     "epoch": 0.7584485407066052,
     "step": 1975
   },
   {
+    "loss": 1.4358,
+    "grad_norm": 0.4747765064239502,
     "learning_rate": 9.413077525450275e-06,
     "epoch": 0.7680491551459293,
     "step": 2000
   },
   {
+    "eval_loss": 1.4366859197616577,
+    "eval_wer": 0.012122792152773251,
+    "eval_cer": 0.004870961210951922,
+    "eval_runtime": 605.1852,
+    "eval_samples_per_second": 2.807,
+    "eval_steps_per_second": 0.045,
     "epoch": 0.7680491551459293,
     "step": 2000
   },
   {
+    "loss": 1.4361,
+    "grad_norm": 0.9131174087524414,
     "learning_rate": 9.403288958496477e-06,
     "epoch": 0.7776497695852534,
     "step": 2025
   },
   {
+    "loss": 1.4359,
+    "grad_norm": 0.6853222846984863,
     "learning_rate": 9.39350039154268e-06,
     "epoch": 0.7872503840245776,
     "step": 2050
   },
   {
+    "loss": 1.4356,
+    "grad_norm": 0.43234285712242126,
     "learning_rate": 9.383711824588881e-06,
     "epoch": 0.7968509984639017,
     "step": 2075
   },
   {
+    "loss": 1.4363,
+    "grad_norm": 0.3734281361103058,
     "learning_rate": 9.373923257635083e-06,
     "epoch": 0.8064516129032258,
     "step": 2100
   },
   {
+    "loss": 1.4363,
+    "grad_norm": 0.689392626285553,
     "learning_rate": 9.364134690681284e-06,
     "epoch": 0.8160522273425499,
     "step": 2125
   },
   {
+    "loss": 1.4359,
+    "grad_norm": 0.5757440328598022,
     "learning_rate": 9.354346123727487e-06,
     "epoch": 0.825652841781874,
     "step": 2150
   },
   {
+    "loss": 1.4345,
+    "grad_norm": 0.7384234666824341,
     "learning_rate": 9.344557556773688e-06,
     "epoch": 0.8352534562211982,
     "step": 2175
   },
   {
+    "loss": 1.4356,
+    "grad_norm": 0.9221552014350891,
     "learning_rate": 9.334768989819891e-06,
     "epoch": 0.8448540706605223,
     "step": 2200
   },
   {
+    "loss": 1.4359,
+    "grad_norm": 0.8016390800476074,
     "learning_rate": 9.324980422866093e-06,
     "epoch": 0.8544546850998463,
     "step": 2225
   },
   {
+    "loss": 1.4339,
+    "grad_norm": 0.4008951783180237,
     "learning_rate": 9.315191855912296e-06,
     "epoch": 0.8640552995391705,
     "step": 2250
   },
   {
+    "loss": 1.4341,
+    "grad_norm": 1.4997563362121582,
     "learning_rate": 9.305403288958497e-06,
     "epoch": 0.8736559139784946,
     "step": 2275
   },
   {
+    "loss": 1.4335,
+    "grad_norm": 0.7560231685638428,
     "learning_rate": 9.295614722004699e-06,
     "epoch": 0.8832565284178188,
     "step": 2300
   },
   {
+    "loss": 1.4342,
+    "grad_norm": 1.0922359228134155,
     "learning_rate": 9.285826155050902e-06,
     "epoch": 0.8928571428571429,
     "step": 2325
   },
   {
+    "loss": 1.4328,
+    "grad_norm": 0.3814384639263153,
     "learning_rate": 9.276037588097103e-06,
     "epoch": 0.9024577572964669,
     "step": 2350
   },
   {
+    "loss": 1.4336,
+    "grad_norm": 0.4015847444534302,
     "learning_rate": 9.266249021143305e-06,
     "epoch": 0.9120583717357911,
     "step": 2375
   },
   {
+    "loss": 1.4328,
+    "grad_norm": 0.6820506453514099,
     "learning_rate": 9.256460454189508e-06,
     "epoch": 0.9216589861751152,
     "step": 2400
   },
   {
+    "loss": 1.433,
+    "grad_norm": 0.7453433871269226,
     "learning_rate": 9.246671887235709e-06,
     "epoch": 0.9312596006144394,
     "step": 2425
   },
   {
+    "loss": 1.4335,
+    "grad_norm": 0.4594730734825134,
     "learning_rate": 9.236883320281912e-06,
     "epoch": 0.9408602150537635,
     "step": 2450
   },
   {
+    "loss": 1.432,
+    "grad_norm": 0.7341485023498535,
     "learning_rate": 9.227094753328114e-06,
     "epoch": 0.9504608294930875,
     "step": 2475
   },
   {
+    "loss": 1.4333,
+    "grad_norm": 0.41172826290130615,
     "learning_rate": 9.217306186374315e-06,
     "epoch": 0.9600614439324117,
     "step": 2500
   },
   {
+    "eval_loss": 1.4331741333007812,
+    "eval_wer": 0.012774555171739555,
+    "eval_cer": 0.005121059463836983,
+    "eval_runtime": 606.0707,
+    "eval_samples_per_second": 2.803,
+    "eval_steps_per_second": 0.045,
     "epoch": 0.9600614439324117,
     "step": 2500
   },
   {
+    "loss": 1.4319,
+    "grad_norm": 0.35165274143218994,
     "learning_rate": 9.207517619420518e-06,
     "epoch": 0.9696620583717358,
     "step": 2525
   },
   {
+    "loss": 1.4316,
+    "grad_norm": 0.5128066539764404,
     "learning_rate": 9.19772905246672e-06,
     "epoch": 0.9792626728110599,
     "step": 2550
   },
   {
+    "loss": 1.4323,
+    "grad_norm": 0.4359879195690155,
     "learning_rate": 9.187940485512922e-06,
     "epoch": 0.988863287250384,
     "step": 2575
   },
   {
+    "loss": 1.4326,
+    "grad_norm": 0.6791874766349792,
     "learning_rate": 9.178151918559124e-06,
     "epoch": 0.9984639016897081,
     "step": 2600
   },
   {
+    "loss": 1.4317,
+    "grad_norm": 0.5291798114776611,
     "learning_rate": 9.168363351605327e-06,
     "epoch": 1.0080645161290323,
     "step": 2625
   },
   {
+    "loss": 1.4306,
+    "grad_norm": 0.5678306221961975,
     "learning_rate": 9.158574784651528e-06,
     "epoch": 1.0176651305683564,
     "step": 2650
   },
   {
+    "loss": 1.4289,
+    "grad_norm": 0.31047800183296204,
     "learning_rate": 9.14878621769773e-06,
     "epoch": 1.0272657450076805,
     "step": 2675
   },
   {
+    "loss": 1.4279,
+    "grad_norm": 0.37818169593811035,
     "learning_rate": 9.138997650743931e-06,
     "epoch": 1.0368663594470047,
     "step": 2700
   },
   {
+    "loss": 1.4283,
+    "grad_norm": 0.33200085163116455,
     "learning_rate": 9.129209083790134e-06,
     "epoch": 1.0464669738863288,
     "step": 2725
   },
   {
+    "loss": 1.4286,
+    "grad_norm": 0.36598828434944153,
     "learning_rate": 9.119420516836336e-06,
     "epoch": 1.0560675883256527,
     "step": 2750
   },
   {
+    "loss": 1.4285,
+    "grad_norm": 0.3898240923881531,
     "learning_rate": 9.109631949882539e-06,
     "epoch": 1.0656682027649769,
     "step": 2775
   },
   {
+    "loss": 1.4283,
+    "grad_norm": 0.3763328790664673,
     "learning_rate": 9.09984338292874e-06,
     "epoch": 1.075268817204301,
     "step": 2800
   },
   {
+    "loss": 1.427,
+    "grad_norm": 0.5297687649726868,
     "learning_rate": 9.090054815974943e-06,
     "epoch": 1.0848694316436251,
     "step": 2825
   },
   {
+    "loss": 1.4291,
+    "grad_norm": 0.5956099629402161,
     "learning_rate": 9.080266249021143e-06,
     "epoch": 1.0944700460829493,
     "step": 2850
   },
   {
+    "loss": 1.4277,
+    "grad_norm": 0.3517364263534546,
     "learning_rate": 9.070477682067346e-06,
     "epoch": 1.1040706605222734,
     "step": 2875
   },
   {
+    "loss": 1.428,
+    "grad_norm": 0.47178784012794495,
     "learning_rate": 9.060689115113547e-06,
     "epoch": 1.1136712749615976,
     "step": 2900
   },
   {
+    "loss": 1.428,
+    "grad_norm": 0.4502784013748169,
     "learning_rate": 9.05090054815975e-06,
     "epoch": 1.1232718894009217,
     "step": 2925
   },
   {
+    "loss": 1.428,
+    "grad_norm": 0.32381555438041687,
     "learning_rate": 9.041111981205952e-06,
     "epoch": 1.1328725038402458,
     "step": 2950
   },
   {
+    "loss": 1.4278,
+    "grad_norm": 0.4513380527496338,
     "learning_rate": 9.031323414252155e-06,
     "epoch": 1.14247311827957,
     "step": 2975
   },
   {
+    "loss": 1.4284,
+    "grad_norm": 0.2871541976928711,
     "learning_rate": 9.021534847298356e-06,
     "epoch": 1.1520737327188941,
     "step": 3000
   },
   {
+    "eval_loss": 1.430253505706787,
+    "eval_wer": 0.012383497360359774,
+    "eval_cer": 0.004775685686043326,
+    "eval_runtime": 605.0894,
+    "eval_samples_per_second": 2.808,
+    "eval_steps_per_second": 0.045,
     "epoch": 1.1520737327188941,
     "step": 3000
   },
   {
+    "loss": 1.4282,
+    "grad_norm": 0.3893296718597412,
     "learning_rate": 9.01174628034456e-06,
     "epoch": 1.161674347158218,
     "step": 3025
   },
   {
+    "loss": 1.4281,
+    "grad_norm": 0.3037892282009125,
     "learning_rate": 9.00195771339076e-06,
     "epoch": 1.1712749615975422,
     "step": 3050
   },
   {
+    "loss": 1.4277,
+    "grad_norm": 0.5750548243522644,
     "learning_rate": 8.992169146436962e-06,
     "epoch": 1.1808755760368663,
     "step": 3075
   },
   {
+    "loss": 1.428,
+    "grad_norm": 0.6014561653137207,
     "learning_rate": 8.982380579483163e-06,
     "epoch": 1.1904761904761905,
     "step": 3100
   },
   {
+    "loss": 1.4278,
+    "grad_norm": 0.42518579959869385,
     "learning_rate": 8.972592012529366e-06,
     "epoch": 1.2000768049155146,
     "step": 3125
   },
   {
+    "loss": 1.4268,
+    "grad_norm": 0.3365946412086487,
     "learning_rate": 8.962803445575568e-06,
     "epoch": 1.2096774193548387,
     "step": 3150
   },
   {
+    "loss": 1.4279,
+    "grad_norm": 0.7084966897964478,
     "learning_rate": 8.953014878621771e-06,
     "epoch": 1.2192780337941629,
     "step": 3175
   },
   {
+    "loss": 1.4273,
+    "grad_norm": 0.4307415783405304,
     "learning_rate": 8.943226311667972e-06,
     "epoch": 1.228878648233487,
     "step": 3200
   },
   {
+    "loss": 1.4279,
+    "grad_norm": 0.40840864181518555,
     "learning_rate": 8.933437744714175e-06,
     "epoch": 1.238479262672811,
     "step": 3225
   },
   {
+    "loss": 1.4275,
+    "grad_norm": 0.34653913974761963,
     "learning_rate": 8.923649177760377e-06,
     "epoch": 1.248079877112135,
     "step": 3250
   },
   {
+    "loss": 1.427,
+    "grad_norm": 0.5619207620620728,
     "learning_rate": 8.913860610806578e-06,
     "epoch": 1.2576804915514592,
     "step": 3275
   },
   {
+    "loss": 1.4278,
+    "grad_norm": 0.9659692049026489,
     "learning_rate": 8.904072043852781e-06,
     "epoch": 1.2672811059907834,
     "step": 3300
   },
   {
+    "loss": 1.4271,
+    "grad_norm": 0.5404800176620483,
     "learning_rate": 8.894283476898983e-06,
     "epoch": 1.2768817204301075,
     "step": 3325
   },
   {
+    "loss": 1.4277,
+    "grad_norm": 0.5914152264595032,
     "learning_rate": 8.884494909945186e-06,
     "epoch": 1.2864823348694316,
     "step": 3350
   },
   {
+    "loss": 1.4291,
+    "grad_norm": 0.6599079966545105,
     "learning_rate": 8.874706342991387e-06,
     "epoch": 1.2960829493087558,
     "step": 3375
   },
   {
+    "loss": 1.4268,
+    "grad_norm": 0.3193410336971283,
     "learning_rate": 8.864917776037588e-06,
     "epoch": 1.30568356374808,
     "step": 3400
   },
   {
+    "loss": 1.4267,
+    "grad_norm": 0.4547780454158783,
     "learning_rate": 8.85512920908379e-06,
     "epoch": 1.315284178187404,
     "step": 3425
   },
   {
+    "loss": 1.4263,
+    "grad_norm": 0.3442824184894562,
     "learning_rate": 8.845340642129993e-06,
     "epoch": 1.3248847926267282,
     "step": 3450
   },
   {
+    "loss": 1.4266,
+    "grad_norm": 0.41667816042900085,
     "learning_rate": 8.835552075176194e-06,
     "epoch": 1.3344854070660523,
     "step": 3475
   },
   {
+    "loss": 1.4278,
+    "grad_norm": 0.35117366909980774,
     "learning_rate": 8.825763508222397e-06,
     "epoch": 1.3440860215053765,
     "step": 3500
   },
   {
+    "eval_loss": 1.428139328956604,
+    "eval_wer": 0.01016750309587434,
+    "eval_cer": 0.004275489180273203,
+    "eval_runtime": 604.2764,
+    "eval_samples_per_second": 2.812,
+    "eval_steps_per_second": 0.045,
     "epoch": 1.3440860215053765,
     "step": 3500
   },
   {
+    "train_runtime": 17120.6339,
+    "train_samples_per_second": 97.309,
+    "train_steps_per_second": 1.521,
+    "total_flos": 3.818118759756595e+20,
+    "train_loss": 1.482932498386928,
     "epoch": 1.3440860215053765,
     "step": 3500
   }

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff