{ "best_metric": 4.054176686782522, "best_model_checkpoint": "./raid/liyan/whisper/whisper-medium-mls_sjt/checkpoint-7500", "epoch": 2.5737817433081673, "global_step": 7500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "loss": 0.4039, "step": 25 }, { "epoch": 0.02, "learning_rate": 9.800000000000001e-07, "loss": 0.3669, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.46e-06, "loss": 0.3318, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.9600000000000003e-06, "loss": 0.3159, "step": 100 }, { "epoch": 0.04, "learning_rate": 2.46e-06, "loss": 0.296, "step": 125 }, { "epoch": 0.05, "learning_rate": 2.96e-06, "loss": 0.3047, "step": 150 }, { "epoch": 0.06, "learning_rate": 3.46e-06, "loss": 0.2821, "step": 175 }, { "epoch": 0.07, "learning_rate": 3.96e-06, "loss": 0.2893, "step": 200 }, { "epoch": 0.08, "learning_rate": 4.4600000000000005e-06, "loss": 0.2679, "step": 225 }, { "epoch": 0.09, "learning_rate": 4.960000000000001e-06, "loss": 0.2591, "step": 250 }, { "epoch": 0.09, "learning_rate": 5.460000000000001e-06, "loss": 0.2453, "step": 275 }, { "epoch": 0.1, "learning_rate": 5.9600000000000005e-06, "loss": 0.2507, "step": 300 }, { "epoch": 0.11, "learning_rate": 6.460000000000001e-06, "loss": 0.2435, "step": 325 }, { "epoch": 0.12, "learning_rate": 6.96e-06, "loss": 0.2305, "step": 350 }, { "epoch": 0.13, "learning_rate": 7.4600000000000006e-06, "loss": 0.2224, "step": 375 }, { "epoch": 0.14, "learning_rate": 7.960000000000002e-06, "loss": 0.2443, "step": 400 }, { "epoch": 0.15, "learning_rate": 8.46e-06, "loss": 0.2443, "step": 425 }, { "epoch": 0.15, "learning_rate": 8.96e-06, "loss": 0.2292, "step": 450 }, { "epoch": 0.16, "learning_rate": 9.460000000000001e-06, "loss": 0.2445, "step": 475 }, { "epoch": 0.17, "learning_rate": 9.960000000000001e-06, "loss": 0.2463, "step": 500 }, { "epoch": 0.17, "eval_loss": 0.14288775622844696, "eval_runtime": 13972.8923, "eval_samples_per_second": 1.601, "eval_steps_per_second": 0.067, "eval_wer": 5.199872952493307, "step": 500 }, { "epoch": 0.18, "learning_rate": 9.99220338983051e-06, "loss": 0.2158, "step": 525 }, { "epoch": 0.19, "learning_rate": 9.983728813559322e-06, "loss": 0.2302, "step": 550 }, { "epoch": 0.2, "learning_rate": 9.975254237288137e-06, "loss": 0.2223, "step": 575 }, { "epoch": 0.21, "learning_rate": 9.96677966101695e-06, "loss": 0.216, "step": 600 }, { "epoch": 0.21, "learning_rate": 9.958305084745764e-06, "loss": 0.2111, "step": 625 }, { "epoch": 0.22, "learning_rate": 9.949830508474577e-06, "loss": 0.2208, "step": 650 }, { "epoch": 0.23, "learning_rate": 9.94135593220339e-06, "loss": 0.2254, "step": 675 }, { "epoch": 0.24, "learning_rate": 9.932881355932205e-06, "loss": 0.2008, "step": 700 }, { "epoch": 0.25, "learning_rate": 9.924406779661018e-06, "loss": 0.2284, "step": 725 }, { "epoch": 0.26, "learning_rate": 9.915932203389831e-06, "loss": 0.2193, "step": 750 }, { "epoch": 0.27, "learning_rate": 9.907457627118646e-06, "loss": 0.2049, "step": 775 }, { "epoch": 0.27, "learning_rate": 9.898983050847459e-06, "loss": 0.2048, "step": 800 }, { "epoch": 0.28, "learning_rate": 9.890508474576272e-06, "loss": 0.2272, "step": 825 }, { "epoch": 0.29, "learning_rate": 9.882033898305086e-06, "loss": 0.2096, "step": 850 }, { "epoch": 0.3, "learning_rate": 9.8735593220339e-06, "loss": 0.2078, "step": 875 }, { "epoch": 0.31, "learning_rate": 9.865084745762712e-06, "loss": 0.206, "step": 900 }, { "epoch": 0.32, "learning_rate": 9.856610169491525e-06, "loss": 0.211, "step": 925 }, { "epoch": 0.33, "learning_rate": 9.84813559322034e-06, "loss": 0.2239, "step": 950 }, { "epoch": 0.33, "learning_rate": 9.839661016949153e-06, "loss": 0.2099, "step": 975 }, { "epoch": 0.34, "learning_rate": 9.831186440677966e-06, "loss": 0.208, "step": 1000 }, { "epoch": 0.34, "eval_loss": 0.1417330801486969, "eval_runtime": 14048.5832, "eval_samples_per_second": 1.592, "eval_steps_per_second": 0.066, "eval_wer": 5.172194745678116, "step": 1000 }, { "epoch": 0.35, "learning_rate": 9.82271186440678e-06, "loss": 0.2234, "step": 1025 }, { "epoch": 0.36, "learning_rate": 9.814237288135594e-06, "loss": 0.1974, "step": 1050 }, { "epoch": 0.37, "learning_rate": 9.805762711864407e-06, "loss": 0.2175, "step": 1075 }, { "epoch": 0.38, "learning_rate": 9.797288135593221e-06, "loss": 0.207, "step": 1100 }, { "epoch": 0.39, "learning_rate": 9.788813559322034e-06, "loss": 0.1922, "step": 1125 }, { "epoch": 0.39, "learning_rate": 9.780338983050847e-06, "loss": 0.2128, "step": 1150 }, { "epoch": 0.4, "learning_rate": 9.771864406779662e-06, "loss": 0.2038, "step": 1175 }, { "epoch": 0.41, "learning_rate": 9.763389830508475e-06, "loss": 0.1996, "step": 1200 }, { "epoch": 0.42, "learning_rate": 9.75491525423729e-06, "loss": 0.1982, "step": 1225 }, { "epoch": 0.43, "learning_rate": 9.746440677966103e-06, "loss": 0.2026, "step": 1250 }, { "epoch": 0.44, "learning_rate": 9.737966101694916e-06, "loss": 0.2056, "step": 1275 }, { "epoch": 0.45, "learning_rate": 9.72949152542373e-06, "loss": 0.2038, "step": 1300 }, { "epoch": 0.45, "learning_rate": 9.721016949152543e-06, "loss": 0.1906, "step": 1325 }, { "epoch": 0.46, "learning_rate": 9.712542372881358e-06, "loss": 0.1953, "step": 1350 }, { "epoch": 0.47, "learning_rate": 9.70406779661017e-06, "loss": 0.1952, "step": 1375 }, { "epoch": 0.48, "learning_rate": 9.695593220338984e-06, "loss": 0.1988, "step": 1400 }, { "epoch": 0.49, "learning_rate": 9.687118644067798e-06, "loss": 0.1956, "step": 1425 }, { "epoch": 0.5, "learning_rate": 9.678644067796611e-06, "loss": 0.19, "step": 1450 }, { "epoch": 0.51, "learning_rate": 9.670169491525424e-06, "loss": 0.2183, "step": 1475 }, { "epoch": 0.51, "learning_rate": 9.661694915254239e-06, "loss": 0.1744, "step": 1500 }, { "epoch": 0.51, "eval_loss": 0.1370771825313568, "eval_runtime": 14137.0887, "eval_samples_per_second": 1.582, "eval_steps_per_second": 0.066, "eval_wer": 4.992967013022369, "step": 1500 }, { "epoch": 0.52, "learning_rate": 9.653220338983052e-06, "loss": 0.2261, "step": 1525 }, { "epoch": 0.53, "learning_rate": 9.644745762711865e-06, "loss": 0.2001, "step": 1550 }, { "epoch": 0.54, "learning_rate": 9.636271186440678e-06, "loss": 0.1984, "step": 1575 }, { "epoch": 0.55, "learning_rate": 9.627796610169493e-06, "loss": 0.2003, "step": 1600 }, { "epoch": 0.56, "learning_rate": 9.619322033898306e-06, "loss": 0.1771, "step": 1625 }, { "epoch": 0.57, "learning_rate": 9.610847457627119e-06, "loss": 0.1979, "step": 1650 }, { "epoch": 0.57, "learning_rate": 9.602372881355932e-06, "loss": 0.2013, "step": 1675 }, { "epoch": 0.58, "learning_rate": 9.593898305084746e-06, "loss": 0.1899, "step": 1700 }, { "epoch": 0.59, "learning_rate": 9.58542372881356e-06, "loss": 0.1782, "step": 1725 }, { "epoch": 0.6, "learning_rate": 9.576949152542374e-06, "loss": 0.1689, "step": 1750 }, { "epoch": 0.61, "learning_rate": 9.568474576271187e-06, "loss": 0.1985, "step": 1775 }, { "epoch": 0.62, "learning_rate": 9.56e-06, "loss": 0.1788, "step": 1800 }, { "epoch": 0.63, "learning_rate": 9.551525423728815e-06, "loss": 0.186, "step": 1825 }, { "epoch": 0.63, "learning_rate": 9.543050847457628e-06, "loss": 0.1873, "step": 1850 }, { "epoch": 0.64, "learning_rate": 9.53457627118644e-06, "loss": 0.1944, "step": 1875 }, { "epoch": 0.65, "learning_rate": 9.526101694915255e-06, "loss": 0.1954, "step": 1900 }, { "epoch": 0.66, "learning_rate": 9.517627118644068e-06, "loss": 0.1948, "step": 1925 }, { "epoch": 0.67, "learning_rate": 9.509152542372883e-06, "loss": 0.1785, "step": 1950 }, { "epoch": 0.68, "learning_rate": 9.500677966101696e-06, "loss": 0.1795, "step": 1975 }, { "epoch": 0.69, "learning_rate": 9.492203389830509e-06, "loss": 0.1821, "step": 2000 }, { "epoch": 0.69, "eval_loss": 0.13245970010757446, "eval_runtime": 13962.1433, "eval_samples_per_second": 1.602, "eval_steps_per_second": 0.067, "eval_wer": 4.777893733835473, "step": 2000 }, { "epoch": 0.69, "learning_rate": 9.483728813559324e-06, "loss": 0.1895, "step": 2025 }, { "epoch": 0.7, "learning_rate": 9.475254237288137e-06, "loss": 0.1693, "step": 2050 }, { "epoch": 0.71, "learning_rate": 9.466779661016951e-06, "loss": 0.1791, "step": 2075 }, { "epoch": 0.72, "learning_rate": 9.458305084745764e-06, "loss": 0.1889, "step": 2100 }, { "epoch": 0.73, "learning_rate": 9.449830508474577e-06, "loss": 0.17, "step": 2125 }, { "epoch": 0.74, "learning_rate": 9.44135593220339e-06, "loss": 0.2053, "step": 2150 }, { "epoch": 0.75, "learning_rate": 9.432881355932205e-06, "loss": 0.1754, "step": 2175 }, { "epoch": 0.75, "learning_rate": 9.424406779661018e-06, "loss": 0.1791, "step": 2200 }, { "epoch": 0.76, "learning_rate": 9.41593220338983e-06, "loss": 0.1901, "step": 2225 }, { "epoch": 0.77, "learning_rate": 9.407457627118645e-06, "loss": 0.1682, "step": 2250 }, { "epoch": 0.78, "learning_rate": 9.398983050847458e-06, "loss": 0.176, "step": 2275 }, { "epoch": 0.79, "learning_rate": 9.390508474576271e-06, "loss": 0.1846, "step": 2300 }, { "epoch": 0.8, "learning_rate": 9.382033898305084e-06, "loss": 0.1789, "step": 2325 }, { "epoch": 0.81, "learning_rate": 9.373559322033899e-06, "loss": 0.1645, "step": 2350 }, { "epoch": 0.82, "learning_rate": 9.365084745762712e-06, "loss": 0.1772, "step": 2375 }, { "epoch": 0.82, "learning_rate": 9.356610169491525e-06, "loss": 0.1652, "step": 2400 }, { "epoch": 0.83, "learning_rate": 9.34813559322034e-06, "loss": 0.1752, "step": 2425 }, { "epoch": 0.84, "learning_rate": 9.339661016949153e-06, "loss": 0.1864, "step": 2450 }, { "epoch": 0.85, "learning_rate": 9.331186440677967e-06, "loss": 0.17, "step": 2475 }, { "epoch": 0.86, "learning_rate": 9.32271186440678e-06, "loss": 0.1705, "step": 2500 }, { "epoch": 0.86, "eval_loss": 0.13045215606689453, "eval_runtime": 14199.68, "eval_samples_per_second": 1.575, "eval_steps_per_second": 0.066, "eval_wer": 4.807840646127319, "step": 2500 }, { "epoch": 0.87, "learning_rate": 9.314237288135593e-06, "loss": 0.1801, "step": 2525 }, { "epoch": 0.88, "learning_rate": 9.305762711864408e-06, "loss": 0.1708, "step": 2550 }, { "epoch": 0.88, "learning_rate": 9.297288135593221e-06, "loss": 0.1831, "step": 2575 }, { "epoch": 0.89, "learning_rate": 9.288813559322036e-06, "loss": 0.1803, "step": 2600 }, { "epoch": 0.9, "learning_rate": 9.280338983050849e-06, "loss": 0.1787, "step": 2625 }, { "epoch": 0.91, "learning_rate": 9.271864406779662e-06, "loss": 0.1561, "step": 2650 }, { "epoch": 0.92, "learning_rate": 9.263389830508476e-06, "loss": 0.1632, "step": 2675 }, { "epoch": 0.93, "learning_rate": 9.25491525423729e-06, "loss": 0.1867, "step": 2700 }, { "epoch": 0.94, "learning_rate": 9.246440677966102e-06, "loss": 0.1767, "step": 2725 }, { "epoch": 0.94, "learning_rate": 9.237966101694917e-06, "loss": 0.1662, "step": 2750 }, { "epoch": 0.95, "learning_rate": 9.22949152542373e-06, "loss": 0.179, "step": 2775 }, { "epoch": 0.96, "learning_rate": 9.221016949152543e-06, "loss": 0.1696, "step": 2800 }, { "epoch": 0.97, "learning_rate": 9.212542372881358e-06, "loss": 0.1697, "step": 2825 }, { "epoch": 0.98, "learning_rate": 9.20406779661017e-06, "loss": 0.1776, "step": 2850 }, { "epoch": 0.99, "learning_rate": 9.195593220338984e-06, "loss": 0.1624, "step": 2875 }, { "epoch": 1.0, "learning_rate": 9.187118644067796e-06, "loss": 0.1629, "step": 2900 }, { "epoch": 1.0, "learning_rate": 9.178644067796611e-06, "loss": 0.1614, "step": 2925 }, { "epoch": 1.01, "learning_rate": 9.170169491525424e-06, "loss": 0.1087, "step": 2950 }, { "epoch": 1.02, "learning_rate": 9.161694915254237e-06, "loss": 0.1122, "step": 2975 }, { "epoch": 1.03, "learning_rate": 9.153220338983052e-06, "loss": 0.1069, "step": 3000 }, { "epoch": 1.03, "eval_loss": 0.1263168603181839, "eval_runtime": 13889.4582, "eval_samples_per_second": 1.61, "eval_steps_per_second": 0.067, "eval_wer": 4.3962974726620985, "step": 3000 }, { "epoch": 1.04, "learning_rate": 9.144745762711865e-06, "loss": 0.1051, "step": 3025 }, { "epoch": 1.05, "learning_rate": 9.136271186440678e-06, "loss": 0.103, "step": 3050 }, { "epoch": 1.06, "learning_rate": 9.127796610169492e-06, "loss": 0.1039, "step": 3075 }, { "epoch": 1.06, "learning_rate": 9.119322033898305e-06, "loss": 0.1113, "step": 3100 }, { "epoch": 1.07, "learning_rate": 9.110847457627118e-06, "loss": 0.1092, "step": 3125 }, { "epoch": 1.08, "learning_rate": 9.102372881355933e-06, "loss": 0.1272, "step": 3150 }, { "epoch": 1.09, "learning_rate": 9.093898305084746e-06, "loss": 0.1079, "step": 3175 }, { "epoch": 1.1, "learning_rate": 9.08542372881356e-06, "loss": 0.1137, "step": 3200 }, { "epoch": 1.11, "learning_rate": 9.076949152542374e-06, "loss": 0.1097, "step": 3225 }, { "epoch": 1.12, "learning_rate": 9.068474576271187e-06, "loss": 0.1127, "step": 3250 }, { "epoch": 1.12, "learning_rate": 9.060000000000001e-06, "loss": 0.1139, "step": 3275 }, { "epoch": 1.13, "learning_rate": 9.051525423728814e-06, "loss": 0.1059, "step": 3300 }, { "epoch": 1.14, "learning_rate": 9.043050847457629e-06, "loss": 0.1103, "step": 3325 }, { "epoch": 1.15, "learning_rate": 9.034576271186442e-06, "loss": 0.1238, "step": 3350 }, { "epoch": 1.16, "learning_rate": 9.026101694915255e-06, "loss": 0.1008, "step": 3375 }, { "epoch": 1.17, "learning_rate": 9.01762711864407e-06, "loss": 0.1077, "step": 3400 }, { "epoch": 1.18, "learning_rate": 9.009152542372883e-06, "loss": 0.1086, "step": 3425 }, { "epoch": 1.18, "learning_rate": 9.000677966101696e-06, "loss": 0.1126, "step": 3450 }, { "epoch": 1.19, "learning_rate": 8.99220338983051e-06, "loss": 0.1151, "step": 3475 }, { "epoch": 1.2, "learning_rate": 8.983728813559323e-06, "loss": 0.1098, "step": 3500 }, { "epoch": 1.2, "eval_loss": 0.12618736922740936, "eval_runtime": 14301.962, "eval_samples_per_second": 1.564, "eval_steps_per_second": 0.065, "eval_wer": 4.478878352012342, "step": 3500 }, { "epoch": 1.21, "learning_rate": 8.975254237288136e-06, "loss": 0.1181, "step": 3525 }, { "epoch": 1.22, "learning_rate": 8.96677966101695e-06, "loss": 0.1102, "step": 3550 }, { "epoch": 1.23, "learning_rate": 8.958305084745764e-06, "loss": 0.1046, "step": 3575 }, { "epoch": 1.24, "learning_rate": 8.949830508474577e-06, "loss": 0.1146, "step": 3600 }, { "epoch": 1.24, "learning_rate": 8.94135593220339e-06, "loss": 0.1192, "step": 3625 }, { "epoch": 1.25, "learning_rate": 8.932881355932203e-06, "loss": 0.1087, "step": 3650 }, { "epoch": 1.26, "learning_rate": 8.924406779661017e-06, "loss": 0.1143, "step": 3675 }, { "epoch": 1.27, "learning_rate": 8.91593220338983e-06, "loss": 0.1039, "step": 3700 }, { "epoch": 1.28, "learning_rate": 8.907457627118645e-06, "loss": 0.1194, "step": 3725 }, { "epoch": 1.29, "learning_rate": 8.898983050847458e-06, "loss": 0.1128, "step": 3750 }, { "epoch": 1.3, "learning_rate": 8.890508474576271e-06, "loss": 0.108, "step": 3775 }, { "epoch": 1.3, "learning_rate": 8.882033898305086e-06, "loss": 0.1049, "step": 3800 }, { "epoch": 1.31, "learning_rate": 8.873559322033899e-06, "loss": 0.1253, "step": 3825 }, { "epoch": 1.32, "learning_rate": 8.865084745762712e-06, "loss": 0.1165, "step": 3850 }, { "epoch": 1.33, "learning_rate": 8.856610169491526e-06, "loss": 0.1189, "step": 3875 }, { "epoch": 1.34, "learning_rate": 8.84813559322034e-06, "loss": 0.1115, "step": 3900 }, { "epoch": 1.35, "learning_rate": 8.839661016949154e-06, "loss": 0.1214, "step": 3925 }, { "epoch": 1.36, "learning_rate": 8.831186440677967e-06, "loss": 0.101, "step": 3950 }, { "epoch": 1.36, "learning_rate": 8.82271186440678e-06, "loss": 0.1263, "step": 3975 }, { "epoch": 1.37, "learning_rate": 8.814237288135595e-06, "loss": 0.1059, "step": 4000 }, { "epoch": 1.37, "eval_loss": 0.12528666853904724, "eval_runtime": 14038.6098, "eval_samples_per_second": 1.593, "eval_steps_per_second": 0.066, "eval_wer": 4.441217841099868, "step": 4000 }, { "epoch": 1.38, "learning_rate": 8.805762711864408e-06, "loss": 0.1241, "step": 4025 }, { "epoch": 1.39, "learning_rate": 8.797288135593222e-06, "loss": 0.1159, "step": 4050 }, { "epoch": 1.4, "learning_rate": 8.788813559322035e-06, "loss": 0.1161, "step": 4075 }, { "epoch": 1.41, "learning_rate": 8.780338983050848e-06, "loss": 0.1064, "step": 4100 }, { "epoch": 1.42, "learning_rate": 8.771864406779661e-06, "loss": 0.1106, "step": 4125 }, { "epoch": 1.42, "learning_rate": 8.763389830508476e-06, "loss": 0.1048, "step": 4150 }, { "epoch": 1.43, "learning_rate": 8.754915254237289e-06, "loss": 0.1122, "step": 4175 }, { "epoch": 1.44, "learning_rate": 8.746440677966102e-06, "loss": 0.1262, "step": 4200 }, { "epoch": 1.45, "learning_rate": 8.737966101694917e-06, "loss": 0.1277, "step": 4225 }, { "epoch": 1.46, "learning_rate": 8.72949152542373e-06, "loss": 0.112, "step": 4250 }, { "epoch": 1.47, "learning_rate": 8.721016949152543e-06, "loss": 0.1011, "step": 4275 }, { "epoch": 1.48, "learning_rate": 8.712542372881356e-06, "loss": 0.106, "step": 4300 }, { "epoch": 1.48, "learning_rate": 8.70406779661017e-06, "loss": 0.1206, "step": 4325 }, { "epoch": 1.49, "learning_rate": 8.695593220338983e-06, "loss": 0.1097, "step": 4350 }, { "epoch": 1.5, "learning_rate": 8.687118644067796e-06, "loss": 0.1212, "step": 4375 }, { "epoch": 1.51, "learning_rate": 8.678644067796611e-06, "loss": 0.1068, "step": 4400 }, { "epoch": 1.52, "learning_rate": 8.670169491525424e-06, "loss": 0.1096, "step": 4425 }, { "epoch": 1.53, "learning_rate": 8.661694915254239e-06, "loss": 0.1062, "step": 4450 }, { "epoch": 1.54, "learning_rate": 8.653220338983051e-06, "loss": 0.1, "step": 4475 }, { "epoch": 1.54, "learning_rate": 8.644745762711864e-06, "loss": 0.1088, "step": 4500 }, { "epoch": 1.54, "eval_loss": 0.12188256531953812, "eval_runtime": 13978.7976, "eval_samples_per_second": 1.6, "eval_steps_per_second": 0.067, "eval_wer": 4.305549253595898, "step": 4500 }, { "epoch": 1.55, "learning_rate": 8.636271186440679e-06, "loss": 0.122, "step": 4525 }, { "epoch": 1.56, "learning_rate": 8.627796610169492e-06, "loss": 0.1094, "step": 4550 }, { "epoch": 1.57, "learning_rate": 8.619322033898305e-06, "loss": 0.1163, "step": 4575 }, { "epoch": 1.58, "learning_rate": 8.61084745762712e-06, "loss": 0.127, "step": 4600 }, { "epoch": 1.59, "learning_rate": 8.602372881355933e-06, "loss": 0.1122, "step": 4625 }, { "epoch": 1.6, "learning_rate": 8.593898305084747e-06, "loss": 0.1056, "step": 4650 }, { "epoch": 1.6, "learning_rate": 8.58542372881356e-06, "loss": 0.1051, "step": 4675 }, { "epoch": 1.61, "learning_rate": 8.576949152542373e-06, "loss": 0.1087, "step": 4700 }, { "epoch": 1.62, "learning_rate": 8.568474576271188e-06, "loss": 0.1155, "step": 4725 }, { "epoch": 1.63, "learning_rate": 8.560000000000001e-06, "loss": 0.114, "step": 4750 }, { "epoch": 1.64, "learning_rate": 8.551525423728814e-06, "loss": 0.1286, "step": 4775 }, { "epoch": 1.65, "learning_rate": 8.543050847457629e-06, "loss": 0.1052, "step": 4800 }, { "epoch": 1.66, "learning_rate": 8.534576271186442e-06, "loss": 0.1156, "step": 4825 }, { "epoch": 1.66, "learning_rate": 8.526101694915255e-06, "loss": 0.0998, "step": 4850 }, { "epoch": 1.67, "learning_rate": 8.517627118644068e-06, "loss": 0.1147, "step": 4875 }, { "epoch": 1.68, "learning_rate": 8.509152542372882e-06, "loss": 0.1007, "step": 4900 }, { "epoch": 1.69, "learning_rate": 8.500677966101695e-06, "loss": 0.1102, "step": 4925 }, { "epoch": 1.7, "learning_rate": 8.492203389830508e-06, "loss": 0.1113, "step": 4950 }, { "epoch": 1.71, "learning_rate": 8.483728813559323e-06, "loss": 0.1118, "step": 4975 }, { "epoch": 1.72, "learning_rate": 8.475254237288136e-06, "loss": 0.1074, "step": 5000 }, { "epoch": 1.72, "eval_loss": 0.12055900692939758, "eval_runtime": 13950.039, "eval_samples_per_second": 1.603, "eval_steps_per_second": 0.067, "eval_wer": 4.194382685239803, "step": 5000 }, { "epoch": 1.72, "learning_rate": 8.466779661016949e-06, "loss": 0.1024, "step": 5025 }, { "epoch": 1.73, "learning_rate": 8.458305084745764e-06, "loss": 0.1078, "step": 5050 }, { "epoch": 1.74, "learning_rate": 8.449830508474577e-06, "loss": 0.1198, "step": 5075 }, { "epoch": 1.75, "learning_rate": 8.44135593220339e-06, "loss": 0.1052, "step": 5100 }, { "epoch": 1.76, "learning_rate": 8.432881355932204e-06, "loss": 0.1104, "step": 5125 }, { "epoch": 1.77, "learning_rate": 8.424406779661017e-06, "loss": 0.111, "step": 5150 }, { "epoch": 1.78, "learning_rate": 8.415932203389832e-06, "loss": 0.114, "step": 5175 }, { "epoch": 1.78, "learning_rate": 8.407457627118645e-06, "loss": 0.1074, "step": 5200 }, { "epoch": 1.79, "learning_rate": 8.398983050847458e-06, "loss": 0.1117, "step": 5225 }, { "epoch": 1.8, "learning_rate": 8.390508474576273e-06, "loss": 0.1059, "step": 5250 }, { "epoch": 1.81, "learning_rate": 8.382033898305085e-06, "loss": 0.126, "step": 5275 }, { "epoch": 1.82, "learning_rate": 8.3735593220339e-06, "loss": 0.1037, "step": 5300 }, { "epoch": 1.83, "learning_rate": 8.365084745762713e-06, "loss": 0.0988, "step": 5325 }, { "epoch": 1.84, "learning_rate": 8.356610169491526e-06, "loss": 0.1053, "step": 5350 }, { "epoch": 1.84, "learning_rate": 8.34813559322034e-06, "loss": 0.1128, "step": 5375 }, { "epoch": 1.85, "learning_rate": 8.339661016949154e-06, "loss": 0.1132, "step": 5400 }, { "epoch": 1.86, "learning_rate": 8.331186440677967e-06, "loss": 0.1083, "step": 5425 }, { "epoch": 1.87, "learning_rate": 8.32271186440678e-06, "loss": 0.128, "step": 5450 }, { "epoch": 1.88, "learning_rate": 8.314237288135594e-06, "loss": 0.1044, "step": 5475 }, { "epoch": 1.89, "learning_rate": 8.305762711864407e-06, "loss": 0.1037, "step": 5500 }, { "epoch": 1.89, "eval_loss": 0.1189337745308876, "eval_runtime": 14041.5407, "eval_samples_per_second": 1.593, "eval_steps_per_second": 0.066, "eval_wer": 4.197558872907119, "step": 5500 }, { "epoch": 1.9, "learning_rate": 8.29728813559322e-06, "loss": 0.1185, "step": 5525 }, { "epoch": 1.9, "learning_rate": 8.288813559322035e-06, "loss": 0.1117, "step": 5550 }, { "epoch": 1.91, "learning_rate": 8.280338983050848e-06, "loss": 0.1185, "step": 5575 }, { "epoch": 1.92, "learning_rate": 8.271864406779661e-06, "loss": 0.1155, "step": 5600 }, { "epoch": 1.93, "learning_rate": 8.263389830508474e-06, "loss": 0.1203, "step": 5625 }, { "epoch": 1.94, "learning_rate": 8.254915254237289e-06, "loss": 0.1065, "step": 5650 }, { "epoch": 1.95, "learning_rate": 8.246440677966102e-06, "loss": 0.1063, "step": 5675 }, { "epoch": 1.96, "learning_rate": 8.237966101694916e-06, "loss": 0.113, "step": 5700 }, { "epoch": 1.96, "learning_rate": 8.22949152542373e-06, "loss": 0.1073, "step": 5725 }, { "epoch": 1.97, "learning_rate": 8.221016949152542e-06, "loss": 0.1045, "step": 5750 }, { "epoch": 1.98, "learning_rate": 8.212542372881357e-06, "loss": 0.1053, "step": 5775 }, { "epoch": 1.99, "learning_rate": 8.20406779661017e-06, "loss": 0.0961, "step": 5800 }, { "epoch": 2.0, "learning_rate": 8.195593220338983e-06, "loss": 0.1045, "step": 5825 }, { "epoch": 2.01, "learning_rate": 8.187118644067798e-06, "loss": 0.0678, "step": 5850 }, { "epoch": 2.02, "learning_rate": 8.17864406779661e-06, "loss": 0.0583, "step": 5875 }, { "epoch": 2.02, "learning_rate": 8.170169491525425e-06, "loss": 0.0615, "step": 5900 }, { "epoch": 2.03, "learning_rate": 8.161694915254238e-06, "loss": 0.0624, "step": 5925 }, { "epoch": 2.04, "learning_rate": 8.153220338983051e-06, "loss": 0.0561, "step": 5950 }, { "epoch": 2.05, "learning_rate": 8.144745762711866e-06, "loss": 0.0633, "step": 5975 }, { "epoch": 2.06, "learning_rate": 8.136271186440679e-06, "loss": 0.0575, "step": 6000 }, { "epoch": 2.06, "eval_loss": 0.12541243433952332, "eval_runtime": 13932.2659, "eval_samples_per_second": 1.605, "eval_steps_per_second": 0.067, "eval_wer": 4.1412949770860745, "step": 6000 }, { "epoch": 2.07, "learning_rate": 8.127796610169494e-06, "loss": 0.0622, "step": 6025 }, { "epoch": 2.08, "learning_rate": 8.119322033898307e-06, "loss": 0.0632, "step": 6050 }, { "epoch": 2.08, "learning_rate": 8.11084745762712e-06, "loss": 0.0542, "step": 6075 }, { "epoch": 2.09, "learning_rate": 8.102372881355932e-06, "loss": 0.0644, "step": 6100 }, { "epoch": 2.1, "learning_rate": 8.093898305084747e-06, "loss": 0.0671, "step": 6125 }, { "epoch": 2.11, "learning_rate": 8.08542372881356e-06, "loss": 0.0532, "step": 6150 }, { "epoch": 2.12, "learning_rate": 8.076949152542373e-06, "loss": 0.064, "step": 6175 }, { "epoch": 2.13, "learning_rate": 8.068474576271186e-06, "loss": 0.0624, "step": 6200 }, { "epoch": 2.14, "learning_rate": 8.06e-06, "loss": 0.0722, "step": 6225 }, { "epoch": 2.14, "learning_rate": 8.051525423728814e-06, "loss": 0.0674, "step": 6250 }, { "epoch": 2.15, "learning_rate": 8.043050847457627e-06, "loss": 0.0573, "step": 6275 }, { "epoch": 2.16, "learning_rate": 8.034576271186441e-06, "loss": 0.0618, "step": 6300 }, { "epoch": 2.17, "learning_rate": 8.026101694915254e-06, "loss": 0.0647, "step": 6325 }, { "epoch": 2.18, "learning_rate": 8.017627118644067e-06, "loss": 0.0667, "step": 6350 }, { "epoch": 2.19, "learning_rate": 8.009152542372882e-06, "loss": 0.0569, "step": 6375 }, { "epoch": 2.2, "learning_rate": 8.000677966101695e-06, "loss": 0.0755, "step": 6400 }, { "epoch": 2.2, "learning_rate": 7.99220338983051e-06, "loss": 0.0617, "step": 6425 }, { "epoch": 2.21, "learning_rate": 7.983728813559323e-06, "loss": 0.0592, "step": 6450 }, { "epoch": 2.22, "learning_rate": 7.975254237288136e-06, "loss": 0.0578, "step": 6475 }, { "epoch": 2.23, "learning_rate": 7.96677966101695e-06, "loss": 0.0623, "step": 6500 }, { "epoch": 2.23, "eval_loss": 0.12663590908050537, "eval_runtime": 14047.6478, "eval_samples_per_second": 1.592, "eval_steps_per_second": 0.066, "eval_wer": 4.139933753800082, "step": 6500 }, { "epoch": 2.24, "learning_rate": 7.958305084745763e-06, "loss": 0.0718, "step": 6525 }, { "epoch": 2.25, "learning_rate": 7.949830508474576e-06, "loss": 0.0643, "step": 6550 }, { "epoch": 2.26, "learning_rate": 7.941355932203391e-06, "loss": 0.0648, "step": 6575 }, { "epoch": 2.26, "learning_rate": 7.932881355932204e-06, "loss": 0.0632, "step": 6600 }, { "epoch": 2.27, "learning_rate": 7.924406779661019e-06, "loss": 0.0686, "step": 6625 }, { "epoch": 2.28, "learning_rate": 7.915932203389832e-06, "loss": 0.0675, "step": 6650 }, { "epoch": 2.29, "learning_rate": 7.907457627118645e-06, "loss": 0.0624, "step": 6675 }, { "epoch": 2.3, "learning_rate": 7.89898305084746e-06, "loss": 0.0553, "step": 6700 }, { "epoch": 2.31, "learning_rate": 7.890508474576272e-06, "loss": 0.0643, "step": 6725 }, { "epoch": 2.32, "learning_rate": 7.882033898305085e-06, "loss": 0.0573, "step": 6750 }, { "epoch": 2.32, "learning_rate": 7.8735593220339e-06, "loss": 0.0698, "step": 6775 }, { "epoch": 2.33, "learning_rate": 7.865084745762713e-06, "loss": 0.0659, "step": 6800 }, { "epoch": 2.34, "learning_rate": 7.856610169491526e-06, "loss": 0.0654, "step": 6825 }, { "epoch": 2.35, "learning_rate": 7.848135593220339e-06, "loss": 0.064, "step": 6850 }, { "epoch": 2.36, "learning_rate": 7.839661016949153e-06, "loss": 0.0615, "step": 6875 }, { "epoch": 2.37, "learning_rate": 7.831186440677966e-06, "loss": 0.0579, "step": 6900 }, { "epoch": 2.38, "learning_rate": 7.82271186440678e-06, "loss": 0.0675, "step": 6925 }, { "epoch": 2.39, "learning_rate": 7.814237288135594e-06, "loss": 0.0682, "step": 6950 }, { "epoch": 2.39, "learning_rate": 7.805762711864407e-06, "loss": 0.0682, "step": 6975 }, { "epoch": 2.4, "learning_rate": 7.79728813559322e-06, "loss": 0.0745, "step": 7000 }, { "epoch": 2.4, "eval_loss": 0.12451925128698349, "eval_runtime": 13911.3227, "eval_samples_per_second": 1.608, "eval_steps_per_second": 0.067, "eval_wer": 4.105449430554926, "step": 7000 }, { "epoch": 2.41, "learning_rate": 7.788813559322035e-06, "loss": 0.0574, "step": 7025 }, { "epoch": 2.42, "learning_rate": 7.780338983050848e-06, "loss": 0.0597, "step": 7050 }, { "epoch": 2.43, "learning_rate": 7.77186440677966e-06, "loss": 0.0613, "step": 7075 }, { "epoch": 2.44, "learning_rate": 7.763389830508475e-06, "loss": 0.0712, "step": 7100 }, { "epoch": 2.45, "learning_rate": 7.754915254237288e-06, "loss": 0.0622, "step": 7125 }, { "epoch": 2.45, "learning_rate": 7.746440677966103e-06, "loss": 0.0631, "step": 7150 }, { "epoch": 2.46, "learning_rate": 7.737966101694916e-06, "loss": 0.0696, "step": 7175 }, { "epoch": 2.47, "learning_rate": 7.729491525423729e-06, "loss": 0.0657, "step": 7200 }, { "epoch": 2.48, "learning_rate": 7.721016949152544e-06, "loss": 0.0593, "step": 7225 }, { "epoch": 2.49, "learning_rate": 7.712542372881357e-06, "loss": 0.0644, "step": 7250 }, { "epoch": 2.5, "learning_rate": 7.70406779661017e-06, "loss": 0.0602, "step": 7275 }, { "epoch": 2.51, "learning_rate": 7.695593220338984e-06, "loss": 0.0662, "step": 7300 }, { "epoch": 2.51, "learning_rate": 7.687118644067797e-06, "loss": 0.0654, "step": 7325 }, { "epoch": 2.52, "learning_rate": 7.678983050847458e-06, "loss": 0.0699, "step": 7350 }, { "epoch": 2.53, "learning_rate": 7.670508474576271e-06, "loss": 0.0657, "step": 7375 }, { "epoch": 2.54, "learning_rate": 7.662033898305086e-06, "loss": 0.0668, "step": 7400 }, { "epoch": 2.55, "learning_rate": 7.653559322033899e-06, "loss": 0.0598, "step": 7425 }, { "epoch": 2.56, "learning_rate": 7.645084745762712e-06, "loss": 0.062, "step": 7450 }, { "epoch": 2.57, "learning_rate": 7.636610169491526e-06, "loss": 0.0657, "step": 7475 }, { "epoch": 2.57, "learning_rate": 7.628135593220339e-06, "loss": 0.0686, "step": 7500 }, { "epoch": 2.57, "eval_loss": 0.12511542439460754, "eval_runtime": 13875.855, "eval_samples_per_second": 1.612, "eval_steps_per_second": 0.067, "eval_wer": 4.054176686782522, "step": 7500 } ], "max_steps": 30000, "num_train_epochs": 11, "total_flos": 3.6740551944830976e+20, "trial_name": null, "trial_params": null }