|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.999903428295509, |
|
"global_step": 51770, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.1583011583011583e-06, |
|
"loss": 2.3104, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028957528957528956, |
|
"loss": 1.4338, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005791505791505791, |
|
"loss": 1.2416, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005999696654176312, |
|
"loss": 1.2139, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005998690722049872, |
|
"loss": 1.1936, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005996980613784548, |
|
"loss": 1.1791, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0005994566730961414, |
|
"loss": 1.1672, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005991449640427416, |
|
"loss": 1.1574, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0005987630074162269, |
|
"loss": 1.1486, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0005983108929106564, |
|
"loss": 1.1427, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005977887266951138, |
|
"loss": 1.1356, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0005971966313887766, |
|
"loss": 1.1272, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0005965347460321212, |
|
"loss": 1.1254, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0005958032260542726, |
|
"loss": 1.118, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0005950022432365049, |
|
"loss": 1.1146, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0005941319856719031, |
|
"loss": 1.1097, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0005931926577211924, |
|
"loss": 1.1063, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0005921844799647499, |
|
"loss": 1.1027, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0005911076891508052, |
|
"loss": 1.0999, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005899625381398457, |
|
"loss": 1.0966, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005887492958452381, |
|
"loss": 1.0931, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_alliteration_score": 0.3998726520216492, |
|
"eval_harmonic_meter_score": 0.1010308935212264, |
|
"eval_harmonic_rhyme_score": 0.35292231999777424, |
|
"eval_meter_score": 0.3077031352625674, |
|
"eval_rhyme_score": 0.7291149689204202, |
|
"eval_runtime": 3230.3537, |
|
"eval_samples_per_second": 0.836, |
|
"eval_steps_per_second": 0.026, |
|
"step": 5177 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0005874682471700796, |
|
"loss": 1.0865, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0005861196929402952, |
|
"loss": 1.0684, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0005847039498339947, |
|
"loss": 1.0705, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0005832213503071088, |
|
"loss": 1.0694, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0005816722425153186, |
|
"loss": 1.0678, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0005800569902322985, |
|
"loss": 1.0669, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0005783759727642932, |
|
"loss": 1.0634, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0005766295848610451, |
|
"loss": 1.062, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005748182366230962, |
|
"loss": 1.0602, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0005729423534054853, |
|
"loss": 1.0587, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0005710023757178627, |
|
"loss": 1.0564, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.000568998759121046, |
|
"loss": 1.0547, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0005669319741200425, |
|
"loss": 1.0536, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0005648025060535602, |
|
"loss": 1.0517, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0005626108549800381, |
|
"loss": 1.0498, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0005603575355602176, |
|
"loss": 1.0482, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0005580430769362867, |
|
"loss": 1.0479, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0005556680226076214, |
|
"loss": 1.0446, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0005532329303031583, |
|
"loss": 1.0444, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0005507383718504232, |
|
"loss": 1.0416, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0005481849330412508, |
|
"loss": 1.0392, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_alliteration_score": 0.43183984747378457, |
|
"eval_harmonic_meter_score": 0.09296526825958593, |
|
"eval_harmonic_rhyme_score": 0.5441505816703797, |
|
"eval_meter_score": 0.30327833996094755, |
|
"eval_rhyme_score": 0.8429996906126559, |
|
"eval_runtime": 2339.9147, |
|
"eval_samples_per_second": 1.154, |
|
"eval_steps_per_second": 0.036, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.000545573213494224, |
|
"loss": 1.0256, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0005429038265138671, |
|
"loss": 1.0149, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0005401773989466244, |
|
"loss": 1.0166, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0005373945710336596, |
|
"loss": 1.0157, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0005345559962605089, |
|
"loss": 1.0155, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0005316623412036252, |
|
"loss": 1.0135, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.000528714285373846, |
|
"loss": 1.0134, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0005257125210568268, |
|
"loss": 1.0118, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0005226577531504722, |
|
"loss": 1.012, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0005195506989994064, |
|
"loss": 1.0101, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0005163920882265211, |
|
"loss": 1.0097, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0005131826625616392, |
|
"loss": 1.0085, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0005099231756673361, |
|
"loss": 1.0052, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0005066143929619589, |
|
"loss": 1.0064, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.000503257091439885, |
|
"loss": 1.0044, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0004998520594890613, |
|
"loss": 1.0015, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.00049640009670587, |
|
"loss": 1.0014, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0004929020137073603, |
|
"loss": 1.0006, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0004893586319408926, |
|
"loss": 0.9997, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0004857707834912409, |
|
"loss": 0.9991, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0004821393108851951, |
|
"loss": 0.9969, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_alliteration_score": 0.40482822655524603, |
|
"eval_harmonic_meter_score": 0.10445053009647749, |
|
"eval_harmonic_rhyme_score": 0.49816467657522634, |
|
"eval_meter_score": 0.3098783806475492, |
|
"eval_rhyme_score": 0.8195044525640993, |
|
"eval_runtime": 2332.5068, |
|
"eval_samples_per_second": 1.158, |
|
"eval_steps_per_second": 0.036, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0004784650668937127, |
|
"loss": 0.9695, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0004747489143316642, |
|
"loss": 0.9666, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0004709917258552203, |
|
"loss": 0.9684, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00046719438375692797, |
|
"loss": 0.9701, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0004633577797585233, |
|
"loss": 0.9693, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0004594828148015305, |
|
"loss": 0.9689, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00045557039883569595, |
|
"loss": 0.9675, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0004516214506053063, |
|
"loss": 0.966, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.000447636897433442, |
|
"loss": 0.9664, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.000443617675004216, |
|
"loss": 0.9645, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00043956472714304834, |
|
"loss": 0.9635, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0004354790055950309, |
|
"loss": 0.9621, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0004313614698014302, |
|
"loss": 0.962, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00042721308667438394, |
|
"loss": 0.9609, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.00042303483036984366, |
|
"loss": 0.9596, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00041882768205881495, |
|
"loss": 0.9578, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00041459262969695184, |
|
"loss": 0.9568, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0004103306677925571, |
|
"loss": 0.9552, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00040604279717304357, |
|
"loss": 0.9534, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0004017300247499127, |
|
"loss": 0.9541, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_alliteration_score": 0.43839452395768513, |
|
"eval_harmonic_meter_score": 0.12086540507848588, |
|
"eval_harmonic_rhyme_score": 0.4986143276050798, |
|
"eval_meter_score": 0.338880767934961, |
|
"eval_rhyme_score": 0.8207791218734892, |
|
"eval_runtime": 2519.5282, |
|
"eval_samples_per_second": 1.072, |
|
"eval_steps_per_second": 0.034, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.00039739336328230323, |
|
"loss": 0.9477, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.00039303383113916687, |
|
"loss": 0.9123, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00038865245206012774, |
|
"loss": 0.9168, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00038425025491507883, |
|
"loss": 0.9193, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0003798282734625755, |
|
"loss": 0.9201, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0003753875461070794, |
|
"loss": 0.9188, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0003709291156551129, |
|
"loss": 0.9192, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0003664540290703784, |
|
"loss": 0.9173, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00036196333722790264, |
|
"loss": 0.9166, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00035745809466726145, |
|
"loss": 0.9145, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0003529393593449451, |
|
"loss": 0.9144, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00034840819238591994, |
|
"loss": 0.9139, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0003438656578344473, |
|
"loss": 0.9126, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0003393128224042155, |
|
"loss": 0.9119, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0003347507552278469, |
|
"loss": 0.9099, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.00033018052760583447, |
|
"loss": 0.9072, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0003256032127549717, |
|
"loss": 0.908, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.0003210198855563304, |
|
"loss": 0.9063, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.00031643162230284954, |
|
"loss": 0.9036, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.00031183950044659135, |
|
"loss": 0.9039, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0003072445983457252, |
|
"loss": 0.9023, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_alliteration_score": 0.432449105490438, |
|
"eval_harmonic_meter_score": 0.11469247564220626, |
|
"eval_harmonic_rhyme_score": 0.5565104008090618, |
|
"eval_meter_score": 0.3233335011377191, |
|
"eval_rhyme_score": 0.8467553072173599, |
|
"eval_runtime": 2231.152, |
|
"eval_samples_per_second": 1.21, |
|
"eval_steps_per_second": 0.038, |
|
"step": 25885 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0003026479950112996, |
|
"loss": 0.8805, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00029805076985386, |
|
"loss": 0.8592, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00029345400242997323, |
|
"loss": 0.8613, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0002888587721887175, |
|
"loss": 0.8621, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.0002842661582181979, |
|
"loss": 0.8628, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.000279677238992146, |
|
"loss": 0.8614, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00027509309211666463, |
|
"loss": 0.8617, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0002705147940771754, |
|
"loss": 0.8606, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.0002659434199856307, |
|
"loss": 0.8597, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0002613800433280466, |
|
"loss": 0.8589, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002568257357124192, |
|
"loss": 0.856, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0002522815666170804, |
|
"loss": 0.8542, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00024774860313955555, |
|
"loss": 0.8544, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00024322790974597822, |
|
"loss": 0.8517, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00023872054802112475, |
|
"loss": 0.8522, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.00023422757641912385, |
|
"loss": 0.8512, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.0002297500500149027, |
|
"loss": 0.8495, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.00022528902025642543, |
|
"loss": 0.8473, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00022084553471778432, |
|
"loss": 0.8451, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00021642063685319983, |
|
"loss": 0.8414, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.00021201536575198834, |
|
"loss": 0.8411, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_alliteration_score": 0.43164362519201227, |
|
"eval_harmonic_meter_score": 0.10449656298407087, |
|
"eval_harmonic_rhyme_score": 0.5587655117502281, |
|
"eval_meter_score": 0.3140831187981907, |
|
"eval_rhyme_score": 0.8511610758760736, |
|
"eval_runtime": 2100.1558, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.04, |
|
"step": 31062 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00020763075589455592, |
|
"loss": 0.8075, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00020326783690947226, |
|
"loss": 0.796, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00019892763333168628, |
|
"loss": 0.7985, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.000194611164361936, |
|
"loss": 0.7995, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0001903194436274124, |
|
"loss": 0.7987, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0001860534789437309, |
|
"loss": 0.7977, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00018181427207826875, |
|
"loss": 0.799, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0001776028185149218, |
|
"loss": 0.797, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00017342010722033724, |
|
"loss": 0.799, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00016926712041167666, |
|
"loss": 0.7938, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00016514483332596397, |
|
"loss": 0.7928, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.000161054213991073, |
|
"loss": 0.7948, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00015699622299840705, |
|
"loss": 0.7913, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00015297181327732549, |
|
"loss": 0.7911, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00014898192987136932, |
|
"loss": 0.7883, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.000145027509716339, |
|
"loss": 0.7865, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.0001411094814202753, |
|
"loss": 0.7863, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00013722876504539635, |
|
"loss": 0.7826, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.00013338627189204153, |
|
"loss": 0.7829, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0001295829042846731, |
|
"loss": 0.7816, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_alliteration_score": 0.44621513944223107, |
|
"eval_harmonic_meter_score": 0.11710967524898745, |
|
"eval_harmonic_rhyme_score": 0.5515777130298052, |
|
"eval_meter_score": 0.33484803934701896, |
|
"eval_rhyme_score": 0.8477574870320731, |
|
"eval_runtime": 2122.0667, |
|
"eval_samples_per_second": 1.272, |
|
"eval_steps_per_second": 0.04, |
|
"step": 36239 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00012581955535998448, |
|
"loss": 0.7792, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0001220971088571674, |
|
"loss": 0.734, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00011841643891038518, |
|
"loss": 0.7362, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00011477840984350193, |
|
"loss": 0.7386, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00011118387596711477, |
|
"loss": 0.7384, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00010763368137793809, |
|
"loss": 0.7364, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00010412865976058613, |
|
"loss": 0.7367, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00010066963419180093, |
|
"loss": 0.7365, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 9.725741694717035e-05, |
|
"loss": 0.7343, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 9.389280931038336e-05, |
|
"loss": 0.737, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 9.057660138506682e-05, |
|
"loss": 0.7345, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 8.730957190924632e-05, |
|
"loss": 0.7332, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 8.409248807247727e-05, |
|
"loss": 0.7335, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 8.092610533568725e-05, |
|
"loss": 0.7302, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 7.781116725377309e-05, |
|
"loss": 0.7291, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 7.474840530099277e-05, |
|
"loss": 0.7303, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 7.173853869919559e-05, |
|
"loss": 0.7288, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.878227424892822e-05, |
|
"loss": 0.7283, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 6.588030616345898e-05, |
|
"loss": 0.726, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.303331590575642e-05, |
|
"loss": 0.7246, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 6.0241972028463316e-05, |
|
"loss": 0.7243, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_alliteration_score": 0.43950617283950616, |
|
"eval_harmonic_meter_score": 0.11078968787875117, |
|
"eval_harmonic_rhyme_score": 0.5599690603568903, |
|
"eval_meter_score": 0.3230506264598415, |
|
"eval_rhyme_score": 0.850570153117891, |
|
"eval_runtime": 2165.4131, |
|
"eval_samples_per_second": 1.247, |
|
"eval_steps_per_second": 0.039, |
|
"step": 41416 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 5.7506930016901755e-05, |
|
"loss": 0.7133, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.4828832135146994e-05, |
|
"loss": 0.6873, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.2208307275205774e-05, |
|
"loss": 0.6887, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 4.9645970809335146e-05, |
|
"loss": 0.6883, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 4.7142424445535695e-05, |
|
"loss": 0.6887, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 4.4698256086254156e-05, |
|
"loss": 0.688, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 4.231403969032698e-05, |
|
"loss": 0.6895, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 3.999033513819922e-05, |
|
"loss": 0.6901, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.772768810044874e-05, |
|
"loss": 0.6893, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.552662990964793e-05, |
|
"loss": 0.6873, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3.338767743559162e-05, |
|
"loss": 0.6895, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 3.131133296392159e-05, |
|
"loss": 0.6868, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 2.929808407817651e-05, |
|
"loss": 0.6875, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 2.734840354529305e-05, |
|
"loss": 0.687, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 2.5462749204587507e-05, |
|
"loss": 0.6864, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.3641563860241965e-05, |
|
"loss": 0.6841, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.1885275177322048e-05, |
|
"loss": 0.6865, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 2.019429558134873e-05, |
|
"loss": 0.685, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 1.856902216144962e-05, |
|
"loss": 0.6834, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.7009836577111302e-05, |
|
"loss": 0.6827, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 1.551710496855515e-05, |
|
"loss": 0.6828, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_alliteration_score": 0.44789762340036565, |
|
"eval_harmonic_meter_score": 0.11082478811479961, |
|
"eval_harmonic_rhyme_score": 0.5453593958718314, |
|
"eval_meter_score": 0.328679747890422, |
|
"eval_rhyme_score": 0.8485504968592449, |
|
"eval_runtime": 2181.3379, |
|
"eval_samples_per_second": 1.238, |
|
"eval_steps_per_second": 0.039, |
|
"step": 46593 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 1.4091177870757209e-05, |
|
"loss": 0.673, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.2732390131132907e-05, |
|
"loss": 0.6644, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 1.1441060830905591e-05, |
|
"loss": 0.6632, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 1.0217493210177418e-05, |
|
"loss": 0.6641, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 9.061974596719934e-06, |
|
"loss": 0.6654, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 7.974776338501631e-06, |
|
"loss": 0.6633, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.956153739967863e-06, |
|
"loss": 0.6656, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 6.0063460020883915e-06, |
|
"loss": 0.664, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 5.125576166185996e-06, |
|
"loss": 0.6638, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 4.314051061560497e-06, |
|
"loss": 0.6645, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 3.571961256919276e-06, |
|
"loss": 0.6624, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.8994810156265035e-06, |
|
"loss": 0.663, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 2.2967682547812782e-06, |
|
"loss": 0.6631, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.7639645081341524e-06, |
|
"loss": 0.663, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.3011948928511873e-06, |
|
"loss": 0.6641, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 9.085680801330208e-07, |
|
"loss": 0.6634, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.861762696956151e-07, |
|
"loss": 0.6629, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 3.340951681194082e-07, |
|
"loss": 0.6652, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 1.523839710711683e-07, |
|
"loss": 0.663, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.108534940331365e-08, |
|
"loss": 0.6623, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.2543913346106945e-10, |
|
"loss": 0.6634, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_alliteration_score": 0.4406211936662607, |
|
"eval_harmonic_meter_score": 0.11891243404089967, |
|
"eval_harmonic_rhyme_score": 0.5297086635780841, |
|
"eval_meter_score": 0.33801080805418304, |
|
"eval_rhyme_score": 0.8432321768961265, |
|
"eval_runtime": 2127.802, |
|
"eval_samples_per_second": 1.269, |
|
"eval_steps_per_second": 0.04, |
|
"step": 51770 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 51770, |
|
"total_flos": 1.310953664443056e+18, |
|
"train_loss": 0.8821613311353097, |
|
"train_runtime": 68769.7958, |
|
"train_samples_per_second": 96.366, |
|
"train_steps_per_second": 0.753 |
|
} |
|
], |
|
"max_steps": 51770, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.310953664443056e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|