{ "best_metric": 2.505044460296631, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.23837902264600716, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004767580452920143, "grad_norm": 3.17598819732666, "learning_rate": 1e-05, "loss": 2.7102, "step": 1 }, { "epoch": 0.004767580452920143, "eval_loss": 4.050970077514648, "eval_runtime": 24.7205, "eval_samples_per_second": 14.32, "eval_steps_per_second": 3.6, "step": 1 }, { "epoch": 0.009535160905840286, "grad_norm": 3.5215253829956055, "learning_rate": 2e-05, "loss": 2.6229, "step": 2 }, { "epoch": 0.014302741358760428, "grad_norm": 1.8396703004837036, "learning_rate": 3e-05, "loss": 2.5976, "step": 3 }, { "epoch": 0.01907032181168057, "grad_norm": 1.822470784187317, "learning_rate": 4e-05, "loss": 2.8094, "step": 4 }, { "epoch": 0.023837902264600714, "grad_norm": 1.784137487411499, "learning_rate": 5e-05, "loss": 2.7152, "step": 5 }, { "epoch": 0.028605482717520857, "grad_norm": 1.5671970844268799, "learning_rate": 6e-05, "loss": 2.8053, "step": 6 }, { "epoch": 0.033373063170441, "grad_norm": 1.2720322608947754, "learning_rate": 7e-05, "loss": 2.7655, "step": 7 }, { "epoch": 0.03814064362336114, "grad_norm": 1.3592026233673096, "learning_rate": 8e-05, "loss": 2.852, "step": 8 }, { "epoch": 0.04290822407628129, "grad_norm": 1.2666702270507812, "learning_rate": 9e-05, "loss": 2.8961, "step": 9 }, { "epoch": 0.04767580452920143, "grad_norm": 1.1434534788131714, "learning_rate": 0.0001, "loss": 2.5641, "step": 10 }, { "epoch": 0.052443384982121574, "grad_norm": 1.2260290384292603, "learning_rate": 9.999316524962345e-05, "loss": 2.7242, "step": 11 }, { "epoch": 0.057210965435041714, "grad_norm": 1.1492360830307007, "learning_rate": 9.997266286704631e-05, "loss": 2.6978, "step": 12 }, { "epoch": 0.06197854588796186, "grad_norm": 1.1313751935958862, "learning_rate": 9.993849845741524e-05, "loss": 2.4025, "step": 13 }, { "epoch": 0.066746126340882, "grad_norm": 1.216576099395752, "learning_rate": 9.989068136093873e-05, "loss": 2.5651, "step": 14 }, { "epoch": 0.07151370679380215, "grad_norm": 1.0459645986557007, "learning_rate": 9.98292246503335e-05, "loss": 2.5407, "step": 15 }, { "epoch": 0.07628128724672228, "grad_norm": 1.1250581741333008, "learning_rate": 9.975414512725057e-05, "loss": 2.5113, "step": 16 }, { "epoch": 0.08104886769964244, "grad_norm": 1.1516832113265991, "learning_rate": 9.966546331768191e-05, "loss": 2.5224, "step": 17 }, { "epoch": 0.08581644815256258, "grad_norm": 0.9984982013702393, "learning_rate": 9.956320346634876e-05, "loss": 2.5051, "step": 18 }, { "epoch": 0.09058402860548272, "grad_norm": 0.9816920161247253, "learning_rate": 9.944739353007344e-05, "loss": 2.5969, "step": 19 }, { "epoch": 0.09535160905840286, "grad_norm": 1.113448977470398, "learning_rate": 9.931806517013612e-05, "loss": 2.4411, "step": 20 }, { "epoch": 0.10011918951132301, "grad_norm": 1.2237359285354614, "learning_rate": 9.917525374361912e-05, "loss": 2.5044, "step": 21 }, { "epoch": 0.10488676996424315, "grad_norm": 1.055956482887268, "learning_rate": 9.901899829374047e-05, "loss": 2.3804, "step": 22 }, { "epoch": 0.10965435041716329, "grad_norm": 1.0263848304748535, "learning_rate": 9.884934153917997e-05, "loss": 2.3218, "step": 23 }, { "epoch": 0.11442193087008343, "grad_norm": 1.0815330743789673, "learning_rate": 9.86663298624003e-05, "loss": 2.4918, "step": 24 }, { "epoch": 0.11918951132300358, "grad_norm": 
1.05085289478302, "learning_rate": 9.847001329696653e-05, "loss": 2.2042, "step": 25 }, { "epoch": 0.12395709177592372, "grad_norm": 1.147608757019043, "learning_rate": 9.826044551386744e-05, "loss": 2.4462, "step": 26 }, { "epoch": 0.12872467222884387, "grad_norm": 1.1940661668777466, "learning_rate": 9.803768380684242e-05, "loss": 2.4366, "step": 27 }, { "epoch": 0.133492252681764, "grad_norm": 1.2002006769180298, "learning_rate": 9.780178907671789e-05, "loss": 2.3046, "step": 28 }, { "epoch": 0.13825983313468415, "grad_norm": 1.225758671760559, "learning_rate": 9.755282581475769e-05, "loss": 2.344, "step": 29 }, { "epoch": 0.1430274135876043, "grad_norm": 1.453913927078247, "learning_rate": 9.729086208503174e-05, "loss": 2.3262, "step": 30 }, { "epoch": 0.14779499404052443, "grad_norm": 1.2313761711120605, "learning_rate": 9.701596950580806e-05, "loss": 2.2569, "step": 31 }, { "epoch": 0.15256257449344457, "grad_norm": 1.3174500465393066, "learning_rate": 9.672822322997305e-05, "loss": 2.0015, "step": 32 }, { "epoch": 0.1573301549463647, "grad_norm": 1.4254320859909058, "learning_rate": 9.642770192448536e-05, "loss": 2.2475, "step": 33 }, { "epoch": 0.16209773539928488, "grad_norm": 2.1407878398895264, "learning_rate": 9.611448774886924e-05, "loss": 2.4093, "step": 34 }, { "epoch": 0.16686531585220502, "grad_norm": 1.4871824979782104, "learning_rate": 9.578866633275288e-05, "loss": 2.2877, "step": 35 }, { "epoch": 0.17163289630512515, "grad_norm": 1.8685574531555176, "learning_rate": 9.545032675245813e-05, "loss": 2.1265, "step": 36 }, { "epoch": 0.1764004767580453, "grad_norm": 1.6461660861968994, "learning_rate": 9.509956150664796e-05, "loss": 2.4455, "step": 37 }, { "epoch": 0.18116805721096543, "grad_norm": 1.6946738958358765, "learning_rate": 9.473646649103818e-05, "loss": 2.4016, "step": 38 }, { "epoch": 0.18593563766388557, "grad_norm": 1.6008548736572266, "learning_rate": 9.43611409721806e-05, "loss": 2.5618, "step": 39 }, { "epoch": 0.1907032181168057, "grad_norm": 1.5371747016906738, "learning_rate": 9.397368756032445e-05, "loss": 2.4849, "step": 40 }, { "epoch": 0.19547079856972585, "grad_norm": 1.770545244216919, "learning_rate": 9.357421218136386e-05, "loss": 2.3975, "step": 41 }, { "epoch": 0.20023837902264602, "grad_norm": 1.924229621887207, "learning_rate": 9.316282404787871e-05, "loss": 2.7272, "step": 42 }, { "epoch": 0.20500595947556616, "grad_norm": 1.900739073753357, "learning_rate": 9.273963562927695e-05, "loss": 2.8349, "step": 43 }, { "epoch": 0.2097735399284863, "grad_norm": 2.0874862670898438, "learning_rate": 9.230476262104677e-05, "loss": 2.7109, "step": 44 }, { "epoch": 0.21454112038140644, "grad_norm": 1.8650916814804077, "learning_rate": 9.185832391312644e-05, "loss": 2.6047, "step": 45 }, { "epoch": 0.21930870083432658, "grad_norm": 2.0557563304901123, "learning_rate": 9.140044155740101e-05, "loss": 2.5962, "step": 46 }, { "epoch": 0.22407628128724671, "grad_norm": 1.7432714700698853, "learning_rate": 9.093124073433463e-05, "loss": 2.3363, "step": 47 }, { "epoch": 0.22884386174016685, "grad_norm": 2.252993583679199, "learning_rate": 9.045084971874738e-05, "loss": 2.5628, "step": 48 }, { "epoch": 0.23361144219308702, "grad_norm": 2.2323484420776367, "learning_rate": 8.995939984474624e-05, "loss": 2.4012, "step": 49 }, { "epoch": 0.23837902264600716, "grad_norm": 4.776430606842041, "learning_rate": 8.945702546981969e-05, "loss": 2.2413, "step": 50 }, { "epoch": 0.23837902264600716, "eval_loss": 2.505044460296631, "eval_runtime": 25.1253, 
"eval_samples_per_second": 14.089, "eval_steps_per_second": 3.542, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.743754247923302e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }