{
  "best_metric": 2.7940969467163086,
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
  "epoch": 0.026761819803746655,
  "eval_steps": 50,
  "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.947071067499257e-05,
      "eval_loss": 3.2442233562469482,
      "eval_runtime": 149.0831,
      "eval_samples_per_second": 47.49,
      "eval_steps_per_second": 11.873,
      "step": 1
    },
    {
      "epoch": 0.0005947071067499256,
      "grad_norm": 0.7875794172286987,
      "learning_rate": 4.1400000000000003e-05,
      "loss": 3.0668,
      "step": 10
    },
    {
      "epoch": 0.0011894142134998512,
      "grad_norm": 0.8943319916725159,
      "learning_rate": 8.280000000000001e-05,
      "loss": 3.1298,
      "step": 20
    },
    {
      "epoch": 0.001784121320249777,
      "grad_norm": 1.10039222240448,
      "learning_rate": 0.00012419999999999998,
      "loss": 3.0946,
      "step": 30
    },
    {
      "epoch": 0.0023788284269997025,
      "grad_norm": 1.2827677726745605,
      "learning_rate": 0.00016560000000000001,
      "loss": 2.9561,
      "step": 40
    },
    {
      "epoch": 0.0029735355337496285,
      "grad_norm": 2.5582001209259033,
      "learning_rate": 0.000207,
      "loss": 2.998,
      "step": 50
    },
    {
      "epoch": 0.0029735355337496285,
      "eval_loss": 2.9912269115448,
      "eval_runtime": 148.1587,
      "eval_samples_per_second": 47.787,
      "eval_steps_per_second": 11.947,
      "step": 50
    },
    {
      "epoch": 0.003568242640499554,
      "grad_norm": 0.7799744009971619,
      "learning_rate": 0.00020674787920189178,
      "loss": 3.003,
      "step": 60
    },
    {
      "epoch": 0.004162949747249479,
      "grad_norm": 0.8374800682067871,
      "learning_rate": 0.00020599274511475253,
      "loss": 3.0247,
      "step": 70
    },
    {
      "epoch": 0.004757656853999405,
      "grad_norm": 0.9920192956924438,
      "learning_rate": 0.00020473827667594888,
      "loss": 2.9491,
      "step": 80
    },
    {
      "epoch": 0.0053523639607493305,
      "grad_norm": 1.210517168045044,
      "learning_rate": 0.00020299058552961598,
      "loss": 2.9129,
      "step": 90
    },
    {
      "epoch": 0.005947071067499257,
      "grad_norm": 2.20344614982605,
      "learning_rate": 0.00020075818625134152,
      "loss": 2.9891,
      "step": 100
    },
    {
      "epoch": 0.005947071067499257,
      "eval_loss": 2.934997797012329,
      "eval_runtime": 149.0913,
      "eval_samples_per_second": 47.488,
      "eval_steps_per_second": 11.872,
      "step": 100
    },
    {
      "epoch": 0.006541778174249183,
      "grad_norm": 0.7027397155761719,
      "learning_rate": 0.00019805195486600916,
      "loss": 2.9015,
      "step": 110
    },
    {
      "epoch": 0.007136485280999108,
      "grad_norm": 0.7384776473045349,
      "learning_rate": 0.00019488507586089894,
      "loss": 2.9129,
      "step": 120
    },
    {
      "epoch": 0.007731192387749034,
      "grad_norm": 0.8933420777320862,
      "learning_rate": 0.00019127297795219008,
      "loss": 2.8789,
      "step": 130
    },
    {
      "epoch": 0.008325899494498959,
      "grad_norm": 1.1296390295028687,
      "learning_rate": 0.00018723325891780706,
      "loss": 2.8091,
      "step": 140
    },
    {
      "epoch": 0.008920606601248885,
      "grad_norm": 1.680759310722351,
      "learning_rate": 0.0001827855998628142,
      "loss": 2.8615,
      "step": 150
    },
    {
      "epoch": 0.008920606601248885,
      "eval_loss": 2.9041831493377686,
      "eval_runtime": 149.8181,
      "eval_samples_per_second": 47.257,
      "eval_steps_per_second": 11.814,
      "step": 150
    },
    {
      "epoch": 0.00951531370799881,
      "grad_norm": 0.7546591758728027,
      "learning_rate": 0.0001779516693350504,
      "loss": 2.8554,
      "step": 160
    },
    {
      "epoch": 0.010110020814748736,
      "grad_norm": 0.7574900388717651,
      "learning_rate": 0.00017275501775814182,
      "loss": 2.7957,
      "step": 170
    },
    {
      "epoch": 0.010704727921498661,
      "grad_norm": 0.8689918518066406,
      "learning_rate": 0.00016722096269620562,
      "loss": 2.8529,
      "step": 180
    },
    {
      "epoch": 0.011299435028248588,
      "grad_norm": 1.0633153915405273,
      "learning_rate": 0.00016137646550922228,
      "loss": 2.8309,
      "step": 190
    },
    {
      "epoch": 0.011894142134998514,
      "grad_norm": 2.1989660263061523,
      "learning_rate": 0.00015525,
      "loss": 2.9186,
      "step": 200
    },
    {
      "epoch": 0.011894142134998514,
      "eval_loss": 2.8749983310699463,
      "eval_runtime": 149.067,
      "eval_samples_per_second": 47.495,
      "eval_steps_per_second": 11.874,
      "step": 200
    },
    {
      "epoch": 0.012488849241748439,
      "grad_norm": 0.6521171927452087,
      "learning_rate": 0.0001488714136926695,
      "loss": 2.9137,
      "step": 210
    },
    {
      "epoch": 0.013083556348498365,
      "grad_norm": 0.7320899963378906,
      "learning_rate": 0.0001422717824185469,
      "loss": 2.8405,
      "step": 220
    },
    {
      "epoch": 0.01367826345524829,
      "grad_norm": 0.8370326161384583,
      "learning_rate": 0.00013548325891780705,
      "loss": 2.8327,
      "step": 230
    },
    {
      "epoch": 0.014272970561998216,
      "grad_norm": 1.010695219039917,
      "learning_rate": 0.0001285389161945656,
      "loss": 2.8228,
      "step": 240
    },
    {
      "epoch": 0.014867677668748141,
      "grad_norm": 1.9595260620117188,
      "learning_rate": 0.0001214725863885273,
      "loss": 2.772,
      "step": 250
    },
    {
      "epoch": 0.014867677668748141,
      "eval_loss": 2.844726085662842,
      "eval_runtime": 150.3953,
      "eval_samples_per_second": 47.076,
      "eval_steps_per_second": 11.769,
      "step": 250
    },
    {
      "epoch": 0.015462384775498068,
      "grad_norm": 0.6232745051383972,
      "learning_rate": 0.00011431869594820213,
      "loss": 2.8667,
      "step": 260
    },
    {
      "epoch": 0.016057091882247992,
      "grad_norm": 0.7752615213394165,
      "learning_rate": 0.00010711209790870886,
      "loss": 2.8185,
      "step": 270
    },
    {
      "epoch": 0.016651798988997917,
      "grad_norm": 0.8857811093330383,
      "learning_rate": 9.988790209129117e-05,
      "loss": 2.7915,
      "step": 280
    },
    {
      "epoch": 0.017246506095747845,
      "grad_norm": 1.0391193628311157,
      "learning_rate": 9.268130405179787e-05,
      "loss": 2.795,
      "step": 290
    },
    {
      "epoch": 0.01784121320249777,
      "grad_norm": 1.5660728216171265,
      "learning_rate": 8.55274136114727e-05,
      "loss": 2.771,
      "step": 300
    },
    {
      "epoch": 0.01784121320249777,
      "eval_loss": 2.820711851119995,
      "eval_runtime": 149.8338,
      "eval_samples_per_second": 47.252,
      "eval_steps_per_second": 11.813,
      "step": 300
    },
    {
      "epoch": 0.018435920309247695,
      "grad_norm": 0.658405065536499,
      "learning_rate": 7.84610838054344e-05,
      "loss": 2.8653,
      "step": 310
    },
    {
      "epoch": 0.01903062741599762,
      "grad_norm": 0.7782045602798462,
      "learning_rate": 7.151674108219295e-05,
      "loss": 2.8074,
      "step": 320
    },
    {
      "epoch": 0.019625334522747548,
      "grad_norm": 0.9159675240516663,
      "learning_rate": 6.472821758145309e-05,
      "loss": 2.814,
      "step": 330
    },
    {
      "epoch": 0.020220041629497473,
      "grad_norm": 1.0867841243743896,
      "learning_rate": 5.8128586307330475e-05,
      "loss": 2.7224,
      "step": 340
    },
    {
      "epoch": 0.020814748736247397,
      "grad_norm": 1.6991764307022095,
      "learning_rate": 5.175000000000002e-05,
      "loss": 2.7216,
      "step": 350
    },
    {
      "epoch": 0.020814748736247397,
      "eval_loss": 2.8091001510620117,
      "eval_runtime": 150.0782,
      "eval_samples_per_second": 47.175,
      "eval_steps_per_second": 11.794,
      "step": 350
    },
    {
      "epoch": 0.021409455842997322,
      "grad_norm": 0.6833013296127319,
      "learning_rate": 4.5623534490777714e-05,
      "loss": 2.8396,
      "step": 360
    },
    {
      "epoch": 0.02200416294974725,
      "grad_norm": 0.7645152807235718,
      "learning_rate": 3.9779037303794365e-05,
      "loss": 2.7683,
      "step": 370
    },
    {
      "epoch": 0.022598870056497175,
      "grad_norm": 0.88221675157547,
      "learning_rate": 3.42449822418582e-05,
      "loss": 2.7769,
      "step": 380
    },
    {
      "epoch": 0.0231935771632471,
      "grad_norm": 1.0650640726089478,
      "learning_rate": 2.9048330664949622e-05,
      "loss": 2.782,
      "step": 390
    },
    {
      "epoch": 0.023788284269997028,
      "grad_norm": 1.8103938102722168,
      "learning_rate": 2.4214400137185785e-05,
      "loss": 2.8704,
      "step": 400
    },
    {
      "epoch": 0.023788284269997028,
      "eval_loss": 2.798283100128174,
      "eval_runtime": 149.8594,
      "eval_samples_per_second": 47.244,
      "eval_steps_per_second": 11.811,
      "step": 400
    },
    {
      "epoch": 0.024382991376746953,
      "grad_norm": 0.6215288639068604,
      "learning_rate": 1.976674108219295e-05,
      "loss": 2.8352,
      "step": 410
    },
    {
      "epoch": 0.024977698483496878,
      "grad_norm": 0.7338668704032898,
      "learning_rate": 1.572702204780991e-05,
      "loss": 2.801,
      "step": 420
    },
    {
      "epoch": 0.025572405590246802,
      "grad_norm": 0.9176461100578308,
      "learning_rate": 1.2114924139101056e-05,
      "loss": 2.7443,
      "step": 430
    },
    {
      "epoch": 0.02616711269699673,
      "grad_norm": 1.041648507118225,
      "learning_rate": 8.948045133990798e-06,
      "loss": 2.7708,
      "step": 440
    },
    {
      "epoch": 0.026761819803746655,
      "grad_norm": 1.8905234336853027,
      "learning_rate": 6.241813748658489e-06,
      "loss": 2.7077,
      "step": 450
    },
    {
      "epoch": 0.026761819803746655,
      "eval_loss": 2.7940969467163086,
      "eval_runtime": 149.9687,
      "eval_samples_per_second": 47.21,
      "eval_steps_per_second": 11.802,
      "step": 450
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4749286456688640.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}