|
{ |
|
"best_metric": 0.7940676212310791, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.13368983957219252, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00267379679144385, |
|
"grad_norm": 13.120458602905273, |
|
"learning_rate": 1e-05, |
|
"loss": 3.0135, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00267379679144385, |
|
"eval_loss": 2.9659292697906494, |
|
"eval_runtime": 30.2128, |
|
"eval_samples_per_second": 20.852, |
|
"eval_steps_per_second": 2.615, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0053475935828877, |
|
"grad_norm": 11.394423484802246, |
|
"learning_rate": 2e-05, |
|
"loss": 2.7657, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008021390374331552, |
|
"grad_norm": 9.692898750305176, |
|
"learning_rate": 3e-05, |
|
"loss": 2.9229, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0106951871657754, |
|
"grad_norm": 6.488343715667725, |
|
"learning_rate": 4e-05, |
|
"loss": 2.6038, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.013368983957219251, |
|
"grad_norm": 4.95293664932251, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0767, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.016042780748663103, |
|
"grad_norm": 5.345605373382568, |
|
"learning_rate": 6e-05, |
|
"loss": 2.0136, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01871657754010695, |
|
"grad_norm": 5.651119709014893, |
|
"learning_rate": 7e-05, |
|
"loss": 1.889, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0213903743315508, |
|
"grad_norm": 5.409059047698975, |
|
"learning_rate": 8e-05, |
|
"loss": 1.8266, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02406417112299465, |
|
"grad_norm": 5.7462944984436035, |
|
"learning_rate": 9e-05, |
|
"loss": 1.5063, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.026737967914438502, |
|
"grad_norm": 5.222486972808838, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5455, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.029411764705882353, |
|
"grad_norm": 3.9986016750335693, |
|
"learning_rate": 9.98458666866564e-05, |
|
"loss": 1.448, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03208556149732621, |
|
"grad_norm": 3.586981773376465, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 1.1441, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.034759358288770054, |
|
"grad_norm": 3.4107515811920166, |
|
"learning_rate": 9.861849601988383e-05, |
|
"loss": 1.2066, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0374331550802139, |
|
"grad_norm": 4.240262031555176, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.0419, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.040106951871657755, |
|
"grad_norm": 3.404278039932251, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.2043, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0427807486631016, |
|
"grad_norm": 2.843229055404663, |
|
"learning_rate": 9.45503262094184e-05, |
|
"loss": 1.0498, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 3.237807273864746, |
|
"learning_rate": 9.263200821770461e-05, |
|
"loss": 1.0109, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0481283422459893, |
|
"grad_norm": 3.588984251022339, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.0928, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05080213903743316, |
|
"grad_norm": 3.051542282104492, |
|
"learning_rate": 8.802029828000156e-05, |
|
"loss": 0.9856, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.053475935828877004, |
|
"grad_norm": 2.899651527404785, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 0.9596, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05614973262032086, |
|
"grad_norm": 2.7841622829437256, |
|
"learning_rate": 8.247240241650918e-05, |
|
"loss": 0.9303, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 2.976398229598999, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.9815, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06149732620320856, |
|
"grad_norm": 3.123530626296997, |
|
"learning_rate": 7.612492823579745e-05, |
|
"loss": 1.0648, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06417112299465241, |
|
"grad_norm": 2.737558126449585, |
|
"learning_rate": 7.269952498697734e-05, |
|
"loss": 0.834, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06684491978609626, |
|
"grad_norm": 3.086487293243408, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 0.9048, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06684491978609626, |
|
"eval_loss": 0.8926352858543396, |
|
"eval_runtime": 30.4861, |
|
"eval_samples_per_second": 20.665, |
|
"eval_steps_per_second": 2.591, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06951871657754011, |
|
"grad_norm": 2.6327173709869385, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.9139, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07219251336898395, |
|
"grad_norm": 2.566983222961426, |
|
"learning_rate": 6.167226819279528e-05, |
|
"loss": 0.8632, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0748663101604278, |
|
"grad_norm": 2.66111159324646, |
|
"learning_rate": 5.782172325201155e-05, |
|
"loss": 0.8961, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07754010695187166, |
|
"grad_norm": 2.4014999866485596, |
|
"learning_rate": 5.392295478639225e-05, |
|
"loss": 0.8594, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08021390374331551, |
|
"grad_norm": 2.5496883392333984, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9502, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08288770053475936, |
|
"grad_norm": 2.623340368270874, |
|
"learning_rate": 4.607704521360776e-05, |
|
"loss": 0.8187, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0855614973262032, |
|
"grad_norm": 2.894226551055908, |
|
"learning_rate": 4.2178276747988446e-05, |
|
"loss": 0.8912, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08823529411764706, |
|
"grad_norm": 2.7737977504730225, |
|
"learning_rate": 3.832773180720475e-05, |
|
"loss": 0.851, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 3.402663469314575, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.8979, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09358288770053476, |
|
"grad_norm": 2.527940511703491, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 0.8144, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0962566844919786, |
|
"grad_norm": 2.616532564163208, |
|
"learning_rate": 2.7300475013022663e-05, |
|
"loss": 0.8559, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09893048128342247, |
|
"grad_norm": 2.637568473815918, |
|
"learning_rate": 2.3875071764202563e-05, |
|
"loss": 0.8393, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10160427807486631, |
|
"grad_norm": 2.4025931358337402, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.821, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10427807486631016, |
|
"grad_norm": 2.9822089672088623, |
|
"learning_rate": 1.7527597583490822e-05, |
|
"loss": 1.0798, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10695187165775401, |
|
"grad_norm": 2.652627468109131, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.845, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10962566844919786, |
|
"grad_norm": 2.6627089977264404, |
|
"learning_rate": 1.1979701719998453e-05, |
|
"loss": 0.9285, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11229946524064172, |
|
"grad_norm": 2.631807327270508, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.8711, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11497326203208556, |
|
"grad_norm": 2.398806571960449, |
|
"learning_rate": 7.367991782295391e-06, |
|
"loss": 0.8058, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 2.3895256519317627, |
|
"learning_rate": 5.449673790581611e-06, |
|
"loss": 0.8576, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12032085561497326, |
|
"grad_norm": 2.452927827835083, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 0.8259, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12299465240641712, |
|
"grad_norm": 2.3350183963775635, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.7746, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12566844919786097, |
|
"grad_norm": 2.2762725353240967, |
|
"learning_rate": 1.3815039801161721e-06, |
|
"loss": 0.7767, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12834224598930483, |
|
"grad_norm": 2.786212682723999, |
|
"learning_rate": 6.15582970243117e-07, |
|
"loss": 0.9144, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13101604278074866, |
|
"grad_norm": 2.353525161743164, |
|
"learning_rate": 1.5413331334360182e-07, |
|
"loss": 0.7969, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13368983957219252, |
|
"grad_norm": 2.672391176223755, |
|
"learning_rate": 0.0, |
|
"loss": 0.7842, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13368983957219252, |
|
"eval_loss": 0.7940676212310791, |
|
"eval_runtime": 30.486, |
|
"eval_samples_per_second": 20.665, |
|
"eval_steps_per_second": 2.591, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0821993309863936e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|