| { | |
| "best_metric": 0.6236987905363891, | |
| "best_model_checkpoint": "tiny-mistral/checkpoint-5144", | |
| "epoch": 12.0, | |
| "eval_steps": 500, | |
| "global_step": 7716, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 13.069233894348145, | |
| "learning_rate": 4.7413167444271646e-05, | |
| "loss": 1.4479, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6498838109992254, | |
| "eval_f1_macro": 0.45467871431664447, | |
| "eval_f1_micro": 0.6498838109992254, | |
| "eval_f1_weighted": 0.6214028764904677, | |
| "eval_loss": 1.118202805519104, | |
| "eval_macro_fpr": 0.039027492204575735, | |
| "eval_macro_sensitivity": 0.4743680961365506, | |
| "eval_macro_specificity": 0.9731278674075015, | |
| "eval_precision": 0.6258370043663808, | |
| "eval_precision_macro": 0.4712458606651466, | |
| "eval_recall": 0.6498838109992254, | |
| "eval_recall_macro": 0.4743680961365506, | |
| "eval_runtime": 52.8829, | |
| "eval_samples_per_second": 24.412, | |
| "eval_steps_per_second": 3.063, | |
| "eval_weighted_fpr": 0.03705525495982948, | |
| "eval_weighted_sensitivity": 0.6498838109992254, | |
| "eval_weighted_specificity": 0.9470342001132973, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 42.20966720581055, | |
| "learning_rate": 4.482115085536548e-05, | |
| "loss": 0.8133, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6986831913245546, | |
| "eval_f1_macro": 0.5473679856051097, | |
| "eval_f1_micro": 0.6986831913245546, | |
| "eval_f1_weighted": 0.6969580552726153, | |
| "eval_loss": 1.0854355096817017, | |
| "eval_macro_fpr": 0.030488809177152586, | |
| "eval_macro_sensitivity": 0.552765390116517, | |
| "eval_macro_specificity": 0.9773020581125585, | |
| "eval_precision": 0.7196764053623382, | |
| "eval_precision_macro": 0.5876945028566362, | |
| "eval_recall": 0.6986831913245546, | |
| "eval_recall_macro": 0.552765390116517, | |
| "eval_runtime": 93.8123, | |
| "eval_samples_per_second": 13.762, | |
| "eval_steps_per_second": 1.727, | |
| "eval_weighted_fpr": 0.029883997848966736, | |
| "eval_weighted_sensitivity": 0.6986831913245546, | |
| "eval_weighted_specificity": 0.9608476803638234, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 26.4113712310791, | |
| "learning_rate": 4.223950233281493e-05, | |
| "loss": 0.5592, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6986831913245546, | |
| "eval_f1_macro": 0.6012751387344262, | |
| "eval_f1_micro": 0.6986831913245546, | |
| "eval_f1_weighted": 0.6997681579317561, | |
| "eval_loss": 1.6113793849945068, | |
| "eval_macro_fpr": 0.030358926871943602, | |
| "eval_macro_sensitivity": 0.5880627120151358, | |
| "eval_macro_specificity": 0.9773067719065487, | |
| "eval_precision": 0.7107021565435693, | |
| "eval_precision_macro": 0.6368062038606529, | |
| "eval_recall": 0.6986831913245546, | |
| "eval_recall_macro": 0.5880627120151358, | |
| "eval_runtime": 96.7771, | |
| "eval_samples_per_second": 13.34, | |
| "eval_steps_per_second": 1.674, | |
| "eval_weighted_fpr": 0.029883997848966736, | |
| "eval_weighted_sensitivity": 0.6986831913245546, | |
| "eval_weighted_specificity": 0.9609183872736765, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 0.03971128165721893, | |
| "learning_rate": 3.964748574390876e-05, | |
| "loss": 0.3916, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 13.533605575561523, | |
| "learning_rate": 3.705546915500259e-05, | |
| "loss": 0.2375, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6955848179705655, | |
| "eval_f1_macro": 0.5698961254173763, | |
| "eval_f1_micro": 0.6955848179705655, | |
| "eval_f1_weighted": 0.6923294650516646, | |
| "eval_loss": 1.7778925895690918, | |
| "eval_macro_fpr": 0.030968414851007965, | |
| "eval_macro_sensitivity": 0.5666678921166624, | |
| "eval_macro_specificity": 0.9768109367511323, | |
| "eval_precision": 0.7000704251293273, | |
| "eval_precision_macro": 0.584042243736088, | |
| "eval_recall": 0.6955848179705655, | |
| "eval_recall_macro": 0.5666678921166624, | |
| "eval_runtime": 94.9154, | |
| "eval_samples_per_second": 13.602, | |
| "eval_steps_per_second": 1.707, | |
| "eval_weighted_fpr": 0.030312379483224065, | |
| "eval_weighted_sensitivity": 0.6955848179705655, | |
| "eval_weighted_specificity": 0.956579233296421, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 0.2196340560913086, | |
| "learning_rate": 3.446345256609643e-05, | |
| "loss": 0.1586, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6948102246320682, | |
| "eval_f1_macro": 0.5694948674292386, | |
| "eval_f1_micro": 0.6948102246320682, | |
| "eval_f1_weighted": 0.691666971595594, | |
| "eval_loss": 2.175213575363159, | |
| "eval_macro_fpr": 0.03156565589564158, | |
| "eval_macro_sensitivity": 0.5798857481261962, | |
| "eval_macro_specificity": 0.9769938659292313, | |
| "eval_precision": 0.7011163809357478, | |
| "eval_precision_macro": 0.5797124163320468, | |
| "eval_recall": 0.6948102246320682, | |
| "eval_recall_macro": 0.5798857481261962, | |
| "eval_runtime": 96.1916, | |
| "eval_samples_per_second": 13.421, | |
| "eval_steps_per_second": 1.684, | |
| "eval_weighted_fpr": 0.03042001235330451, | |
| "eval_weighted_sensitivity": 0.6948102246320682, | |
| "eval_weighted_specificity": 0.9600977643064038, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.0076691824942827225, | |
| "learning_rate": 3.1881804043545884e-05, | |
| "loss": 0.0956, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7079783113865221, | |
| "eval_f1_macro": 0.6114593419504739, | |
| "eval_f1_micro": 0.7079783113865221, | |
| "eval_f1_weighted": 0.710498142821041, | |
| "eval_loss": 2.326085329055786, | |
| "eval_macro_fpr": 0.029146593037922882, | |
| "eval_macro_sensitivity": 0.6191389353797161, | |
| "eval_macro_specificity": 0.9781721436893602, | |
| "eval_precision": 0.7213132077015695, | |
| "eval_precision_macro": 0.6169445235048693, | |
| "eval_recall": 0.7079783113865221, | |
| "eval_recall_macro": 0.6191389353797161, | |
| "eval_runtime": 108.9589, | |
| "eval_samples_per_second": 11.849, | |
| "eval_steps_per_second": 1.487, | |
| "eval_weighted_fpr": 0.028619145221285964, | |
| "eval_weighted_sensitivity": 0.7079783113865221, | |
| "eval_weighted_specificity": 0.9646038439538819, | |
| "step": 3858 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "grad_norm": 0.003827617969363928, | |
| "learning_rate": 2.928978745463971e-05, | |
| "loss": 0.0623, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.007828492671251297, | |
| "learning_rate": 2.669777086573354e-05, | |
| "loss": 0.044, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.715724244771495, | |
| "eval_f1_macro": 0.6014076418730009, | |
| "eval_f1_micro": 0.715724244771495, | |
| "eval_f1_weighted": 0.7131079309029965, | |
| "eval_loss": 2.330761671066284, | |
| "eval_macro_fpr": 0.028468870564731498, | |
| "eval_macro_sensitivity": 0.5938828421663026, | |
| "eval_macro_specificity": 0.9784561846662306, | |
| "eval_precision": 0.7142878695652373, | |
| "eval_precision_macro": 0.6184199211909682, | |
| "eval_recall": 0.715724244771495, | |
| "eval_recall_macro": 0.5938828421663026, | |
| "eval_runtime": 106.6283, | |
| "eval_samples_per_second": 12.107, | |
| "eval_steps_per_second": 1.519, | |
| "eval_weighted_fpr": 0.027587762158911525, | |
| "eval_weighted_sensitivity": 0.715724244771495, | |
| "eval_weighted_specificity": 0.9611185252219651, | |
| "step": 4501 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "grad_norm": 0.0054510668851435184, | |
| "learning_rate": 2.4105754276827372e-05, | |
| "loss": 0.0212, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7126258714175058, | |
| "eval_f1_macro": 0.6236987905363891, | |
| "eval_f1_micro": 0.7126258714175058, | |
| "eval_f1_weighted": 0.7047150280912466, | |
| "eval_loss": 2.5606603622436523, | |
| "eval_macro_fpr": 0.029426308648760717, | |
| "eval_macro_sensitivity": 0.6174588332474265, | |
| "eval_macro_specificity": 0.9780489131013995, | |
| "eval_precision": 0.7033345220138081, | |
| "eval_precision_macro": 0.6494012124714204, | |
| "eval_recall": 0.7126258714175058, | |
| "eval_recall_macro": 0.6174588332474265, | |
| "eval_runtime": 94.9282, | |
| "eval_samples_per_second": 13.6, | |
| "eval_steps_per_second": 1.707, | |
| "eval_weighted_fpr": 0.027997886951928157, | |
| "eval_weighted_sensitivity": 0.7126258714175058, | |
| "eval_weighted_specificity": 0.9581078251034848, | |
| "step": 5144 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "grad_norm": 0.002751357154920697, | |
| "learning_rate": 2.1513737687921205e-05, | |
| "loss": 0.0183, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7118512780790085, | |
| "eval_f1_macro": 0.5935171263710702, | |
| "eval_f1_micro": 0.7118512780790084, | |
| "eval_f1_weighted": 0.7088386468311138, | |
| "eval_loss": 2.640535354614258, | |
| "eval_macro_fpr": 0.02906658876848228, | |
| "eval_macro_sensitivity": 0.5850361401718444, | |
| "eval_macro_specificity": 0.978116781696577, | |
| "eval_precision": 0.7091923605282856, | |
| "eval_precision_macro": 0.6132653330348934, | |
| "eval_recall": 0.7118512780790085, | |
| "eval_recall_macro": 0.5850361401718444, | |
| "eval_runtime": 97.5564, | |
| "eval_samples_per_second": 13.233, | |
| "eval_steps_per_second": 1.661, | |
| "eval_weighted_fpr": 0.02810092158936395, | |
| "eval_weighted_sensitivity": 0.7118512780790085, | |
| "eval_weighted_specificity": 0.9599004473696481, | |
| "step": 5787 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 0.002242760267108679, | |
| "learning_rate": 1.8921721099015034e-05, | |
| "loss": 0.0145, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7087529047250194, | |
| "eval_f1_macro": 0.6038805077899971, | |
| "eval_f1_micro": 0.7087529047250194, | |
| "eval_f1_weighted": 0.7051494126143734, | |
| "eval_loss": 2.7267849445343018, | |
| "eval_macro_fpr": 0.0296548766519362, | |
| "eval_macro_sensitivity": 0.594490599401291, | |
| "eval_macro_specificity": 0.9777462293254275, | |
| "eval_precision": 0.7057517403420687, | |
| "eval_precision_macro": 0.6235388470549638, | |
| "eval_recall": 0.7087529047250194, | |
| "eval_recall_macro": 0.594490599401291, | |
| "eval_runtime": 109.4092, | |
| "eval_samples_per_second": 11.8, | |
| "eval_steps_per_second": 1.481, | |
| "eval_weighted_fpr": 0.028515091763992112, | |
| "eval_weighted_sensitivity": 0.7087529047250194, | |
| "eval_weighted_specificity": 0.9574405351563919, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "grad_norm": 0.001886312267743051, | |
| "learning_rate": 1.6329704510108863e-05, | |
| "loss": 0.0052, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "grad_norm": 0.002035327022895217, | |
| "learning_rate": 1.374287195438051e-05, | |
| "loss": 0.0065, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7149496514329977, | |
| "eval_f1_macro": 0.6067795155065238, | |
| "eval_f1_micro": 0.7149496514329978, | |
| "eval_f1_weighted": 0.7122837081983867, | |
| "eval_loss": 2.756761312484741, | |
| "eval_macro_fpr": 0.028612398833029382, | |
| "eval_macro_sensitivity": 0.5966419871344679, | |
| "eval_macro_specificity": 0.9783906945848296, | |
| "eval_precision": 0.7133091176874246, | |
| "eval_precision_macro": 0.6341724921390148, | |
| "eval_recall": 0.7149496514329977, | |
| "eval_recall_macro": 0.5966419871344679, | |
| "eval_runtime": 100.225, | |
| "eval_samples_per_second": 12.881, | |
| "eval_steps_per_second": 1.616, | |
| "eval_weighted_fpr": 0.027689992475545523, | |
| "eval_weighted_sensitivity": 0.7149496514329977, | |
| "eval_weighted_specificity": 0.9609107673394471, | |
| "step": 7073 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "grad_norm": 0.0026232649106532335, | |
| "learning_rate": 1.115085536547434e-05, | |
| "loss": 0.0012, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7087529047250194, | |
| "eval_f1_macro": 0.6011104583388692, | |
| "eval_f1_micro": 0.7087529047250194, | |
| "eval_f1_weighted": 0.7071091949038129, | |
| "eval_loss": 2.924286127090454, | |
| "eval_macro_fpr": 0.029552934169322215, | |
| "eval_macro_sensitivity": 0.5885935192068672, | |
| "eval_macro_specificity": 0.9777900660927055, | |
| "eval_precision": 0.7105516304488961, | |
| "eval_precision_macro": 0.6261125889684936, | |
| "eval_recall": 0.7087529047250194, | |
| "eval_recall_macro": 0.5885935192068672, | |
| "eval_runtime": 97.1906, | |
| "eval_samples_per_second": 13.283, | |
| "eval_steps_per_second": 1.667, | |
| "eval_weighted_fpr": 0.028515091763992112, | |
| "eval_weighted_sensitivity": 0.7087529047250194, | |
| "eval_weighted_specificity": 0.9580980866655633, | |
| "step": 7716 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 9645, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "total_flos": 3.4362266090471424e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |