|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.993894993894994,
  "eval_steps": 100,
  "global_step": 1227,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02442002442002442,
      "grad_norm": 25.41930389404297,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.8076,
      "step": 10
    },
    {
      "epoch": 0.04884004884004884,
      "grad_norm": 11.460826873779297,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.7516,
      "step": 20
    },
    {
      "epoch": 0.07326007326007326,
      "grad_norm": 7.831686973571777,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.7034,
      "step": 30
    },
    {
      "epoch": 0.09768009768009768,
      "grad_norm": 7.234014987945557,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.6996,
      "step": 40
    },
    {
      "epoch": 0.1221001221001221,
      "grad_norm": 5.096914768218994,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.6809,
      "step": 50
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 8.638614654541016,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.6892,
      "step": 60
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 13.534109115600586,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.6603,
      "step": 70
    },
    {
      "epoch": 0.19536019536019536,
      "grad_norm": 12.838939666748047,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.6225,
      "step": 80
    },
    {
      "epoch": 0.21978021978021978,
      "grad_norm": 14.190698623657227,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.5716,
      "step": 90
    },
    {
      "epoch": 0.2442002442002442,
      "grad_norm": 18.23443031311035,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.5403,
      "step": 100
    },
    {
      "epoch": 0.2442002442002442,
      "eval_accuracy": 0.7611484422724496,
      "eval_accuracy_label_negative": 0.8535353535353535,
      "eval_accuracy_label_positive": 0.6745562130177515,
      "eval_f1": 0.7596398965081889,
      "eval_loss": 0.5274035930633545,
      "eval_precision": 0.772811008310922,
      "eval_recall": 0.7611484422724496,
      "eval_runtime": 5.88,
      "eval_samples_per_second": 278.402,
      "eval_steps_per_second": 17.517,
      "step": 100
    },
    {
      "epoch": 0.2686202686202686,
      "grad_norm": 14.831952095031738,
      "learning_rate": 4.4e-06,
      "loss": 0.5022,
      "step": 110
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 18.24405860900879,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.4786,
      "step": 120
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 16.866863250732422,
      "learning_rate": 5.2e-06,
      "loss": 0.3629,
      "step": 130
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 18.194087982177734,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.3523,
      "step": 140
    },
    {
      "epoch": 0.3663003663003663,
      "grad_norm": 16.094165802001953,
      "learning_rate": 6e-06,
      "loss": 0.3272,
      "step": 150
    },
    {
      "epoch": 0.3907203907203907,
      "grad_norm": 41.393802642822266,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.307,
      "step": 160
    },
    {
      "epoch": 0.41514041514041516,
      "grad_norm": 23.695520401000977,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.2551,
      "step": 170
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 30.489437103271484,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.2536,
      "step": 180
    },
    {
      "epoch": 0.463980463980464,
      "grad_norm": 47.37874221801758,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.2622,
      "step": 190
    },
    {
      "epoch": 0.4884004884004884,
      "grad_norm": 26.136856079101562,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.2673,
      "step": 200
    },
    {
      "epoch": 0.4884004884004884,
      "eval_accuracy": 0.8979841172877214,
      "eval_accuracy_label_negative": 0.922979797979798,
      "eval_accuracy_label_positive": 0.8745562130177514,
      "eval_f1": 0.8979978224223449,
      "eval_loss": 0.2805691063404083,
      "eval_precision": 0.8993693797991343,
      "eval_recall": 0.8979841172877214,
      "eval_runtime": 6.3687,
      "eval_samples_per_second": 257.038,
      "eval_steps_per_second": 16.173,
      "step": 200
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 17.713157653808594,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.1874,
      "step": 210
    },
    {
      "epoch": 0.5372405372405372,
      "grad_norm": 5.685952186584473,
      "learning_rate": 8.8e-06,
      "loss": 0.2565,
      "step": 220
    },
    {
      "epoch": 0.5616605616605617,
      "grad_norm": 64.32303619384766,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.2763,
      "step": 230
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 26.14358139038086,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.2193,
      "step": 240
    },
    {
      "epoch": 0.6105006105006106,
      "grad_norm": 51.215972900390625,
      "learning_rate": 1e-05,
      "loss": 0.2645,
      "step": 250
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 77.36116027832031,
      "learning_rate": 1.04e-05,
      "loss": 0.2887,
      "step": 260
    },
    {
      "epoch": 0.6593406593406593,
      "grad_norm": 12.419713020324707,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 0.2836,
      "step": 270
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 51.27354049682617,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.2279,
      "step": 280
    },
    {
      "epoch": 0.7081807081807082,
      "grad_norm": 15.182161331176758,
      "learning_rate": 1.16e-05,
      "loss": 0.23,
      "step": 290
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 20.174087524414062,
      "learning_rate": 1.2e-05,
      "loss": 0.247,
      "step": 300
    },
    {
      "epoch": 0.7326007326007326,
      "eval_accuracy": 0.902871105681124,
      "eval_accuracy_label_negative": 0.8434343434343434,
      "eval_accuracy_label_positive": 0.9585798816568047,
      "eval_f1": 0.9024095653522399,
      "eval_loss": 0.26104938983917236,
      "eval_precision": 0.9073823065773275,
      "eval_recall": 0.902871105681124,
      "eval_runtime": 6.46,
      "eval_samples_per_second": 253.406,
      "eval_steps_per_second": 15.944,
      "step": 300
    },
    {
      "epoch": 0.757020757020757,
      "grad_norm": 19.257923126220703,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 0.2514,
      "step": 310
    },
    {
      "epoch": 0.7814407814407814,
      "grad_norm": 13.050935745239258,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.216,
      "step": 320
    },
    {
      "epoch": 0.8058608058608059,
      "grad_norm": 1.8762781620025635,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.2239,
      "step": 330
    },
    {
      "epoch": 0.8302808302808303,
      "grad_norm": 14.866827964782715,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.2124,
      "step": 340
    },
    {
      "epoch": 0.8547008547008547,
      "grad_norm": 16.646745681762695,
      "learning_rate": 1.4e-05,
      "loss": 0.2341,
      "step": 350
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 14.468786239624023,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.2397,
      "step": 360
    },
    {
      "epoch": 0.9035409035409036,
      "grad_norm": 16.370431900024414,
      "learning_rate": 1.48e-05,
      "loss": 0.2764,
      "step": 370
    },
    {
      "epoch": 0.927960927960928,
      "grad_norm": 15.854681015014648,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.1964,
      "step": 380
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 19.42245101928711,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.168,
      "step": 390
    },
    {
      "epoch": 0.9768009768009768,
      "grad_norm": 4.485373497009277,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.2357,
      "step": 400
    },
    {
      "epoch": 0.9768009768009768,
      "eval_accuracy": 0.9083689676237019,
      "eval_accuracy_label_negative": 0.9318181818181818,
      "eval_accuracy_label_positive": 0.8863905325443787,
      "eval_f1": 0.9083847657945654,
      "eval_loss": 0.2560252249240875,
      "eval_precision": 0.9095968991483547,
      "eval_recall": 0.9083689676237019,
      "eval_runtime": 6.4906,
      "eval_samples_per_second": 252.21,
      "eval_steps_per_second": 15.869,
      "step": 400
    },
    {
      "epoch": 1.0,
      "grad_norm": 112.14476013183594,
      "learning_rate": 1.64e-05,
      "loss": 0.2322,
      "step": 410
    },
    {
      "epoch": 1.0244200244200243,
      "grad_norm": 19.950773239135742,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.1607,
      "step": 420
    },
    {
      "epoch": 1.0488400488400489,
      "grad_norm": 14.963955879211426,
      "learning_rate": 1.72e-05,
      "loss": 0.1943,
      "step": 430
    },
    {
      "epoch": 1.0732600732600732,
      "grad_norm": 12.724336624145508,
      "learning_rate": 1.76e-05,
      "loss": 0.1273,
      "step": 440
    },
    {
      "epoch": 1.0976800976800978,
      "grad_norm": 12.087287902832031,
      "learning_rate": 1.8e-05,
      "loss": 0.2392,
      "step": 450
    },
    {
      "epoch": 1.122100122100122,
      "grad_norm": 4.449314117431641,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.2444,
      "step": 460
    },
    {
      "epoch": 1.1465201465201464,
      "grad_norm": 13.28513240814209,
      "learning_rate": 1.88e-05,
      "loss": 0.1995,
      "step": 470
    },
    {
      "epoch": 1.170940170940171,
      "grad_norm": 9.664812088012695,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.2178,
      "step": 480
    },
    {
      "epoch": 1.1953601953601953,
      "grad_norm": 3.9961328506469727,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.1269,
      "step": 490
    },
    {
      "epoch": 1.2197802197802199,
      "grad_norm": 6.78753137588501,
      "learning_rate": 2e-05,
      "loss": 0.2094,
      "step": 500
    },
    {
      "epoch": 1.2197802197802199,
      "eval_accuracy": 0.9089798411728772,
      "eval_accuracy_label_negative": 0.9507575757575758,
      "eval_accuracy_label_positive": 0.8698224852071006,
      "eval_f1": 0.9089364277932478,
      "eval_loss": 0.31265538930892944,
      "eval_precision": 0.9123227286547785,
      "eval_recall": 0.9089798411728772,
      "eval_runtime": 6.4966,
      "eval_samples_per_second": 251.979,
      "eval_steps_per_second": 15.855,
      "step": 500
    },
    {
      "epoch": 1.2442002442002442,
      "grad_norm": 4.199306488037109,
      "learning_rate": 1.9724896836313617e-05,
      "loss": 0.1993,
      "step": 510
    },
    {
      "epoch": 1.2686202686202686,
      "grad_norm": 47.193603515625,
      "learning_rate": 1.9449793672627236e-05,
      "loss": 0.1837,
      "step": 520
    },
    {
      "epoch": 1.293040293040293,
      "grad_norm": 22.490142822265625,
      "learning_rate": 1.9174690508940855e-05,
      "loss": 0.2295,
      "step": 530
    },
    {
      "epoch": 1.3174603174603174,
      "grad_norm": 10.449468612670898,
      "learning_rate": 1.889958734525447e-05,
      "loss": 0.2589,
      "step": 540
    },
    {
      "epoch": 1.341880341880342,
      "grad_norm": 49.228878021240234,
      "learning_rate": 1.862448418156809e-05,
      "loss": 0.134,
      "step": 550
    },
    {
      "epoch": 1.3663003663003663,
      "grad_norm": 43.437068939208984,
      "learning_rate": 1.8349381017881708e-05,
      "loss": 0.2905,
      "step": 560
    },
    {
      "epoch": 1.3907203907203907,
      "grad_norm": 22.296707153320312,
      "learning_rate": 1.8074277854195327e-05,
      "loss": 0.2304,
      "step": 570
    },
    {
      "epoch": 1.4151404151404152,
      "grad_norm": 13.2725248336792,
      "learning_rate": 1.7799174690508942e-05,
      "loss": 0.2818,
      "step": 580
    },
    {
      "epoch": 1.4395604395604396,
      "grad_norm": 4.746452808380127,
      "learning_rate": 1.7524071526822558e-05,
      "loss": 0.1635,
      "step": 590
    },
    {
      "epoch": 1.463980463980464,
      "grad_norm": 15.717751502990723,
      "learning_rate": 1.7248968363136177e-05,
      "loss": 0.1695,
      "step": 600
    },
    {
      "epoch": 1.463980463980464,
      "eval_accuracy": 0.9187538179596824,
      "eval_accuracy_label_negative": 0.9053030303030303,
      "eval_accuracy_label_positive": 0.9313609467455621,
      "eval_f1": 0.9187176730282378,
      "eval_loss": 0.22982099652290344,
      "eval_precision": 0.9188801834239158,
      "eval_recall": 0.9187538179596824,
      "eval_runtime": 6.5054,
      "eval_samples_per_second": 251.638,
      "eval_steps_per_second": 15.833,
      "step": 600
    },
    {
      "epoch": 1.4884004884004884,
      "grad_norm": 25.554302215576172,
      "learning_rate": 1.6973865199449796e-05,
      "loss": 0.1926,
      "step": 610
    },
    {
      "epoch": 1.5128205128205128,
      "grad_norm": 6.077470779418945,
      "learning_rate": 1.669876203576341e-05,
      "loss": 0.2561,
      "step": 620
    },
    {
      "epoch": 1.537240537240537,
      "grad_norm": 19.69429588317871,
      "learning_rate": 1.642365887207703e-05,
      "loss": 0.1501,
      "step": 630
    },
    {
      "epoch": 1.5616605616605617,
      "grad_norm": 10.95654010772705,
      "learning_rate": 1.614855570839065e-05,
      "loss": 0.1852,
      "step": 640
    },
    {
      "epoch": 1.5860805860805862,
      "grad_norm": 24.821308135986328,
      "learning_rate": 1.5873452544704268e-05,
      "loss": 0.2113,
      "step": 650
    },
    {
      "epoch": 1.6105006105006106,
      "grad_norm": 13.17242431640625,
      "learning_rate": 1.5598349381017883e-05,
      "loss": 0.1237,
      "step": 660
    },
    {
      "epoch": 1.6349206349206349,
      "grad_norm": 16.253307342529297,
      "learning_rate": 1.53232462173315e-05,
      "loss": 0.1646,
      "step": 670
    },
    {
      "epoch": 1.6593406593406592,
      "grad_norm": 28.406679153442383,
      "learning_rate": 1.5048143053645119e-05,
      "loss": 0.2448,
      "step": 680
    },
    {
      "epoch": 1.6837606837606838,
      "grad_norm": 15.36701488494873,
      "learning_rate": 1.4773039889958736e-05,
      "loss": 0.1902,
      "step": 690
    },
    {
      "epoch": 1.7081807081807083,
      "grad_norm": 33.92890548706055,
      "learning_rate": 1.4497936726272352e-05,
      "loss": 0.2024,
      "step": 700
    },
    {
      "epoch": 1.7081807081807083,
      "eval_accuracy": 0.9205864386072083,
      "eval_accuracy_label_negative": 0.9393939393939394,
      "eval_accuracy_label_positive": 0.9029585798816568,
      "eval_f1": 0.920605585060701,
      "eval_loss": 0.22175996005535126,
      "eval_precision": 0.9214190422768549,
      "eval_recall": 0.9205864386072083,
      "eval_runtime": 7.338,
      "eval_samples_per_second": 223.086,
      "eval_steps_per_second": 14.037,
      "step": 700
    },
    {
      "epoch": 1.7326007326007327,
      "grad_norm": 39.820068359375,
      "learning_rate": 1.4222833562585972e-05,
      "loss": 0.1628,
      "step": 710
    },
    {
      "epoch": 1.757020757020757,
      "grad_norm": 14.987146377563477,
      "learning_rate": 1.3947730398899588e-05,
      "loss": 0.1733,
      "step": 720
    },
    {
      "epoch": 1.7814407814407813,
      "grad_norm": 7.142777442932129,
      "learning_rate": 1.3672627235213205e-05,
      "loss": 0.1429,
      "step": 730
    },
    {
      "epoch": 1.8058608058608059,
      "grad_norm": 40.524967193603516,
      "learning_rate": 1.3397524071526824e-05,
      "loss": 0.1887,
      "step": 740
    },
    {
      "epoch": 1.8302808302808304,
      "grad_norm": 15.210890769958496,
      "learning_rate": 1.3122420907840441e-05,
      "loss": 0.2109,
      "step": 750
    },
    {
      "epoch": 1.8547008547008548,
      "grad_norm": 14.5702543258667,
      "learning_rate": 1.284731774415406e-05,
      "loss": 0.1397,
      "step": 760
    },
    {
      "epoch": 1.879120879120879,
      "grad_norm": 20.027212142944336,
      "learning_rate": 1.2572214580467677e-05,
      "loss": 0.1263,
      "step": 770
    },
    {
      "epoch": 1.9035409035409034,
      "grad_norm": 13.660886764526367,
      "learning_rate": 1.2297111416781292e-05,
      "loss": 0.2006,
      "step": 780
    },
    {
      "epoch": 1.927960927960928,
      "grad_norm": 8.097176551818848,
      "learning_rate": 1.2022008253094913e-05,
      "loss": 0.1901,
      "step": 790
    },
    {
      "epoch": 1.9523809523809523,
      "grad_norm": 4.4079108238220215,
      "learning_rate": 1.1746905089408529e-05,
      "loss": 0.1155,
      "step": 800
    },
    {
      "epoch": 1.9523809523809523,
      "eval_accuracy": 0.9236408063530849,
      "eval_accuracy_label_negative": 0.9191919191919192,
      "eval_accuracy_label_positive": 0.927810650887574,
      "eval_f1": 0.923636013636066,
      "eval_loss": 0.20613956451416016,
      "eval_precision": 0.9236374318453521,
      "eval_recall": 0.9236408063530849,
      "eval_runtime": 6.4768,
      "eval_samples_per_second": 252.75,
      "eval_steps_per_second": 15.903,
      "step": 800
    },
    {
      "epoch": 1.9768009768009769,
      "grad_norm": 16.103919982910156,
      "learning_rate": 1.1471801925722146e-05,
      "loss": 0.1695,
      "step": 810
    },
    {
      "epoch": 2.0,
      "grad_norm": 26.186500549316406,
      "learning_rate": 1.1196698762035765e-05,
      "loss": 0.1878,
      "step": 820
    },
    {
      "epoch": 2.0244200244200243,
      "grad_norm": 8.27334213256836,
      "learning_rate": 1.0921595598349382e-05,
      "loss": 0.116,
      "step": 830
    },
    {
      "epoch": 2.0488400488400487,
      "grad_norm": 9.153972625732422,
      "learning_rate": 1.0646492434663e-05,
      "loss": 0.1331,
      "step": 840
    },
    {
      "epoch": 2.0732600732600734,
      "grad_norm": 8.734851837158203,
      "learning_rate": 1.0371389270976618e-05,
      "loss": 0.1064,
      "step": 850
    },
    {
      "epoch": 2.0976800976800978,
      "grad_norm": 16.807472229003906,
      "learning_rate": 1.0096286107290235e-05,
      "loss": 0.1323,
      "step": 860
    },
    {
      "epoch": 2.122100122100122,
      "grad_norm": 7.338029861450195,
      "learning_rate": 9.821182943603852e-06,
      "loss": 0.0876,
      "step": 870
    },
    {
      "epoch": 2.1465201465201464,
      "grad_norm": 51.00392150878906,
      "learning_rate": 9.54607977991747e-06,
      "loss": 0.0574,
      "step": 880
    },
    {
      "epoch": 2.1709401709401708,
      "grad_norm": 15.209708213806152,
      "learning_rate": 9.270976616231088e-06,
      "loss": 0.1536,
      "step": 890
    },
    {
      "epoch": 2.1953601953601956,
      "grad_norm": 5.431528568267822,
      "learning_rate": 8.995873452544705e-06,
      "loss": 0.1361,
      "step": 900
    },
    {
      "epoch": 2.1953601953601956,
      "eval_accuracy": 0.921808185705559,
      "eval_accuracy_label_negative": 0.9406565656565656,
      "eval_accuracy_label_positive": 0.9041420118343195,
      "eval_f1": 0.9218270375982287,
      "eval_loss": 0.2299249768257141,
      "eval_precision": 0.9226412544476359,
      "eval_recall": 0.921808185705559,
      "eval_runtime": 6.4607,
      "eval_samples_per_second": 253.378,
      "eval_steps_per_second": 15.943,
      "step": 900
    },
    {
      "epoch": 2.21978021978022,
      "grad_norm": 18.443998336791992,
      "learning_rate": 8.720770288858322e-06,
      "loss": 0.1881,
      "step": 910
    },
    {
      "epoch": 2.244200244200244,
      "grad_norm": 18.574554443359375,
      "learning_rate": 8.44566712517194e-06,
      "loss": 0.0946,
      "step": 920
    },
    {
      "epoch": 2.2686202686202686,
      "grad_norm": 27.33018684387207,
      "learning_rate": 8.170563961485558e-06,
      "loss": 0.1302,
      "step": 930
    },
    {
      "epoch": 2.293040293040293,
      "grad_norm": 15.219032287597656,
      "learning_rate": 7.895460797799176e-06,
      "loss": 0.1475,
      "step": 940
    },
    {
      "epoch": 2.317460317460317,
      "grad_norm": 23.795124053955078,
      "learning_rate": 7.620357634112793e-06,
      "loss": 0.1098,
      "step": 950
    },
    {
      "epoch": 2.341880341880342,
      "grad_norm": 12.746439933776855,
      "learning_rate": 7.345254470426411e-06,
      "loss": 0.0811,
      "step": 960
    },
    {
      "epoch": 2.3663003663003663,
      "grad_norm": 2.0957300662994385,
      "learning_rate": 7.070151306740029e-06,
      "loss": 0.1033,
      "step": 970
    },
    {
      "epoch": 2.3907203907203907,
      "grad_norm": 20.963695526123047,
      "learning_rate": 6.795048143053645e-06,
      "loss": 0.1716,
      "step": 980
    },
    {
      "epoch": 2.415140415140415,
      "grad_norm": 6.032730579376221,
      "learning_rate": 6.519944979367263e-06,
      "loss": 0.0937,
      "step": 990
    },
    {
      "epoch": 2.4395604395604398,
      "grad_norm": 18.961658477783203,
      "learning_rate": 6.244841815680881e-06,
      "loss": 0.1235,
      "step": 1000
    },
    {
      "epoch": 2.4395604395604398,
      "eval_accuracy": 0.9211973121563837,
      "eval_accuracy_label_negative": 0.9633838383838383,
      "eval_accuracy_label_positive": 0.8816568047337278,
      "eval_f1": 0.9211597260760334,
      "eval_loss": 0.26676130294799805,
      "eval_precision": 0.9245979890029079,
      "eval_recall": 0.9211973121563837,
      "eval_runtime": 6.4755,
      "eval_samples_per_second": 252.8,
      "eval_steps_per_second": 15.906,
      "step": 1000
    },
    {
      "epoch": 2.463980463980464,
      "grad_norm": 11.030694961547852,
      "learning_rate": 5.969738651994499e-06,
      "loss": 0.1298,
      "step": 1010
    },
    {
      "epoch": 2.4884004884004884,
      "grad_norm": 19.397335052490234,
      "learning_rate": 5.6946354883081155e-06,
      "loss": 0.1046,
      "step": 1020
    },
    {
      "epoch": 2.5128205128205128,
      "grad_norm": 5.86328125,
      "learning_rate": 5.4195323246217335e-06,
      "loss": 0.0831,
      "step": 1030
    },
    {
      "epoch": 2.537240537240537,
      "grad_norm": 5.8682427406311035,
      "learning_rate": 5.1444291609353515e-06,
      "loss": 0.1152,
      "step": 1040
    },
    {
      "epoch": 2.5616605616605614,
      "grad_norm": 6.465247631072998,
      "learning_rate": 4.869325997248969e-06,
      "loss": 0.1316,
      "step": 1050
    },
    {
      "epoch": 2.586080586080586,
      "grad_norm": 74.92926025390625,
      "learning_rate": 4.594222833562587e-06,
      "loss": 0.0973,
      "step": 1060
    },
    {
      "epoch": 2.6105006105006106,
      "grad_norm": 4.780877590179443,
      "learning_rate": 4.319119669876204e-06,
      "loss": 0.1206,
      "step": 1070
    },
    {
      "epoch": 2.634920634920635,
      "grad_norm": 23.055227279663086,
      "learning_rate": 4.044016506189822e-06,
      "loss": 0.1001,
      "step": 1080
    },
    {
      "epoch": 2.659340659340659,
      "grad_norm": 50.33488082885742,
      "learning_rate": 3.768913342503439e-06,
      "loss": 0.1537,
      "step": 1090
    },
    {
      "epoch": 2.683760683760684,
      "grad_norm": 0.44596701860427856,
      "learning_rate": 3.4938101788170562e-06,
      "loss": 0.084,
      "step": 1100
    },
    {
      "epoch": 2.683760683760684,
      "eval_accuracy": 0.921808185705559,
      "eval_accuracy_label_negative": 0.9545454545454546,
      "eval_accuracy_label_positive": 0.8911242603550296,
      "eval_f1": 0.9218032836901542,
      "eval_loss": 0.2732768654823303,
      "eval_precision": 0.9239586674515976,
      "eval_recall": 0.921808185705559,
      "eval_runtime": 6.4795,
      "eval_samples_per_second": 252.643,
      "eval_steps_per_second": 15.896,
      "step": 1100
    },
    {
      "epoch": 2.7081807081807083,
      "grad_norm": 3.0834202766418457,
      "learning_rate": 3.2187070151306742e-06,
      "loss": 0.1546,
      "step": 1110
    },
    {
      "epoch": 2.7326007326007327,
      "grad_norm": 6.8805928230285645,
      "learning_rate": 2.943603851444292e-06,
      "loss": 0.0876,
      "step": 1120
    },
    {
      "epoch": 2.757020757020757,
      "grad_norm": 0.9852960109710693,
      "learning_rate": 2.6685006877579094e-06,
      "loss": 0.0884,
      "step": 1130
    },
    {
      "epoch": 2.7814407814407813,
      "grad_norm": 9.610347747802734,
      "learning_rate": 2.393397524071527e-06,
      "loss": 0.1781,
      "step": 1140
    },
    {
      "epoch": 2.8058608058608057,
      "grad_norm": 14.589119911193848,
      "learning_rate": 2.1182943603851446e-06,
      "loss": 0.1584,
      "step": 1150
    },
    {
      "epoch": 2.8302808302808304,
      "grad_norm": 5.850516319274902,
      "learning_rate": 1.8431911966987622e-06,
      "loss": 0.1266,
      "step": 1160
    },
    {
      "epoch": 2.8547008547008548,
      "grad_norm": 3.190080165863037,
      "learning_rate": 1.56808803301238e-06,
      "loss": 0.1563,
      "step": 1170
    },
    {
      "epoch": 2.879120879120879,
      "grad_norm": 8.95765209197998,
      "learning_rate": 1.2929848693259976e-06,
      "loss": 0.0896,
      "step": 1180
    },
    {
      "epoch": 2.9035409035409034,
      "grad_norm": 1.606522798538208,
      "learning_rate": 1.017881705639615e-06,
      "loss": 0.1167,
      "step": 1190
    },
    {
      "epoch": 2.927960927960928,
      "grad_norm": 31.470670700073242,
      "learning_rate": 7.427785419532325e-07,
      "loss": 0.1326,
      "step": 1200
    },
    {
      "epoch": 2.927960927960928,
      "eval_accuracy": 0.9248625534514355,
      "eval_accuracy_label_negative": 0.9191919191919192,
      "eval_accuracy_label_positive": 0.9301775147928995,
      "eval_f1": 0.9248544119857385,
      "eval_loss": 0.2394767850637436,
      "eval_precision": 0.9248635488205493,
      "eval_recall": 0.9248625534514355,
      "eval_runtime": 6.4694,
      "eval_samples_per_second": 253.036,
      "eval_steps_per_second": 15.921,
      "step": 1200
    },
    {
      "epoch": 2.9523809523809526,
      "grad_norm": 16.254497528076172,
      "learning_rate": 4.6767537826685014e-07,
      "loss": 0.1154,
      "step": 1210
    },
    {
      "epoch": 2.976800976800977,
      "grad_norm": 18.331233978271484,
      "learning_rate": 1.925722145804677e-07,
      "loss": 0.1268,
      "step": 1220
    },
    {
      "epoch": 2.993894993894994,
      "step": 1227,
      "total_flos": 157098652813440.0,
      "train_loss": 0.22647988077286502,
      "train_runtime": 692.1682,
      "train_samples_per_second": 56.735,
      "train_steps_per_second": 1.773
    },
    {
      "epoch": 2.993894993894994,
      "eval_accuracy": 0.9260843005497862,
      "eval_accuracy_label_negative": 0.9242424242424242,
      "eval_accuracy_label_positive": 0.927810650887574,
      "eval_f1": 0.9260857363073682,
      "eval_loss": 0.23794083297252655,
      "eval_precision": 0.9260878637932374,
      "eval_recall": 0.9260843005497862,
      "eval_runtime": 6.4878,
      "eval_samples_per_second": 252.32,
      "eval_steps_per_second": 15.876,
      "step": 1227
    }
  ],
  "logging_steps": 10,
  "max_steps": 1227,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 157098652813440.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}