|
{ |
|
"best_metric": 0.9565217391304348, |
|
"best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-15", |
|
"epoch": 60.0, |
|
"eval_steps": 500, |
|
"global_step": 180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6304347826086957, |
|
"eval_loss": 0.6141545176506042, |
|
"eval_runtime": 0.3002, |
|
"eval_samples_per_second": 306.507, |
|
"eval_steps_per_second": 9.995, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8695652173913043, |
|
"eval_loss": 0.3853473365306854, |
|
"eval_runtime": 0.2807, |
|
"eval_samples_per_second": 327.721, |
|
"eval_steps_per_second": 10.687, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8260869565217391, |
|
"eval_loss": 0.40699997544288635, |
|
"eval_runtime": 0.2954, |
|
"eval_samples_per_second": 311.393, |
|
"eval_steps_per_second": 10.154, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 6.74481725692749, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.494, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.1460711508989334, |
|
"eval_runtime": 0.2916, |
|
"eval_samples_per_second": 315.469, |
|
"eval_steps_per_second": 10.287, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.11890643835067749, |
|
"eval_runtime": 0.2956, |
|
"eval_samples_per_second": 311.209, |
|
"eval_steps_per_second": 10.148, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.15268591046333313, |
|
"eval_runtime": 0.2919, |
|
"eval_samples_per_second": 315.184, |
|
"eval_steps_per_second": 10.278, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 32.091392517089844, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.2024, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9021739130434783, |
|
"eval_loss": 0.3323056697845459, |
|
"eval_runtime": 0.2758, |
|
"eval_samples_per_second": 333.524, |
|
"eval_steps_per_second": 10.876, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.15201511979103088, |
|
"eval_runtime": 0.2782, |
|
"eval_samples_per_second": 330.705, |
|
"eval_steps_per_second": 10.784, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.1572241336107254, |
|
"eval_runtime": 0.2775, |
|
"eval_samples_per_second": 331.58, |
|
"eval_steps_per_second": 10.812, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 10.290038108825684, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.1419, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.18135777115821838, |
|
"eval_runtime": 0.2913, |
|
"eval_samples_per_second": 315.861, |
|
"eval_steps_per_second": 10.3, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.17784903943538666, |
|
"eval_runtime": 0.2749, |
|
"eval_samples_per_second": 334.701, |
|
"eval_steps_per_second": 10.914, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.15048673748970032, |
|
"eval_runtime": 0.2937, |
|
"eval_samples_per_second": 313.271, |
|
"eval_steps_per_second": 10.215, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.1890527755022049, |
|
"eval_runtime": 0.3016, |
|
"eval_samples_per_second": 305.089, |
|
"eval_steps_per_second": 9.949, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 6.0901970863342285, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.1053, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7934782608695652, |
|
"eval_loss": 0.7274413108825684, |
|
"eval_runtime": 0.272, |
|
"eval_samples_per_second": 338.198, |
|
"eval_steps_per_second": 11.028, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.2668982148170471, |
|
"eval_runtime": 0.2934, |
|
"eval_samples_per_second": 313.551, |
|
"eval_steps_per_second": 10.224, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.22397232055664062, |
|
"eval_runtime": 0.3087, |
|
"eval_samples_per_second": 298.059, |
|
"eval_steps_per_second": 9.719, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 7.414670467376709, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.3044, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8913043478260869, |
|
"eval_loss": 0.3497091829776764, |
|
"eval_runtime": 0.2758, |
|
"eval_samples_per_second": 333.612, |
|
"eval_steps_per_second": 10.879, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.2208346724510193, |
|
"eval_runtime": 0.274, |
|
"eval_samples_per_second": 335.712, |
|
"eval_steps_per_second": 10.947, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.17329861223697662, |
|
"eval_runtime": 0.285, |
|
"eval_samples_per_second": 322.844, |
|
"eval_steps_per_second": 10.528, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.56092643737793, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.151, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9239130434782609, |
|
"eval_loss": 0.20381057262420654, |
|
"eval_runtime": 0.2821, |
|
"eval_samples_per_second": 326.07, |
|
"eval_steps_per_second": 10.633, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.12818782031536102, |
|
"eval_runtime": 0.2765, |
|
"eval_samples_per_second": 332.708, |
|
"eval_steps_per_second": 10.849, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9239130434782609, |
|
"eval_loss": 0.3231411874294281, |
|
"eval_runtime": 0.3046, |
|
"eval_samples_per_second": 302.028, |
|
"eval_steps_per_second": 9.849, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.15651515126228333, |
|
"eval_runtime": 0.296, |
|
"eval_samples_per_second": 310.773, |
|
"eval_steps_per_second": 10.134, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 23.333333333333332, |
|
"grad_norm": 6.024500370025635, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.0875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.19810304045677185, |
|
"eval_runtime": 0.3099, |
|
"eval_samples_per_second": 296.864, |
|
"eval_steps_per_second": 9.68, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.19737455248832703, |
|
"eval_runtime": 0.2724, |
|
"eval_samples_per_second": 337.687, |
|
"eval_steps_per_second": 11.012, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.20447766780853271, |
|
"eval_runtime": 0.2918, |
|
"eval_samples_per_second": 315.301, |
|
"eval_steps_per_second": 10.282, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 4.34276008605957, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.0851, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.1840977817773819, |
|
"eval_runtime": 0.2713, |
|
"eval_samples_per_second": 339.118, |
|
"eval_steps_per_second": 11.058, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.20613481104373932, |
|
"eval_runtime": 0.2718, |
|
"eval_samples_per_second": 338.491, |
|
"eval_steps_per_second": 11.038, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.20765484869480133, |
|
"eval_runtime": 0.2926, |
|
"eval_samples_per_second": 314.467, |
|
"eval_steps_per_second": 10.254, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.4659526348114014, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.21993966400623322, |
|
"eval_runtime": 0.2942, |
|
"eval_samples_per_second": 312.716, |
|
"eval_steps_per_second": 10.197, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.2038496881723404, |
|
"eval_runtime": 0.3027, |
|
"eval_samples_per_second": 303.903, |
|
"eval_steps_per_second": 9.91, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.20770704746246338, |
|
"eval_runtime": 0.281, |
|
"eval_samples_per_second": 327.359, |
|
"eval_steps_per_second": 10.675, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.18766026198863983, |
|
"eval_runtime": 0.2757, |
|
"eval_samples_per_second": 333.675, |
|
"eval_steps_per_second": 10.881, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 33.333333333333336, |
|
"grad_norm": 1.9909802675247192, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.0533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.2383040189743042, |
|
"eval_runtime": 0.2759, |
|
"eval_samples_per_second": 333.465, |
|
"eval_steps_per_second": 10.874, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9239130434782609, |
|
"eval_loss": 0.25708499550819397, |
|
"eval_runtime": 0.2913, |
|
"eval_samples_per_second": 315.812, |
|
"eval_steps_per_second": 10.298, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.23297645151615143, |
|
"eval_runtime": 0.2763, |
|
"eval_samples_per_second": 332.955, |
|
"eval_steps_per_second": 10.857, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 36.666666666666664, |
|
"grad_norm": 9.732784271240234, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.0451, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.24198591709136963, |
|
"eval_runtime": 0.2748, |
|
"eval_samples_per_second": 334.8, |
|
"eval_steps_per_second": 10.917, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9239130434782609, |
|
"eval_loss": 0.2881980240345001, |
|
"eval_runtime": 0.291, |
|
"eval_samples_per_second": 316.1, |
|
"eval_steps_per_second": 10.308, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.23858585953712463, |
|
"eval_runtime": 0.2975, |
|
"eval_samples_per_second": 309.204, |
|
"eval_steps_per_second": 10.083, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 1.4663212299346924, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0401, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.2512564957141876, |
|
"eval_runtime": 0.2721, |
|
"eval_samples_per_second": 338.098, |
|
"eval_steps_per_second": 11.025, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.2671690285205841, |
|
"eval_runtime": 0.2823, |
|
"eval_samples_per_second": 325.908, |
|
"eval_steps_per_second": 10.627, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.2950490117073059, |
|
"eval_runtime": 0.2748, |
|
"eval_samples_per_second": 334.771, |
|
"eval_steps_per_second": 10.916, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.32324346899986267, |
|
"eval_runtime": 0.2744, |
|
"eval_samples_per_second": 335.279, |
|
"eval_steps_per_second": 10.933, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 43.333333333333336, |
|
"grad_norm": 6.655643463134766, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.0329, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9239130434782609, |
|
"eval_loss": 0.3711928427219391, |
|
"eval_runtime": 0.2829, |
|
"eval_samples_per_second": 325.258, |
|
"eval_steps_per_second": 10.606, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 0.35285505652427673, |
|
"eval_runtime": 0.2761, |
|
"eval_samples_per_second": 333.224, |
|
"eval_steps_per_second": 10.866, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.29050472378730774, |
|
"eval_runtime": 0.292, |
|
"eval_samples_per_second": 315.053, |
|
"eval_steps_per_second": 10.273, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 46.666666666666664, |
|
"grad_norm": 5.307190895080566, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.0519, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.26701638102531433, |
|
"eval_runtime": 0.3064, |
|
"eval_samples_per_second": 300.308, |
|
"eval_steps_per_second": 9.793, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.2628593444824219, |
|
"eval_runtime": 0.2788, |
|
"eval_samples_per_second": 330.007, |
|
"eval_steps_per_second": 10.761, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.2760757505893707, |
|
"eval_runtime": 0.2738, |
|
"eval_samples_per_second": 336.03, |
|
"eval_steps_per_second": 10.957, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 4.582086563110352, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0281, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.3040381669998169, |
|
"eval_runtime": 0.2766, |
|
"eval_samples_per_second": 332.655, |
|
"eval_steps_per_second": 10.847, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.31911370158195496, |
|
"eval_runtime": 0.2968, |
|
"eval_samples_per_second": 309.998, |
|
"eval_steps_per_second": 10.109, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.32143697142601013, |
|
"eval_runtime": 0.3072, |
|
"eval_samples_per_second": 299.499, |
|
"eval_steps_per_second": 9.766, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.31315502524375916, |
|
"eval_runtime": 0.3028, |
|
"eval_samples_per_second": 303.834, |
|
"eval_steps_per_second": 9.908, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 53.333333333333336, |
|
"grad_norm": 4.059518814086914, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.028, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.31154391169548035, |
|
"eval_runtime": 0.2938, |
|
"eval_samples_per_second": 313.128, |
|
"eval_steps_per_second": 10.211, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9565217391304348, |
|
"eval_loss": 0.31155669689178467, |
|
"eval_runtime": 0.2755, |
|
"eval_samples_per_second": 333.966, |
|
"eval_steps_per_second": 10.89, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.3224737346172333, |
|
"eval_runtime": 0.302, |
|
"eval_samples_per_second": 304.679, |
|
"eval_steps_per_second": 9.935, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 56.666666666666664, |
|
"grad_norm": 9.457426071166992, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.0361, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.3235200047492981, |
|
"eval_runtime": 0.2841, |
|
"eval_samples_per_second": 323.798, |
|
"eval_steps_per_second": 10.559, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.3200394809246063, |
|
"eval_runtime": 0.2914, |
|
"eval_samples_per_second": 315.666, |
|
"eval_steps_per_second": 10.293, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.318322092294693, |
|
"eval_runtime": 0.2746, |
|
"eval_samples_per_second": 335.002, |
|
"eval_steps_per_second": 10.924, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 9.799005508422852, |
|
"learning_rate": 0.0, |
|
"loss": 0.0312, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9456521739130435, |
|
"eval_loss": 0.31814736127853394, |
|
"eval_runtime": 0.2764, |
|
"eval_samples_per_second": 332.886, |
|
"eval_steps_per_second": 10.855, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 180, |
|
"total_flos": 4.3676735454019584e+17, |
|
"train_loss": 0.1091307305627399, |
|
"train_runtime": 203.4457, |
|
"train_samples_per_second": 109.71, |
|
"train_steps_per_second": 0.885 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.3676735454019584e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|