|
{ |
|
"best_metric": 0.1013597771525383, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.02297266253158741, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00011486331265793706, |
|
"grad_norm": 19.581317901611328, |
|
"learning_rate": 5e-06, |
|
"loss": 1.994, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00011486331265793706, |
|
"eval_loss": 0.5246481895446777, |
|
"eval_runtime": 1466.3245, |
|
"eval_samples_per_second": 10.0, |
|
"eval_steps_per_second": 5.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00022972662531587412, |
|
"grad_norm": 17.236621856689453, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0861, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00034458993797381116, |
|
"grad_norm": 11.309477806091309, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.1709, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00045945325063174823, |
|
"grad_norm": 5.152942180633545, |
|
"learning_rate": 2e-05, |
|
"loss": 1.712, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0005743165632896852, |
|
"grad_norm": 4.796749114990234, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.881, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0006891798759476223, |
|
"grad_norm": 3.7207891941070557, |
|
"learning_rate": 3e-05, |
|
"loss": 1.899, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0008040431886055594, |
|
"grad_norm": 2.4096086025238037, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.8971, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0009189065012634965, |
|
"grad_norm": 4.16554594039917, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8821, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0010337698139214334, |
|
"grad_norm": 2.0596752166748047, |
|
"learning_rate": 4.5e-05, |
|
"loss": 2.1518, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0011486331265793705, |
|
"grad_norm": 3.1254591941833496, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8471, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0012634964392373076, |
|
"grad_norm": 1.5779181718826294, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.5703, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0013783597518952446, |
|
"grad_norm": 4.009050369262695, |
|
"learning_rate": 6e-05, |
|
"loss": 1.8472, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0014932230645531817, |
|
"grad_norm": 3.238170623779297, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 1.6825, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0016080863772111188, |
|
"grad_norm": 7.195780277252197, |
|
"learning_rate": 7e-05, |
|
"loss": 1.6415, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0017229496898690559, |
|
"grad_norm": 1.9010658264160156, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.5549, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001837813002526993, |
|
"grad_norm": 2.798220634460449, |
|
"learning_rate": 8e-05, |
|
"loss": 2.0429, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00195267631518493, |
|
"grad_norm": 2.5278286933898926, |
|
"learning_rate": 8.5e-05, |
|
"loss": 1.7963, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.002067539627842867, |
|
"grad_norm": 1.599841833114624, |
|
"learning_rate": 9e-05, |
|
"loss": 1.5025, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.002182402940500804, |
|
"grad_norm": 2.7871410846710205, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.5943, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.002297266253158741, |
|
"grad_norm": 1.6238923072814941, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5476, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002412129565816678, |
|
"grad_norm": 1.6949362754821777, |
|
"learning_rate": 9.999238475781957e-05, |
|
"loss": 1.789, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002526992878474615, |
|
"grad_norm": 1.7465438842773438, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 1.6521, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.002641856191132552, |
|
"grad_norm": 2.695739507675171, |
|
"learning_rate": 9.99314767377287e-05, |
|
"loss": 1.6917, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0027567195037904893, |
|
"grad_norm": 2.790419101715088, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 1.9761, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0028715828164484264, |
|
"grad_norm": 4.338637828826904, |
|
"learning_rate": 9.980973490458728e-05, |
|
"loss": 1.7246, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0029864461291063634, |
|
"grad_norm": 2.29007625579834, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 1.4761, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0031013094417643005, |
|
"grad_norm": 1.7599883079528809, |
|
"learning_rate": 9.962730758206611e-05, |
|
"loss": 1.6964, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0032161727544222376, |
|
"grad_norm": 1.6065396070480347, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 1.5706, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0033310360670801746, |
|
"grad_norm": 2.0913872718811035, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 1.6397, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0034458993797381117, |
|
"grad_norm": 1.4033796787261963, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 1.493, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003560762692396049, |
|
"grad_norm": 1.7742940187454224, |
|
"learning_rate": 9.908135917238321e-05, |
|
"loss": 1.5057, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.003675626005053986, |
|
"grad_norm": 2.7684073448181152, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 1.6441, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.003790489317711923, |
|
"grad_norm": 2.2858612537384033, |
|
"learning_rate": 9.871850323926177e-05, |
|
"loss": 1.6578, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00390535263036986, |
|
"grad_norm": 1.846916913986206, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 1.3874, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.004020215943027797, |
|
"grad_norm": 1.7534124851226807, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 1.6324, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.004135079255685734, |
|
"grad_norm": 2.536592960357666, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 1.5962, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.004249942568343671, |
|
"grad_norm": 2.0697085857391357, |
|
"learning_rate": 9.781523779815179e-05, |
|
"loss": 1.3835, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.004364805881001608, |
|
"grad_norm": 2.500739336013794, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.4074, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.004479669193659545, |
|
"grad_norm": 1.6976559162139893, |
|
"learning_rate": 9.727592877996585e-05, |
|
"loss": 1.1213, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.004594532506317482, |
|
"grad_norm": 1.8822135925292969, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 1.1204, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0047093958189754195, |
|
"grad_norm": 2.3586180210113525, |
|
"learning_rate": 9.667902132486009e-05, |
|
"loss": 1.3198, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.004824259131633356, |
|
"grad_norm": 2.3203370571136475, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 1.5349, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.004939122444291294, |
|
"grad_norm": 2.3646602630615234, |
|
"learning_rate": 9.602524267262203e-05, |
|
"loss": 1.4018, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.00505398575694923, |
|
"grad_norm": 2.3897457122802734, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 1.3423, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.005168849069607168, |
|
"grad_norm": 3.052333354949951, |
|
"learning_rate": 9.53153893518325e-05, |
|
"loss": 1.4598, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.005283712382265104, |
|
"grad_norm": 2.3878042697906494, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 1.3684, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.005398575694923042, |
|
"grad_norm": 3.579554557800293, |
|
"learning_rate": 9.45503262094184e-05, |
|
"loss": 1.3765, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.005513439007580979, |
|
"grad_norm": 2.688589096069336, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 1.4563, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.005628302320238916, |
|
"grad_norm": 2.9909780025482178, |
|
"learning_rate": 9.373098535696979e-05, |
|
"loss": 1.1294, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.005743165632896853, |
|
"grad_norm": 1.66656494140625, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.2631, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005743165632896853, |
|
"eval_loss": 0.30388450622558594, |
|
"eval_runtime": 1471.9652, |
|
"eval_samples_per_second": 9.962, |
|
"eval_steps_per_second": 4.981, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00585802894555479, |
|
"grad_norm": 2.137791156768799, |
|
"learning_rate": 9.285836503510562e-05, |
|
"loss": 1.2646, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.005972892258212727, |
|
"grad_norm": 2.5329372882843018, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 1.0451, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0060877555708706635, |
|
"grad_norm": 2.620466470718384, |
|
"learning_rate": 9.193352839727121e-05, |
|
"loss": 1.1558, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.006202618883528601, |
|
"grad_norm": 2.2557244300842285, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 1.3604, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.006317482196186538, |
|
"grad_norm": 1.8479572534561157, |
|
"learning_rate": 9.09576022144496e-05, |
|
"loss": 1.2608, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.006432345508844475, |
|
"grad_norm": 2.1417593955993652, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.3318, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.006547208821502412, |
|
"grad_norm": 2.672443151473999, |
|
"learning_rate": 8.993177550236464e-05, |
|
"loss": 1.0541, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.006662072134160349, |
|
"grad_norm": 2.344717025756836, |
|
"learning_rate": 8.940053768033609e-05, |
|
"loss": 1.2237, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.006776935446818286, |
|
"grad_norm": 2.390180826187134, |
|
"learning_rate": 8.885729807284856e-05, |
|
"loss": 0.9352, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.006891798759476223, |
|
"grad_norm": 3.0167908668518066, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 1.158, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00700666207213416, |
|
"grad_norm": 3.0811192989349365, |
|
"learning_rate": 8.773547901113862e-05, |
|
"loss": 1.1718, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.007121525384792098, |
|
"grad_norm": 3.3268229961395264, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 1.2555, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.007236388697450034, |
|
"grad_norm": 2.296539306640625, |
|
"learning_rate": 8.656768508095853e-05, |
|
"loss": 1.0316, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.007351252010107972, |
|
"grad_norm": 2.4139859676361084, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 0.8659, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.007466115322765908, |
|
"grad_norm": 2.5878524780273438, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.16, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.007580978635423846, |
|
"grad_norm": 2.180026054382324, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 1.1024, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0076958419480817825, |
|
"grad_norm": 1.9273844957351685, |
|
"learning_rate": 8.409991800312493e-05, |
|
"loss": 0.8108, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.00781070526073972, |
|
"grad_norm": 3.0962324142456055, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 1.0571, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.007925568573397657, |
|
"grad_norm": 2.6275441646575928, |
|
"learning_rate": 8.280295144952536e-05, |
|
"loss": 0.8768, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.008040431886055594, |
|
"grad_norm": 3.156315803527832, |
|
"learning_rate": 8.213938048432697e-05, |
|
"loss": 1.0539, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.008155295198713532, |
|
"grad_norm": 2.818579912185669, |
|
"learning_rate": 8.146601955249188e-05, |
|
"loss": 0.9232, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.008270158511371467, |
|
"grad_norm": 2.329266309738159, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 1.0234, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.008385021824029405, |
|
"grad_norm": 3.1287198066711426, |
|
"learning_rate": 8.009075115760243e-05, |
|
"loss": 1.2203, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.008499885136687342, |
|
"grad_norm": 2.1660828590393066, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.8592, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.00861474844934528, |
|
"grad_norm": 2.362858533859253, |
|
"learning_rate": 7.86788218175523e-05, |
|
"loss": 1.0678, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.008729611762003216, |
|
"grad_norm": 2.5020713806152344, |
|
"learning_rate": 7.795964517353735e-05, |
|
"loss": 0.8854, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.008844475074661153, |
|
"grad_norm": 2.5885136127471924, |
|
"learning_rate": 7.723195175075136e-05, |
|
"loss": 0.8222, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.00895933838731909, |
|
"grad_norm": 3.7722344398498535, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 1.1339, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.009074201699977026, |
|
"grad_norm": 2.223078489303589, |
|
"learning_rate": 7.575190374550272e-05, |
|
"loss": 0.7033, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.009189065012634964, |
|
"grad_norm": 2.346508026123047, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.7375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.009303928325292901, |
|
"grad_norm": 2.3102056980133057, |
|
"learning_rate": 7.424048101231686e-05, |
|
"loss": 0.8455, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.009418791637950839, |
|
"grad_norm": 2.787309169769287, |
|
"learning_rate": 7.347357813929454e-05, |
|
"loss": 1.0711, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.009533654950608775, |
|
"grad_norm": 2.9215545654296875, |
|
"learning_rate": 7.269952498697734e-05, |
|
"loss": 0.8927, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.009648518263266712, |
|
"grad_norm": 2.4672605991363525, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 0.713, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.00976338157592465, |
|
"grad_norm": 3.549206018447876, |
|
"learning_rate": 7.113091308703498e-05, |
|
"loss": 1.0348, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.009878244888582587, |
|
"grad_norm": 2.957688093185425, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 0.9689, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.009993108201240523, |
|
"grad_norm": 2.4251294136047363, |
|
"learning_rate": 6.953655642446368e-05, |
|
"loss": 0.6512, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.01010797151389846, |
|
"grad_norm": 2.5786890983581543, |
|
"learning_rate": 6.873032967079561e-05, |
|
"loss": 0.7163, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.010222834826556398, |
|
"grad_norm": 3.341750144958496, |
|
"learning_rate": 6.7918397477265e-05, |
|
"loss": 0.6509, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.010337698139214336, |
|
"grad_norm": 3.332127094268799, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 1.0058, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.010452561451872271, |
|
"grad_norm": 3.31768536567688, |
|
"learning_rate": 6.627840772285784e-05, |
|
"loss": 0.7874, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.010567424764530209, |
|
"grad_norm": 2.435286521911621, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.6309, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.010682288077188146, |
|
"grad_norm": 2.9863882064819336, |
|
"learning_rate": 6.461858523613684e-05, |
|
"loss": 0.8979, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.010797151389846084, |
|
"grad_norm": 3.188188314437866, |
|
"learning_rate": 6.378186779084995e-05, |
|
"loss": 0.8585, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.01091201470250402, |
|
"grad_norm": 3.706717014312744, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 0.8157, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.011026878015161957, |
|
"grad_norm": 3.348717212677002, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 0.6131, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.011141741327819895, |
|
"grad_norm": 3.345127820968628, |
|
"learning_rate": 6.124755271719325e-05, |
|
"loss": 0.7124, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.011256604640477832, |
|
"grad_norm": 3.588732957839966, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 0.6565, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.011371467953135768, |
|
"grad_norm": 3.6394217014312744, |
|
"learning_rate": 5.9540449768827246e-05, |
|
"loss": 0.6834, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.011486331265793705, |
|
"grad_norm": 3.545775890350342, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 0.9438, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011486331265793705, |
|
"eval_loss": 0.17487511038780212, |
|
"eval_runtime": 1471.3897, |
|
"eval_samples_per_second": 9.965, |
|
"eval_steps_per_second": 4.983, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011601194578451643, |
|
"grad_norm": 2.6634979248046875, |
|
"learning_rate": 5.782172325201155e-05, |
|
"loss": 0.6536, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.01171605789110958, |
|
"grad_norm": 2.7935872077941895, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 0.8657, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.011830921203767516, |
|
"grad_norm": 3.1238162517547607, |
|
"learning_rate": 5.6093467170257374e-05, |
|
"loss": 0.6678, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.011945784516425454, |
|
"grad_norm": 3.002263069152832, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 0.6514, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.012060647829083391, |
|
"grad_norm": 2.975006580352783, |
|
"learning_rate": 5.435778713738292e-05, |
|
"loss": 0.4484, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.012175511141741327, |
|
"grad_norm": 2.8138067722320557, |
|
"learning_rate": 5.348782368720626e-05, |
|
"loss": 0.5668, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.012290374454399264, |
|
"grad_norm": 2.3226609230041504, |
|
"learning_rate": 5.26167978121472e-05, |
|
"loss": 0.4671, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.012405237767057202, |
|
"grad_norm": 3.791348934173584, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 0.6958, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.01252010107971514, |
|
"grad_norm": 2.7212183475494385, |
|
"learning_rate": 5.0872620321864185e-05, |
|
"loss": 0.6117, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.012634964392373075, |
|
"grad_norm": 3.5150516033172607, |
|
"learning_rate": 5e-05, |
|
"loss": 0.822, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.012749827705031013, |
|
"grad_norm": 2.905332088470459, |
|
"learning_rate": 4.912737967813583e-05, |
|
"loss": 0.6823, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.01286469101768895, |
|
"grad_norm": 3.329963207244873, |
|
"learning_rate": 4.825502516487497e-05, |
|
"loss": 0.559, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.012979554330346888, |
|
"grad_norm": 3.3605265617370605, |
|
"learning_rate": 4.738320218785281e-05, |
|
"loss": 0.5544, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.013094417643004824, |
|
"grad_norm": 3.592366933822632, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 0.5781, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.013209280955662761, |
|
"grad_norm": 3.577732563018799, |
|
"learning_rate": 4.564221286261709e-05, |
|
"loss": 0.6123, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.013324144268320699, |
|
"grad_norm": 4.342945575714111, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 0.7269, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.013439007580978636, |
|
"grad_norm": 2.921304941177368, |
|
"learning_rate": 4.390653282974264e-05, |
|
"loss": 0.5198, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.013553870893636572, |
|
"grad_norm": 3.515984058380127, |
|
"learning_rate": 4.3041344951996746e-05, |
|
"loss": 0.9182, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.01366873420629451, |
|
"grad_norm": 2.688448190689087, |
|
"learning_rate": 4.2178276747988446e-05, |
|
"loss": 0.6087, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.013783597518952447, |
|
"grad_norm": 2.6708593368530273, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 0.5885, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.013898460831610384, |
|
"grad_norm": 2.5848822593688965, |
|
"learning_rate": 4.045955023117276e-05, |
|
"loss": 0.5533, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.01401332414426832, |
|
"grad_norm": 3.2022759914398193, |
|
"learning_rate": 3.960441545911204e-05, |
|
"loss": 0.5861, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.014128187456926258, |
|
"grad_norm": 2.4787240028381348, |
|
"learning_rate": 3.875244728280676e-05, |
|
"loss": 0.4025, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.014243050769584195, |
|
"grad_norm": 3.095510959625244, |
|
"learning_rate": 3.790390522001662e-05, |
|
"loss": 0.5104, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.014357914082242133, |
|
"grad_norm": 3.846745491027832, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 0.5936, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.014472777394900068, |
|
"grad_norm": 2.9915177822113037, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 0.5739, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.014587640707558006, |
|
"grad_norm": 2.5920958518981934, |
|
"learning_rate": 3.5381414763863166e-05, |
|
"loss": 0.4191, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.014702504020215943, |
|
"grad_norm": 2.4326400756835938, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.437, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.014817367332873881, |
|
"grad_norm": 2.664111614227295, |
|
"learning_rate": 3.372159227714218e-05, |
|
"loss": 0.4107, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.014932230645531817, |
|
"grad_norm": 3.015989303588867, |
|
"learning_rate": 3.289899283371657e-05, |
|
"loss": 0.6345, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.015047093958189754, |
|
"grad_norm": 2.8984575271606445, |
|
"learning_rate": 3.2081602522734986e-05, |
|
"loss": 0.5547, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.015161957270847692, |
|
"grad_norm": 3.772346019744873, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 0.6239, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.015276820583505627, |
|
"grad_norm": 3.629082441329956, |
|
"learning_rate": 3.046344357553632e-05, |
|
"loss": 0.5739, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.015391683896163565, |
|
"grad_norm": 3.2363765239715576, |
|
"learning_rate": 2.9663167846209998e-05, |
|
"loss": 0.661, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.015506547208821502, |
|
"grad_norm": 2.6455376148223877, |
|
"learning_rate": 2.886908691296504e-05, |
|
"loss": 0.4586, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.01562141052147944, |
|
"grad_norm": 2.9570345878601074, |
|
"learning_rate": 2.8081442660546125e-05, |
|
"loss": 0.6333, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.015736273834137376, |
|
"grad_norm": 2.019033670425415, |
|
"learning_rate": 2.7300475013022663e-05, |
|
"loss": 0.3232, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.015851137146795313, |
|
"grad_norm": 2.6063356399536133, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 0.4487, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.01596600045945325, |
|
"grad_norm": 3.5841784477233887, |
|
"learning_rate": 2.575951898768315e-05, |
|
"loss": 0.6579, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.01608086377211119, |
|
"grad_norm": 3.1793031692504883, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.6123, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.016195727084769126, |
|
"grad_norm": 1.9112600088119507, |
|
"learning_rate": 2.4248096254497288e-05, |
|
"loss": 0.2844, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.016310590397427063, |
|
"grad_norm": 2.6747708320617676, |
|
"learning_rate": 2.350403678833976e-05, |
|
"loss": 0.4366, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.016425453710084997, |
|
"grad_norm": 2.7817370891571045, |
|
"learning_rate": 2.2768048249248648e-05, |
|
"loss": 0.4206, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.016540317022742935, |
|
"grad_norm": 3.0754709243774414, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 0.4934, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.016655180335400872, |
|
"grad_norm": 2.9075064659118652, |
|
"learning_rate": 2.132117818244771e-05, |
|
"loss": 0.3818, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.01677004364805881, |
|
"grad_norm": 3.9040424823760986, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.6472, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.016884906960716747, |
|
"grad_norm": 2.7987887859344482, |
|
"learning_rate": 1.9909248842397584e-05, |
|
"loss": 0.3802, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.016999770273374685, |
|
"grad_norm": 3.3494410514831543, |
|
"learning_rate": 1.9216926233717085e-05, |
|
"loss": 0.5155, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.017114633586032622, |
|
"grad_norm": 2.892005681991577, |
|
"learning_rate": 1.8533980447508137e-05, |
|
"loss": 0.385, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.01722949689869056, |
|
"grad_norm": 2.724477529525757, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 0.5024, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01722949689869056, |
|
"eval_loss": 0.11594025045633316, |
|
"eval_runtime": 1471.5665, |
|
"eval_samples_per_second": 9.964, |
|
"eval_steps_per_second": 4.982, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.017344360211348494, |
|
"grad_norm": 3.007096767425537, |
|
"learning_rate": 1.7197048550474643e-05, |
|
"loss": 0.5254, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.01745922352400643, |
|
"grad_norm": 2.6034908294677734, |
|
"learning_rate": 1.6543469682057106e-05, |
|
"loss": 0.4203, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.01757408683666437, |
|
"grad_norm": 3.8404593467712402, |
|
"learning_rate": 1.5900081996875083e-05, |
|
"loss": 0.7015, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.017688950149322306, |
|
"grad_norm": 2.672938346862793, |
|
"learning_rate": 1.526708147705013e-05, |
|
"loss": 0.418, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.017803813461980244, |
|
"grad_norm": 2.8745882511138916, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.4368, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.01791867677463818, |
|
"grad_norm": 1.8944249153137207, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 0.2554, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.01803354008729612, |
|
"grad_norm": 2.5466866493225098, |
|
"learning_rate": 1.3432314919041478e-05, |
|
"loss": 0.4435, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.018148403399954053, |
|
"grad_norm": 3.0349109172821045, |
|
"learning_rate": 1.2842758726130283e-05, |
|
"loss": 0.4948, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.01826326671261199, |
|
"grad_norm": 2.655149459838867, |
|
"learning_rate": 1.22645209888614e-05, |
|
"loss": 0.4062, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.018378130025269928, |
|
"grad_norm": 2.7073023319244385, |
|
"learning_rate": 1.1697777844051105e-05, |
|
"loss": 0.3941, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.018492993337927865, |
|
"grad_norm": 2.6231350898742676, |
|
"learning_rate": 1.1142701927151456e-05, |
|
"loss": 0.4092, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.018607856650585803, |
|
"grad_norm": 2.469143867492676, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 0.3605, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.01872271996324374, |
|
"grad_norm": 2.735646963119507, |
|
"learning_rate": 1.006822449763537e-05, |
|
"loss": 0.4245, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.018837583275901678, |
|
"grad_norm": 4.04184627532959, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.5681, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.018952446588559616, |
|
"grad_norm": 2.9459590911865234, |
|
"learning_rate": 9.042397785550405e-06, |
|
"loss": 0.3627, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.01906730990121755, |
|
"grad_norm": 3.2683775424957275, |
|
"learning_rate": 8.548121372247918e-06, |
|
"loss": 0.4684, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.019182173213875487, |
|
"grad_norm": 2.3902087211608887, |
|
"learning_rate": 8.066471602728803e-06, |
|
"loss": 0.3512, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.019297036526533425, |
|
"grad_norm": 2.420001745223999, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 0.3433, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.019411899839191362, |
|
"grad_norm": 2.396726131439209, |
|
"learning_rate": 7.1416349648943894e-06, |
|
"loss": 0.3181, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.0195267631518493, |
|
"grad_norm": 3.2419331073760986, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.5015, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.019641626464507237, |
|
"grad_norm": 1.9583468437194824, |
|
"learning_rate": 6.269014643030213e-06, |
|
"loss": 0.2504, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.019756489777165175, |
|
"grad_norm": 3.0308303833007812, |
|
"learning_rate": 5.852620357053651e-06, |
|
"loss": 0.5331, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.019871353089823112, |
|
"grad_norm": 2.8468117713928223, |
|
"learning_rate": 5.449673790581611e-06, |
|
"loss": 0.3874, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.019986216402481046, |
|
"grad_norm": 2.1451475620269775, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 0.274, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.020101079715138984, |
|
"grad_norm": 2.728893756866455, |
|
"learning_rate": 4.684610648167503e-06, |
|
"loss": 0.401, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02021594302779692, |
|
"grad_norm": 3.027879238128662, |
|
"learning_rate": 4.322727117869951e-06, |
|
"loss": 0.4416, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.02033080634045486, |
|
"grad_norm": 2.050403118133545, |
|
"learning_rate": 3.974757327377981e-06, |
|
"loss": 0.2707, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.020445669653112796, |
|
"grad_norm": 3.0397396087646484, |
|
"learning_rate": 3.6408072716606346e-06, |
|
"loss": 0.487, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.020560532965770734, |
|
"grad_norm": 3.4382946491241455, |
|
"learning_rate": 3.3209786751399187e-06, |
|
"loss": 0.6042, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.02067539627842867, |
|
"grad_norm": 2.5902884006500244, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 0.3982, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02079025959108661, |
|
"grad_norm": 2.472612142562866, |
|
"learning_rate": 2.724071220034158e-06, |
|
"loss": 0.3634, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.020905122903744543, |
|
"grad_norm": 2.7177581787109375, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.4134, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.02101998621640248, |
|
"grad_norm": 2.9707839488983154, |
|
"learning_rate": 2.1847622018482283e-06, |
|
"loss": 0.5281, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.021134849529060418, |
|
"grad_norm": 2.7435781955718994, |
|
"learning_rate": 1.9369152030840556e-06, |
|
"loss": 0.4096, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.021249712841718355, |
|
"grad_norm": 2.72346568107605, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 0.4428, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.021364576154376293, |
|
"grad_norm": 2.1403918266296387, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 0.2873, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.02147943946703423, |
|
"grad_norm": 3.263788938522339, |
|
"learning_rate": 1.2814967607382432e-06, |
|
"loss": 0.5087, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.021594302779692168, |
|
"grad_norm": 2.661633253097534, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 0.402, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.021709166092350102, |
|
"grad_norm": 2.249889373779297, |
|
"learning_rate": 9.186408276168013e-07, |
|
"loss": 0.3121, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.02182402940500804, |
|
"grad_norm": 2.8329484462738037, |
|
"learning_rate": 7.596123493895991e-07, |
|
"loss": 0.4427, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.021938892717665977, |
|
"grad_norm": 2.6768405437469482, |
|
"learning_rate": 6.15582970243117e-07, |
|
"loss": 0.3991, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.022053756030323914, |
|
"grad_norm": 2.998131275177002, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 0.4568, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.022168619342981852, |
|
"grad_norm": 3.1869330406188965, |
|
"learning_rate": 3.7269241793390085e-07, |
|
"loss": 0.4539, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.02228348265563979, |
|
"grad_norm": 2.072638511657715, |
|
"learning_rate": 2.7390523158633554e-07, |
|
"loss": 0.2798, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.022398345968297727, |
|
"grad_norm": 2.542144298553467, |
|
"learning_rate": 1.9026509541272275e-07, |
|
"loss": 0.3783, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.022513209280955664, |
|
"grad_norm": 2.675973892211914, |
|
"learning_rate": 1.2179748700879012e-07, |
|
"loss": 0.4263, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.0226280725936136, |
|
"grad_norm": 2.341843366622925, |
|
"learning_rate": 6.852326227130834e-08, |
|
"loss": 0.3602, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.022742935906271536, |
|
"grad_norm": 2.840686082839966, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 0.4262, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.022857799218929473, |
|
"grad_norm": 1.991869568824768, |
|
"learning_rate": 7.615242180436522e-09, |
|
"loss": 0.272, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.02297266253158741, |
|
"grad_norm": 3.1009721755981445, |
|
"learning_rate": 0.0, |
|
"loss": 0.5344, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02297266253158741, |
|
"eval_loss": 0.1013597771525383, |
|
"eval_runtime": 1471.8234, |
|
"eval_samples_per_second": 9.962, |
|
"eval_steps_per_second": 4.982, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.861996017975296e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|