|
{ |
|
"best_metric": 0.34081346423562414, |
|
"best_model_checkpoint": "videomae-base-finetuned-elderf1/checkpoint-73", |
|
"epoch": 9.0875, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.3531270027160645, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 1.7775, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.537198543548584, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 1.6877, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.151556968688965, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 1.7997, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.214244365692139, |
|
"learning_rate": 0.0005555555555555556, |
|
"loss": 1.8051, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.6136474609375, |
|
"learning_rate": 0.0006944444444444445, |
|
"loss": 1.7065, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.423031330108643, |
|
"learning_rate": 0.0008333333333333334, |
|
"loss": 1.7922, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.5037877559661865, |
|
"learning_rate": 0.0009722222222222222, |
|
"loss": 1.7358, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.34081346423562414, |
|
"eval_loss": 1.692335844039917, |
|
"eval_runtime": 379.7283, |
|
"eval_samples_per_second": 3.755, |
|
"eval_steps_per_second": 0.237, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 3.8850014209747314, |
|
"learning_rate": 0.0009876543209876543, |
|
"loss": 1.7738, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 6.418526649475098, |
|
"learning_rate": 0.0009722222222222222, |
|
"loss": 1.8192, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 6.298271179199219, |
|
"learning_rate": 0.0009567901234567902, |
|
"loss": 1.7833, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.652337551116943, |
|
"learning_rate": 0.000941358024691358, |
|
"loss": 1.7534, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 3.369626522064209, |
|
"learning_rate": 0.000925925925925926, |
|
"loss": 1.7376, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.182107448577881, |
|
"learning_rate": 0.0009104938271604939, |
|
"loss": 1.6406, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 3.763148069381714, |
|
"learning_rate": 0.0008950617283950618, |
|
"loss": 1.7163, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.3373071528751753, |
|
"eval_loss": 1.66623854637146, |
|
"eval_runtime": 421.0248, |
|
"eval_samples_per_second": 3.387, |
|
"eval_steps_per_second": 0.214, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 4.109099864959717, |
|
"learning_rate": 0.0008796296296296296, |
|
"loss": 1.6734, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 3.7187507152557373, |
|
"learning_rate": 0.0008641975308641975, |
|
"loss": 1.7535, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 2.9596164226531982, |
|
"learning_rate": 0.0008487654320987654, |
|
"loss": 1.7282, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 5.126859188079834, |
|
"learning_rate": 0.0008333333333333334, |
|
"loss": 1.7021, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 2.36877703666687, |
|
"learning_rate": 0.0008179012345679012, |
|
"loss": 1.6977, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 4.42868185043335, |
|
"learning_rate": 0.0008024691358024692, |
|
"loss": 1.6882, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 3.4869942665100098, |
|
"learning_rate": 0.0007870370370370372, |
|
"loss": 1.7018, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.34081346423562414, |
|
"eval_loss": 1.6378456354141235, |
|
"eval_runtime": 423.5487, |
|
"eval_samples_per_second": 3.367, |
|
"eval_steps_per_second": 0.212, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.7394754886627197, |
|
"learning_rate": 0.0007716049382716049, |
|
"loss": 1.7356, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 1.3488916158676147, |
|
"learning_rate": 0.0007561728395061729, |
|
"loss": 1.7024, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 3.196521282196045, |
|
"learning_rate": 0.0007407407407407407, |
|
"loss": 1.7094, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 3.9984352588653564, |
|
"learning_rate": 0.0007253086419753087, |
|
"loss": 1.6481, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 3.6886866092681885, |
|
"learning_rate": 0.0007098765432098766, |
|
"loss": 1.652, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 2.245149850845337, |
|
"learning_rate": 0.0006944444444444445, |
|
"loss": 1.7479, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 4.62326192855835, |
|
"learning_rate": 0.0006790123456790124, |
|
"loss": 1.7129, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 4.474867343902588, |
|
"learning_rate": 0.0006635802469135802, |
|
"loss": 1.7334, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.34011220196353437, |
|
"eval_loss": 1.6562532186508179, |
|
"eval_runtime": 383.4418, |
|
"eval_samples_per_second": 3.719, |
|
"eval_steps_per_second": 0.235, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 1.4795947074890137, |
|
"learning_rate": 0.0006481481481481481, |
|
"loss": 1.7427, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 2.173116683959961, |
|
"learning_rate": 0.0006327160493827161, |
|
"loss": 1.6894, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 2.731816291809082, |
|
"learning_rate": 0.0006172839506172839, |
|
"loss": 1.6818, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 2.120103120803833, |
|
"learning_rate": 0.0006018518518518519, |
|
"loss": 1.6782, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 3.7916502952575684, |
|
"learning_rate": 0.0005864197530864199, |
|
"loss": 1.7388, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 2.852003812789917, |
|
"learning_rate": 0.0005709876543209876, |
|
"loss": 1.7422, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 6.300606727600098, |
|
"learning_rate": 0.0005555555555555556, |
|
"loss": 1.672, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.23983169705469845, |
|
"eval_loss": 1.6567574739456177, |
|
"eval_runtime": 310.9306, |
|
"eval_samples_per_second": 4.586, |
|
"eval_steps_per_second": 0.289, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 5.469892501831055, |
|
"learning_rate": 0.0005401234567901234, |
|
"loss": 1.6958, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 3.4026269912719727, |
|
"learning_rate": 0.0005246913580246914, |
|
"loss": 1.7383, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 2.7335870265960693, |
|
"learning_rate": 0.0005092592592592593, |
|
"loss": 1.6763, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 4.079995155334473, |
|
"learning_rate": 0.0004938271604938272, |
|
"loss": 1.693, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 2.8691930770874023, |
|
"learning_rate": 0.0004783950617283951, |
|
"loss": 1.7293, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 2.4010772705078125, |
|
"learning_rate": 0.000462962962962963, |
|
"loss": 1.6812, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 4.233634948730469, |
|
"learning_rate": 0.0004475308641975309, |
|
"loss": 1.7095, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.3387096774193548, |
|
"eval_loss": 1.6313395500183105, |
|
"eval_runtime": 306.7441, |
|
"eval_samples_per_second": 4.649, |
|
"eval_steps_per_second": 0.293, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.844386100769043, |
|
"learning_rate": 0.00043209876543209873, |
|
"loss": 1.6502, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 3.7761685848236084, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 1.6892, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 2.8903658390045166, |
|
"learning_rate": 0.0004012345679012346, |
|
"loss": 1.6006, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 2.548739194869995, |
|
"learning_rate": 0.00038580246913580245, |
|
"loss": 1.7113, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 3.1980948448181152, |
|
"learning_rate": 0.00037037037037037035, |
|
"loss": 1.7259, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 3.119049549102783, |
|
"learning_rate": 0.0003549382716049383, |
|
"loss": 1.6634, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 3.465067148208618, |
|
"learning_rate": 0.0003395061728395062, |
|
"loss": 1.71, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 1.507797122001648, |
|
"learning_rate": 0.00032407407407407406, |
|
"loss": 1.7119, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.34081346423562414, |
|
"eval_loss": 1.6309115886688232, |
|
"eval_runtime": 295.3653, |
|
"eval_samples_per_second": 4.828, |
|
"eval_steps_per_second": 0.305, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 1.939512848854065, |
|
"learning_rate": 0.00030864197530864197, |
|
"loss": 1.6486, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 2.4698586463928223, |
|
"learning_rate": 0.00029320987654320993, |
|
"loss": 1.6754, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 1.9678858518600464, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 1.6025, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 3.6356217861175537, |
|
"learning_rate": 0.0002623456790123457, |
|
"loss": 1.6792, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 2.781039237976074, |
|
"learning_rate": 0.0002469135802469136, |
|
"loss": 1.7165, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 4.021714687347412, |
|
"learning_rate": 0.0002314814814814815, |
|
"loss": 1.6836, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 4.392849922180176, |
|
"learning_rate": 0.00021604938271604937, |
|
"loss": 1.6981, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.3288920056100982, |
|
"eval_loss": 1.6518133878707886, |
|
"eval_runtime": 319.5699, |
|
"eval_samples_per_second": 4.462, |
|
"eval_steps_per_second": 0.282, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 2.291691303253174, |
|
"learning_rate": 0.0002006172839506173, |
|
"loss": 1.6548, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 4.06191873550415, |
|
"learning_rate": 0.00018518518518518518, |
|
"loss": 1.5957, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 3.735381603240967, |
|
"learning_rate": 0.0001697530864197531, |
|
"loss": 1.6757, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 3.7696151733398438, |
|
"learning_rate": 0.00015432098765432098, |
|
"loss": 1.6349, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 2.210860013961792, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 1.7561, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 2.812994956970215, |
|
"learning_rate": 0.0001234567901234568, |
|
"loss": 1.6331, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 3.606325626373291, |
|
"learning_rate": 0.00010802469135802468, |
|
"loss": 1.7066, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.33099579242636745, |
|
"eval_loss": 1.6313475370407104, |
|
"eval_runtime": 304.3275, |
|
"eval_samples_per_second": 4.686, |
|
"eval_steps_per_second": 0.296, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.993828296661377, |
|
"learning_rate": 9.259259259259259e-05, |
|
"loss": 1.6485, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 1.9592925310134888, |
|
"learning_rate": 7.716049382716049e-05, |
|
"loss": 1.6776, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 3.105025291442871, |
|
"learning_rate": 6.17283950617284e-05, |
|
"loss": 1.6466, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 3.643643856048584, |
|
"learning_rate": 4.6296296296296294e-05, |
|
"loss": 1.6544, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 2.5237057209014893, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 1.6616, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 3.834527015686035, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 1.6271, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 1.777999997138977, |
|
"learning_rate": 0.0, |
|
"loss": 1.6476, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.3288920056100982, |
|
"eval_loss": 1.6337770223617554, |
|
"eval_runtime": 340.2408, |
|
"eval_samples_per_second": 4.191, |
|
"eval_steps_per_second": 0.265, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"step": 720, |
|
"total_flos": 1.4231811343419113e+19, |
|
"train_loss": 1.6995894723468357, |
|
"train_runtime": 6776.3985, |
|
"train_samples_per_second": 1.7, |
|
"train_steps_per_second": 0.106 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.34811715481171546, |
|
"eval_loss": 1.703281283378601, |
|
"eval_runtime": 256.932, |
|
"eval_samples_per_second": 4.651, |
|
"eval_steps_per_second": 0.292, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.34811715481171546, |
|
"eval_loss": 1.7031110525131226, |
|
"eval_runtime": 266.484, |
|
"eval_samples_per_second": 4.484, |
|
"eval_steps_per_second": 0.281, |
|
"step": 720 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 1.4231811343419113e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|