|
{ |
|
"best_metric": 0.09375, |
|
"best_model_checkpoint": "videomae-base-finetuned-NOSTRO-Nuovo-subset/checkpoint-280", |
|
"epoch": 3.0625, |
|
"eval_steps": 500, |
|
"global_step": 448, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022321428571428572, |
|
"grad_norm": 10.661712646484375, |
|
"learning_rate": 0.004888392857142857, |
|
"loss": 3.7577, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.044642857142857144, |
|
"grad_norm": 11.643506050109863, |
|
"learning_rate": 0.004776785714285714, |
|
"loss": 3.8846, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06696428571428571, |
|
"grad_norm": 6.997533321380615, |
|
"learning_rate": 0.004665178571428572, |
|
"loss": 3.5104, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08928571428571429, |
|
"grad_norm": 3.857959270477295, |
|
"learning_rate": 0.0045535714285714285, |
|
"loss": 3.0997, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11160714285714286, |
|
"grad_norm": 7.694955348968506, |
|
"learning_rate": 0.004441964285714285, |
|
"loss": 3.0891, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13392857142857142, |
|
"grad_norm": 6.335569858551025, |
|
"learning_rate": 0.004330357142857143, |
|
"loss": 3.1063, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 6.71449089050293, |
|
"learning_rate": 0.00421875, |
|
"loss": 2.9859, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 3.177128553390503, |
|
"learning_rate": 0.004107142857142857, |
|
"loss": 3.0683, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20089285714285715, |
|
"grad_norm": 3.323209285736084, |
|
"learning_rate": 0.0039955357142857145, |
|
"loss": 2.7508, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22321428571428573, |
|
"grad_norm": 3.6122429370880127, |
|
"learning_rate": 0.0038839285714285716, |
|
"loss": 2.6113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24553571428571427, |
|
"grad_norm": 2.3688178062438965, |
|
"learning_rate": 0.0037723214285714287, |
|
"loss": 1.987, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26785714285714285, |
|
"grad_norm": 4.9541215896606445, |
|
"learning_rate": 0.0036607142857142854, |
|
"loss": 3.0538, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29017857142857145, |
|
"grad_norm": 2.793215751647949, |
|
"learning_rate": 0.003549107142857143, |
|
"loss": 2.9083, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 2.4145069122314453, |
|
"learning_rate": 0.0034375, |
|
"loss": 2.8766, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"eval_accuracy": 0.0625, |
|
"eval_loss": 2.7276196479797363, |
|
"eval_runtime": 43.9026, |
|
"eval_samples_per_second": 1.458, |
|
"eval_steps_per_second": 0.364, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0223214285714286, |
|
"grad_norm": 3.8258724212646484, |
|
"learning_rate": 0.003325892857142857, |
|
"loss": 2.7419, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0446428571428572, |
|
"grad_norm": 2.8611817359924316, |
|
"learning_rate": 0.0032142857142857147, |
|
"loss": 2.9858, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0669642857142858, |
|
"grad_norm": 1.5673381090164185, |
|
"learning_rate": 0.0031026785714285718, |
|
"loss": 2.7431, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0892857142857142, |
|
"grad_norm": 1.6212363243103027, |
|
"learning_rate": 0.0029910714285714284, |
|
"loss": 2.6825, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1116071428571428, |
|
"grad_norm": 2.7107715606689453, |
|
"learning_rate": 0.0028794642857142855, |
|
"loss": 2.4267, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1339285714285714, |
|
"grad_norm": 2.5624172687530518, |
|
"learning_rate": 0.002767857142857143, |
|
"loss": 2.4435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.15625, |
|
"grad_norm": 1.173656940460205, |
|
"learning_rate": 0.00265625, |
|
"loss": 2.8848, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1785714285714286, |
|
"grad_norm": 1.2296289205551147, |
|
"learning_rate": 0.002544642857142857, |
|
"loss": 2.7724, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2008928571428572, |
|
"grad_norm": 1.0573370456695557, |
|
"learning_rate": 0.0024330357142857144, |
|
"loss": 2.7356, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2232142857142858, |
|
"grad_norm": 0.9930482506752014, |
|
"learning_rate": 0.0023214285714285715, |
|
"loss": 2.5956, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2455357142857142, |
|
"grad_norm": 0.8850785493850708, |
|
"learning_rate": 0.0022098214285714286, |
|
"loss": 2.6961, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2678571428571428, |
|
"grad_norm": 1.2825168371200562, |
|
"learning_rate": 0.0020982142857142857, |
|
"loss": 2.6601, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2901785714285714, |
|
"grad_norm": 1.7537394762039185, |
|
"learning_rate": 0.001986607142857143, |
|
"loss": 2.5393, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 1.3740110397338867, |
|
"learning_rate": 0.001875, |
|
"loss": 2.719, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"eval_accuracy": 0.09375, |
|
"eval_loss": 2.659668445587158, |
|
"eval_runtime": 44.7095, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.358, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.0223214285714284, |
|
"grad_norm": 1.630075216293335, |
|
"learning_rate": 0.0017633928571428573, |
|
"loss": 2.6041, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.044642857142857, |
|
"grad_norm": 1.299541711807251, |
|
"learning_rate": 0.0016517857142857144, |
|
"loss": 2.5322, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0669642857142856, |
|
"grad_norm": 1.2279021739959717, |
|
"learning_rate": 0.0015401785714285715, |
|
"loss": 2.7214, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.0892857142857144, |
|
"grad_norm": 0.823603630065918, |
|
"learning_rate": 0.0014285714285714286, |
|
"loss": 2.5328, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.111607142857143, |
|
"grad_norm": 1.4392638206481934, |
|
"learning_rate": 0.0013169642857142857, |
|
"loss": 2.6212, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.1339285714285716, |
|
"grad_norm": 1.9096788167953491, |
|
"learning_rate": 0.001205357142857143, |
|
"loss": 2.6821, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.15625, |
|
"grad_norm": 1.3150078058242798, |
|
"learning_rate": 0.00109375, |
|
"loss": 2.4551, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1785714285714284, |
|
"grad_norm": 1.3114469051361084, |
|
"learning_rate": 0.0009821428571428572, |
|
"loss": 2.5904, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.200892857142857, |
|
"grad_norm": 1.3170655965805054, |
|
"learning_rate": 0.0008705357142857143, |
|
"loss": 2.5248, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.2232142857142856, |
|
"grad_norm": 1.2158620357513428, |
|
"learning_rate": 0.0007589285714285714, |
|
"loss": 2.6605, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2455357142857144, |
|
"grad_norm": 1.0650196075439453, |
|
"learning_rate": 0.0006473214285714286, |
|
"loss": 2.7011, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.267857142857143, |
|
"grad_norm": 0.740188479423523, |
|
"learning_rate": 0.0005357142857142857, |
|
"loss": 2.6101, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2901785714285716, |
|
"grad_norm": 0.9205682873725891, |
|
"learning_rate": 0.0004241071428571429, |
|
"loss": 2.5906, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.3125, |
|
"grad_norm": 0.952158510684967, |
|
"learning_rate": 0.0003125, |
|
"loss": 2.6488, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.3125, |
|
"eval_accuracy": 0.09375, |
|
"eval_loss": 2.63785719871521, |
|
"eval_runtime": 45.7954, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.349, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.0223214285714284, |
|
"grad_norm": 0.751361608505249, |
|
"learning_rate": 0.00020089285714285717, |
|
"loss": 2.5654, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.044642857142857, |
|
"grad_norm": 1.7467005252838135, |
|
"learning_rate": 8.928571428571429e-05, |
|
"loss": 2.573, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.0625, |
|
"eval_accuracy": 0.09375, |
|
"eval_loss": 2.6362757682800293, |
|
"eval_runtime": 46.9191, |
|
"eval_samples_per_second": 1.364, |
|
"eval_steps_per_second": 0.341, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.0625, |
|
"step": 448, |
|
"total_flos": 2.2294481154124677e+18, |
|
"train_loss": 2.7684556926999773, |
|
"train_runtime": 2238.8021, |
|
"train_samples_per_second": 0.8, |
|
"train_steps_per_second": 0.2 |
|
}, |
|
{ |
|
"epoch": 3.0625, |
|
"eval_accuracy": 0.15853658536585366, |
|
"eval_loss": 2.5910537242889404, |
|
"eval_runtime": 69.9547, |
|
"eval_samples_per_second": 1.172, |
|
"eval_steps_per_second": 0.3, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.0625, |
|
"eval_accuracy": 0.15853658536585366, |
|
"eval_loss": 2.5910537242889404, |
|
"eval_runtime": 70.209, |
|
"eval_samples_per_second": 1.168, |
|
"eval_steps_per_second": 0.299, |
|
"step": 448 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 448, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2294481154124677e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|