{
  "best_metric": 1.3801825046539307,
  "best_model_checkpoint": "./vit-emotion-classification/checkpoint-300",
  "epoch": 10.0,
  "eval_steps": 100,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "grad_norm": 2.134533643722534,
      "learning_rate": 0.000195,
      "loss": 2.0279,
      "step": 10
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.939251184463501,
      "learning_rate": 0.00019,
      "loss": 1.7986,
      "step": 20
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8353347778320312,
      "learning_rate": 0.00018500000000000002,
      "loss": 1.6841,
      "step": 30
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.401373863220215,
      "learning_rate": 0.00018,
      "loss": 1.5968,
      "step": 40
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.080655813217163,
      "learning_rate": 0.000175,
      "loss": 1.3349,
      "step": 50
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.146280527114868,
      "learning_rate": 0.00017,
      "loss": 1.2608,
      "step": 60
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.722163438796997,
      "learning_rate": 0.000165,
      "loss": 1.2728,
      "step": 70
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9687062501907349,
      "learning_rate": 0.00016,
      "loss": 1.1254,
      "step": 80
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.55076265335083,
      "learning_rate": 0.000155,
      "loss": 0.8877,
      "step": 90
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.256789445877075,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.8454,
      "step": 100
    },
    {
      "epoch": 2.5,
      "eval_accuracy": 0.48125,
      "eval_loss": 1.4373115301132202,
      "eval_runtime": 0.8376,
      "eval_samples_per_second": 191.019,
      "eval_steps_per_second": 23.877,
      "step": 100
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4422082901000977,
      "learning_rate": 0.000145,
      "loss": 0.8337,
      "step": 110
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.179633140563965,
      "learning_rate": 0.00014,
      "loss": 0.882,
      "step": 120
    },
    {
      "epoch": 3.25,
      "grad_norm": 1.4560832977294922,
      "learning_rate": 0.00013500000000000003,
      "loss": 0.5198,
      "step": 130
    },
    {
      "epoch": 3.5,
      "grad_norm": 2.0190017223358154,
      "learning_rate": 0.00013000000000000002,
      "loss": 0.4344,
      "step": 140
    },
    {
      "epoch": 3.75,
      "grad_norm": 3.2591023445129395,
      "learning_rate": 0.000125,
      "loss": 0.4698,
      "step": 150
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.652550220489502,
      "learning_rate": 0.00012,
      "loss": 0.54,
      "step": 160
    },
    {
      "epoch": 4.25,
      "grad_norm": 0.5937463641166687,
      "learning_rate": 0.00011499999999999999,
      "loss": 0.2737,
      "step": 170
    },
    {
      "epoch": 4.5,
      "grad_norm": 1.0296827554702759,
      "learning_rate": 0.00011000000000000002,
      "loss": 0.2402,
      "step": 180
    },
    {
      "epoch": 4.75,
      "grad_norm": 3.007828712463379,
      "learning_rate": 0.000105,
      "loss": 0.1952,
      "step": 190
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.936133623123169,
      "learning_rate": 0.0001,
      "loss": 0.2022,
      "step": 200
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.55,
      "eval_loss": 1.406724214553833,
      "eval_runtime": 0.8264,
      "eval_samples_per_second": 193.601,
      "eval_steps_per_second": 24.2,
      "step": 200
    },
    {
      "epoch": 5.25,
      "grad_norm": 0.3376343250274658,
      "learning_rate": 9.5e-05,
      "loss": 0.1086,
      "step": 210
    },
    {
      "epoch": 5.5,
      "grad_norm": 2.637477397918701,
      "learning_rate": 9e-05,
      "loss": 0.109,
      "step": 220
    },
    {
      "epoch": 5.75,
      "grad_norm": 3.05291485786438,
      "learning_rate": 8.5e-05,
      "loss": 0.0957,
      "step": 230
    },
    {
      "epoch": 6.0,
      "grad_norm": 3.055624485015869,
      "learning_rate": 8e-05,
      "loss": 0.136,
      "step": 240
    },
    {
      "epoch": 6.25,
      "grad_norm": 0.30109065771102905,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.0741,
      "step": 250
    },
    {
      "epoch": 6.5,
      "grad_norm": 0.19268257915973663,
      "learning_rate": 7e-05,
      "loss": 0.0664,
      "step": 260
    },
    {
      "epoch": 6.75,
      "grad_norm": 0.15611745417118073,
      "learning_rate": 6.500000000000001e-05,
      "loss": 0.0559,
      "step": 270
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.21113821864128113,
      "learning_rate": 6e-05,
      "loss": 0.0599,
      "step": 280
    },
    {
      "epoch": 7.25,
      "grad_norm": 0.160753071308136,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.049,
      "step": 290
    },
    {
      "epoch": 7.5,
      "grad_norm": 0.13460688292980194,
      "learning_rate": 5e-05,
      "loss": 0.0474,
      "step": 300
    },
    {
      "epoch": 7.5,
      "eval_accuracy": 0.6125,
      "eval_loss": 1.3801825046539307,
      "eval_runtime": 0.8118,
      "eval_samples_per_second": 197.084,
      "eval_steps_per_second": 24.635,
      "step": 300
    },
    {
      "epoch": 7.75,
      "grad_norm": 0.13902044296264648,
      "learning_rate": 4.5e-05,
      "loss": 0.0452,
      "step": 310
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.12478330731391907,
      "learning_rate": 4e-05,
      "loss": 0.0424,
      "step": 320
    },
    {
      "epoch": 8.25,
      "grad_norm": 0.11532563716173172,
      "learning_rate": 3.5e-05,
      "loss": 0.0409,
      "step": 330
    },
    {
      "epoch": 8.5,
      "grad_norm": 0.11974634230136871,
      "learning_rate": 3e-05,
      "loss": 0.0395,
      "step": 340
    },
    {
      "epoch": 8.75,
      "grad_norm": 0.1203409880399704,
      "learning_rate": 2.5e-05,
      "loss": 0.0393,
      "step": 350
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.13047201931476593,
      "learning_rate": 2e-05,
      "loss": 0.0386,
      "step": 360
    },
    {
      "epoch": 9.25,
      "grad_norm": 0.10817253589630127,
      "learning_rate": 1.5e-05,
      "loss": 0.0376,
      "step": 370
    },
    {
      "epoch": 9.5,
      "grad_norm": 0.12842506170272827,
      "learning_rate": 1e-05,
      "loss": 0.037,
      "step": 380
    },
    {
      "epoch": 9.75,
      "grad_norm": 0.12066592276096344,
      "learning_rate": 5e-06,
      "loss": 0.0371,
      "step": 390
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.13028773665428162,
      "learning_rate": 0.0,
      "loss": 0.0368,
      "step": 400
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.59375,
      "eval_loss": 1.4388375282287598,
      "eval_runtime": 0.8121,
      "eval_samples_per_second": 197.013,
      "eval_steps_per_second": 24.627,
      "step": 400
    },
    {
      "epoch": 10.0,
      "step": 400,
      "total_flos": 4.959754037231616e+17,
      "train_loss": 0.4905405020713806,
      "train_runtime": 117.1653,
      "train_samples_per_second": 54.624,
      "train_steps_per_second": 3.414
    }
  ],
  "logging_steps": 10,
  "max_steps": 400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.959754037231616e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}