|
{
  "best_metric": 0.3048795163631439,
  "best_model_checkpoint": "./models/bert-italian-xxl-cased-ItaCoLA/checkpoint-700",
  "epoch": 10.0,
  "global_step": 2440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.41,
      "learning_rate": 1e-05,
      "loss": 0.4243,
      "step": 100
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.854122621564482,
      "eval_loss": 0.38053709268569946,
      "eval_runtime": 3.6758,
      "eval_samples_per_second": 257.362,
      "eval_steps_per_second": 16.323,
      "step": 100
    },
    {
      "epoch": 0.82,
      "learning_rate": 1e-05,
      "loss": 0.3806,
      "step": 200
    },
    {
      "epoch": 0.82,
      "eval_accuracy": 0.857293868921776,
      "eval_loss": 0.3861564099788666,
      "eval_runtime": 3.5095,
      "eval_samples_per_second": 269.553,
      "eval_steps_per_second": 17.096,
      "step": 200
    },
    {
      "epoch": 1.23,
      "learning_rate": 1e-05,
      "loss": 0.3289,
      "step": 300
    },
    {
      "epoch": 1.23,
      "eval_accuracy": 0.8678646934460887,
      "eval_loss": 0.35367050766944885,
      "eval_runtime": 3.7169,
      "eval_samples_per_second": 254.517,
      "eval_steps_per_second": 16.143,
      "step": 300
    },
    {
      "epoch": 1.64,
      "learning_rate": 1e-05,
      "loss": 0.2967,
      "step": 400
    },
    {
      "epoch": 1.64,
      "eval_accuracy": 0.8826638477801269,
      "eval_loss": 0.3165006935596466,
      "eval_runtime": 3.5485,
      "eval_samples_per_second": 266.59,
      "eval_steps_per_second": 16.908,
      "step": 400
    },
    {
      "epoch": 2.05,
      "learning_rate": 1e-05,
      "loss": 0.2992,
      "step": 500
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.8784355179704016,
      "eval_loss": 0.33816611766815186,
      "eval_runtime": 3.7064,
      "eval_samples_per_second": 255.231,
      "eval_steps_per_second": 16.188,
      "step": 500
    },
    {
      "epoch": 2.46,
      "learning_rate": 1e-05,
      "loss": 0.2273,
      "step": 600
    },
    {
      "epoch": 2.46,
      "eval_accuracy": 0.8816067653276956,
      "eval_loss": 0.329441100358963,
      "eval_runtime": 3.7539,
      "eval_samples_per_second": 252.003,
      "eval_steps_per_second": 15.983,
      "step": 600
    },
    {
      "epoch": 2.87,
      "learning_rate": 1e-05,
      "loss": 0.2215,
      "step": 700
    },
    {
      "epoch": 2.87,
      "eval_accuracy": 0.8911205073995772,
      "eval_loss": 0.3048795163631439,
      "eval_runtime": 3.5485,
      "eval_samples_per_second": 266.591,
      "eval_steps_per_second": 16.909,
      "step": 700
    },
    {
      "epoch": 3.28,
      "learning_rate": 1e-05,
      "loss": 0.1718,
      "step": 800
    },
    {
      "epoch": 3.28,
      "eval_accuracy": 0.8911205073995772,
      "eval_loss": 0.35308167338371277,
      "eval_runtime": 3.56,
      "eval_samples_per_second": 265.727,
      "eval_steps_per_second": 16.854,
      "step": 800
    },
    {
      "epoch": 3.69,
      "learning_rate": 1e-05,
      "loss": 0.1757,
      "step": 900
    },
    {
      "epoch": 3.69,
      "eval_accuracy": 0.8921775898520085,
      "eval_loss": 0.39034464955329895,
      "eval_runtime": 3.7329,
      "eval_samples_per_second": 253.42,
      "eval_steps_per_second": 16.073,
      "step": 900
    },
    {
      "epoch": 4.1,
      "learning_rate": 1e-05,
      "loss": 0.1698,
      "step": 1000
    },
    {
      "epoch": 4.1,
      "eval_accuracy": 0.8953488372093024,
      "eval_loss": 0.3871164321899414,
      "eval_runtime": 3.5471,
      "eval_samples_per_second": 266.695,
      "eval_steps_per_second": 16.915,
      "step": 1000
    },
    {
      "epoch": 4.51,
      "learning_rate": 1e-05,
      "loss": 0.1307,
      "step": 1100
    },
    {
      "epoch": 4.51,
      "eval_accuracy": 0.8953488372093024,
      "eval_loss": 0.42552220821380615,
      "eval_runtime": 3.5867,
      "eval_samples_per_second": 263.752,
      "eval_steps_per_second": 16.728,
      "step": 1100
    },
    {
      "epoch": 4.92,
      "learning_rate": 1e-05,
      "loss": 0.1426,
      "step": 1200
    },
    {
      "epoch": 4.92,
      "eval_accuracy": 0.8985200845665962,
      "eval_loss": 0.37290310859680176,
      "eval_runtime": 3.5578,
      "eval_samples_per_second": 265.898,
      "eval_steps_per_second": 16.865,
      "step": 1200
    },
    {
      "epoch": 5.33,
      "learning_rate": 1e-05,
      "loss": 0.1136,
      "step": 1300
    },
    {
      "epoch": 5.33,
      "eval_accuracy": 0.8964059196617337,
      "eval_loss": 0.49388667941093445,
      "eval_runtime": 3.5671,
      "eval_samples_per_second": 265.203,
      "eval_steps_per_second": 16.82,
      "step": 1300
    },
    {
      "epoch": 5.74,
      "learning_rate": 1e-05,
      "loss": 0.1163,
      "step": 1400
    },
    {
      "epoch": 5.74,
      "eval_accuracy": 0.8964059196617337,
      "eval_loss": 0.4004368782043457,
      "eval_runtime": 3.7365,
      "eval_samples_per_second": 253.18,
      "eval_steps_per_second": 16.058,
      "step": 1400
    },
    {
      "epoch": 6.15,
      "learning_rate": 1e-05,
      "loss": 0.0936,
      "step": 1500
    },
    {
      "epoch": 6.15,
      "eval_accuracy": 0.8964059196617337,
      "eval_loss": 0.5116058588027954,
      "eval_runtime": 3.5814,
      "eval_samples_per_second": 264.146,
      "eval_steps_per_second": 16.753,
      "step": 1500
    },
    {
      "epoch": 6.56,
      "learning_rate": 1e-05,
      "loss": 0.0973,
      "step": 1600
    },
    {
      "epoch": 6.56,
      "eval_accuracy": 0.8921775898520085,
      "eval_loss": 0.4807981848716736,
      "eval_runtime": 3.7354,
      "eval_samples_per_second": 253.253,
      "eval_steps_per_second": 16.063,
      "step": 1600
    },
    {
      "epoch": 6.97,
      "learning_rate": 1e-05,
      "loss": 0.0899,
      "step": 1700
    },
    {
      "epoch": 6.97,
      "eval_accuracy": 0.8868921775898521,
      "eval_loss": 0.48126643896102905,
      "eval_runtime": 3.5551,
      "eval_samples_per_second": 266.094,
      "eval_steps_per_second": 16.877,
      "step": 1700
    },
    {
      "epoch": 7.38,
      "learning_rate": 1e-05,
      "loss": 0.0687,
      "step": 1800
    },
    {
      "epoch": 7.38,
      "eval_accuracy": 0.8847780126849895,
      "eval_loss": 0.6045676469802856,
      "eval_runtime": 3.7224,
      "eval_samples_per_second": 254.138,
      "eval_steps_per_second": 16.119,
      "step": 1800
    },
    {
      "epoch": 7.79,
      "learning_rate": 1e-05,
      "loss": 0.0709,
      "step": 1900
    },
    {
      "epoch": 7.79,
      "eval_accuracy": 0.8964059196617337,
      "eval_loss": 0.5939581394195557,
      "eval_runtime": 3.7591,
      "eval_samples_per_second": 251.655,
      "eval_steps_per_second": 15.961,
      "step": 1900
    },
    {
      "epoch": 8.2,
      "learning_rate": 1e-05,
      "loss": 0.0694,
      "step": 2000
    },
    {
      "epoch": 8.2,
      "eval_accuracy": 0.8911205073995772,
      "eval_loss": 0.5791226029396057,
      "eval_runtime": 3.5526,
      "eval_samples_per_second": 266.284,
      "eval_steps_per_second": 16.889,
      "step": 2000
    },
    {
      "epoch": 8.61,
      "learning_rate": 1e-05,
      "loss": 0.0732,
      "step": 2100
    },
    {
      "epoch": 8.61,
      "eval_accuracy": 0.8921775898520085,
      "eval_loss": 0.5576857924461365,
      "eval_runtime": 3.5693,
      "eval_samples_per_second": 265.038,
      "eval_steps_per_second": 16.81,
      "step": 2100
    },
    {
      "epoch": 9.02,
      "learning_rate": 1e-05,
      "loss": 0.0714,
      "step": 2200
    },
    {
      "epoch": 9.02,
      "eval_accuracy": 0.8995771670190275,
      "eval_loss": 0.5248907804489136,
      "eval_runtime": 3.7426,
      "eval_samples_per_second": 252.768,
      "eval_steps_per_second": 16.032,
      "step": 2200
    },
    {
      "epoch": 9.43,
      "learning_rate": 1e-05,
      "loss": 0.0531,
      "step": 2300
    },
    {
      "epoch": 9.43,
      "eval_accuracy": 0.8932346723044398,
      "eval_loss": 0.6098359227180481,
      "eval_runtime": 3.5458,
      "eval_samples_per_second": 266.794,
      "eval_steps_per_second": 16.921,
      "step": 2300
    },
    {
      "epoch": 9.84,
      "learning_rate": 1e-05,
      "loss": 0.0713,
      "step": 2400
    },
    {
      "epoch": 9.84,
      "eval_accuracy": 0.8942917547568711,
      "eval_loss": 0.5609742403030396,
      "eval_runtime": 3.5561,
      "eval_samples_per_second": 266.022,
      "eval_steps_per_second": 16.872,
      "step": 2400
    },
    {
      "epoch": 10.0,
      "step": 2440,
      "total_flos": 5131323357158400.0,
      "train_loss": 0.16311937355604328,
      "train_runtime": 870.9833,
      "train_samples_per_second": 89.565,
      "train_steps_per_second": 2.801
    }
  ],
  "max_steps": 2440,
  "num_train_epochs": 10,
  "total_flos": 5131323357158400.0,
  "trial_name": null,
  "trial_params": null
}
|
|