Llama3-8B-Instruct-mntp-patent / trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.08815232722143865,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01763046544428773,
      "grad_norm": 1.5435048341751099,
      "learning_rate": 4.9706158909261876e-05,
      "loss": 0.8407,
      "step": 500
    },
    {
      "epoch": 0.01763046544428773,
      "eval_accuracy": 0.8252411503248731,
      "eval_loss": 0.7565935850143433,
      "eval_runtime": 2895.6955,
      "eval_samples_per_second": 32.989,
      "eval_steps_per_second": 0.516,
      "step": 500
    },
    {
      "epoch": 0.03526093088857546,
      "grad_norm": 1.5116485357284546,
      "learning_rate": 4.9412317818523744e-05,
      "loss": 0.5964,
      "step": 1000
    },
    {
      "epoch": 0.03526093088857546,
      "eval_accuracy": 0.8373293281429335,
      "eval_loss": 0.6963507533073425,
      "eval_runtime": 2899.1751,
      "eval_samples_per_second": 32.949,
      "eval_steps_per_second": 0.515,
      "step": 1000
    },
    {
      "epoch": 0.05289139633286319,
      "grad_norm": 1.4373358488082886,
      "learning_rate": 4.911847672778562e-05,
      "loss": 0.5661,
      "step": 1500
    },
    {
      "epoch": 0.05289139633286319,
      "eval_accuracy": 0.8443953465863471,
      "eval_loss": 0.6656736731529236,
      "eval_runtime": 2944.9636,
      "eval_samples_per_second": 32.437,
      "eval_steps_per_second": 0.507,
      "step": 1500
    },
    {
      "epoch": 0.07052186177715092,
      "grad_norm": 1.216012716293335,
      "learning_rate": 4.882463563704749e-05,
      "loss": 0.5402,
      "step": 2000
    },
    {
      "epoch": 0.07052186177715092,
      "eval_accuracy": 0.8482718545347777,
      "eval_loss": 0.6440214514732361,
      "eval_runtime": 2944.5243,
      "eval_samples_per_second": 32.442,
      "eval_steps_per_second": 0.507,
      "step": 2000
    },
    {
      "epoch": 0.08815232722143865,
      "grad_norm": 1.0847452878952026,
      "learning_rate": 4.853079454630936e-05,
      "loss": 0.5237,
      "step": 2500
    },
    {
      "epoch": 0.08815232722143865,
      "eval_accuracy": 0.8508165457808422,
      "eval_loss": 0.6308088898658752,
      "eval_runtime": 2933.5042,
      "eval_samples_per_second": 32.564,
      "eval_steps_per_second": 0.509,
      "step": 2500
    }
  ],
  "logging_steps": 500,
  "max_steps": 85080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 3.70943641780224e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
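
For reference, a minimal sketch of how this log can be inspected offline, assuming the file has been downloaded locally as trainer_state.json (the path is an assumption; only the Python standard library is used). It reads log_history and pairs each training entry (loss, learning_rate, grad_norm) with the evaluation entry logged at the same step, which is how the five logged steps above line up at steps 500 through 2500.

    import json

    # Assumed local path to the downloaded file; adjust as needed.
    with open("trainer_state.json") as f:
        state = json.load(f)

    # Training entries carry "loss"; evaluation entries carry "eval_loss".
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    for t, v in zip(train_logs, eval_logs):
        print(f'step {t["step"]:>5}: '
              f'loss={t["loss"]:.4f}  lr={t["learning_rate"]:.3e}  '
              f'eval_loss={v["eval_loss"]:.4f}  eval_acc={v["eval_accuracy"]:.4f}')

As a side note, the logged learning_rate values are consistent with a linear schedule decaying from 5e-05 toward 0 over max_steps = 85080 (for example, 5e-05 * (1 - 500 / 85080) ≈ 4.9706e-05 at step 500), although the schedule type itself is not recorded in this file.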