qwen2.5_ORPO_it_7b_epochs3 / trainer_state.json
Cherran's picture
Upload folder using huggingface_hub
e87d887 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9635036496350367,
"eval_steps": 500,
"global_step": 204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.291970802919708,
"grad_norm": 1.0873714685440063,
"learning_rate": 0.0004761904761904762,
"log_odds_chosen": -0.5441410541534424,
"log_odds_ratio": -1.0390523672103882,
"logits/chosen": -0.621608316898346,
"logits/rejected": -0.6012401580810547,
"logps/chosen": -1.8206005096435547,
"logps/rejected": -1.3865854740142822,
"loss": 1.4562,
"nll_loss": 1.352305293083191,
"rewards/accuracies": 0.15000000596046448,
"rewards/chosen": -0.18206006288528442,
"rewards/margins": -0.04340151697397232,
"rewards/rejected": -0.1386585533618927,
"step": 20
},
{
"epoch": 0.583941605839416,
"grad_norm": 1.0895646810531616,
"learning_rate": 0.0004868186180746792,
"log_odds_chosen": -0.3829799294471741,
"log_odds_ratio": -0.9370166063308716,
"logits/chosen": -0.4700603485107422,
"logits/rejected": -0.44150036573410034,
"logps/chosen": -1.2983076572418213,
"logps/rejected": -1.0247515439987183,
"loss": 1.0145,
"nll_loss": 0.92084801197052,
"rewards/accuracies": 0.234375,
"rewards/chosen": -0.1298307627439499,
"rewards/margins": -0.027355605736374855,
"rewards/rejected": -0.10247516632080078,
"step": 40
},
{
"epoch": 0.8759124087591241,
"grad_norm": 1.0798814296722412,
"learning_rate": 0.00044602995164417765,
"log_odds_chosen": -0.20668645203113556,
"log_odds_ratio": -0.8541840314865112,
"logits/chosen": -0.46093836426734924,
"logits/rejected": -0.45649147033691406,
"logps/chosen": -1.2755881547927856,
"logps/rejected": -1.1037111282348633,
"loss": 0.9847,
"nll_loss": 0.8992462158203125,
"rewards/accuracies": 0.34062498807907104,
"rewards/chosen": -0.12755881249904633,
"rewards/margins": -0.01718769781291485,
"rewards/rejected": -0.11037111282348633,
"step": 60
},
{
"epoch": 1.1605839416058394,
"grad_norm": 1.3134093284606934,
"learning_rate": 0.0003823584728045463,
"log_odds_chosen": 0.21238426864147186,
"log_odds_ratio": -0.6697712540626526,
"logits/chosen": -0.6017207503318787,
"logits/rejected": -0.6049566268920898,
"logps/chosen": -1.103987455368042,
"logps/rejected": -1.2036486864089966,
"loss": 0.8466,
"nll_loss": 0.8013469576835632,
"rewards/accuracies": 0.5256410241127014,
"rewards/chosen": -0.11039875447750092,
"rewards/margins": 0.009966122917830944,
"rewards/rejected": -0.12036487460136414,
"step": 80
},
{
"epoch": 1.4525547445255476,
"grad_norm": 1.4332325458526611,
"learning_rate": 0.00030323662998460395,
"log_odds_chosen": 0.5833564400672913,
"log_odds_ratio": -0.5137361884117126,
"logits/chosen": -0.7008029818534851,
"logits/rejected": -0.6883307695388794,
"logps/chosen": -0.8778481483459473,
"logps/rejected": -1.199577808380127,
"loss": 0.7714,
"nll_loss": 0.7199974656105042,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.08778481185436249,
"rewards/margins": 0.03217295557260513,
"rewards/rejected": -0.11995778232812881,
"step": 100
},
{
"epoch": 1.7445255474452555,
"grad_norm": 1.5278706550598145,
"learning_rate": 0.00021790041121336223,
"log_odds_chosen": 0.4335743486881256,
"log_odds_ratio": -0.5648446083068848,
"logits/chosen": -0.6764112114906311,
"logits/rejected": -0.6685619950294495,
"logps/chosen": -0.9028280377388,
"logps/rejected": -1.147694706916809,
"loss": 0.7602,
"nll_loss": 0.7037514448165894,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.09028279781341553,
"rewards/margins": 0.02448667585849762,
"rewards/rejected": -0.11476948112249374,
"step": 120
},
{
"epoch": 2.0291970802919708,
"grad_norm": 1.2298489809036255,
"learning_rate": 0.00013631121611097364,
"log_odds_chosen": 0.8053007125854492,
"log_odds_ratio": -0.4435744285583496,
"logits/chosen": -0.6849303841590881,
"logits/rejected": -0.6694097518920898,
"logps/chosen": -0.7989615797996521,
"logps/rejected": -1.2587368488311768,
"loss": 0.7171,
"nll_loss": 0.6910890936851501,
"rewards/accuracies": 0.817307710647583,
"rewards/chosen": -0.07989615947008133,
"rewards/margins": 0.04597752168774605,
"rewards/rejected": -0.12587368488311768,
"step": 140
},
{
"epoch": 2.321167883211679,
"grad_norm": 1.4861160516738892,
"learning_rate": 6.799304971075382e-05,
"log_odds_chosen": 2.2542662620544434,
"log_odds_ratio": -0.14231202006340027,
"logits/chosen": -0.8755828738212585,
"logits/rejected": -0.8502569198608398,
"logps/chosen": -0.46075183153152466,
"logps/rejected": -1.7119022607803345,
"loss": 0.5128,
"nll_loss": 0.4985771179199219,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -0.046075187623500824,
"rewards/margins": 0.1251150369644165,
"rewards/rejected": -0.17119023203849792,
"step": 160
},
{
"epoch": 2.613138686131387,
"grad_norm": 1.5773358345031738,
"learning_rate": 2.092077387824884e-05,
"log_odds_chosen": 2.26225209236145,
"log_odds_ratio": -0.13692842423915863,
"logits/chosen": -0.9204779863357544,
"logits/rejected": -0.8904932141304016,
"logps/chosen": -0.4740973114967346,
"logps/rejected": -1.7312190532684326,
"loss": 0.5437,
"nll_loss": 0.5300474762916565,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -0.04740973562002182,
"rewards/margins": 0.12571218609809875,
"rewards/rejected": -0.17312191426753998,
"step": 180
},
{
"epoch": 2.905109489051095,
"grad_norm": 1.3676555156707764,
"learning_rate": 5.891920784984184e-07,
"log_odds_chosen": 2.3821473121643066,
"log_odds_ratio": -0.13435281813144684,
"logits/chosen": -0.9195858240127563,
"logits/rejected": -0.8968290090560913,
"logps/chosen": -0.40853673219680786,
"logps/rejected": -1.648188829421997,
"loss": 0.5142,
"nll_loss": 0.5008108615875244,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.040853675454854965,
"rewards/margins": 0.12396518886089325,
"rewards/rejected": -0.16481885313987732,
"step": 200
}
],
"logging_steps": 20,
"max_steps": 204,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}