{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.988458927359131,
"eval_steps": 500,
"global_step": 552,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05431093007467753,
"grad_norm": 1.4870964288711548,
"learning_rate": 0.00019997351589651408,
"loss": 3.4965,
"step": 10
},
{
"epoch": 0.10862186014935506,
"grad_norm": 1.784044861793518,
"learning_rate": 0.00019967573081342103,
"loss": 2.065,
"step": 20
},
{
"epoch": 0.1629327902240326,
"grad_norm": 0.7305468916893005,
"learning_rate": 0.00019904804439875633,
"loss": 1.2421,
"step": 30
},
{
"epoch": 0.2172437202987101,
"grad_norm": 0.6995559930801392,
"learning_rate": 0.00019809253413499565,
"loss": 1.093,
"step": 40
},
{
"epoch": 0.27155465037338766,
"grad_norm": 0.6627448201179504,
"learning_rate": 0.00019681236251822273,
"loss": 1.0856,
"step": 50
},
{
"epoch": 0.3258655804480652,
"grad_norm": 0.7160666584968567,
"learning_rate": 0.00019521176659107142,
"loss": 1.013,
"step": 60
},
{
"epoch": 0.3801765105227427,
"grad_norm": 0.6306814551353455,
"learning_rate": 0.0001932960439191915,
"loss": 1.0374,
"step": 70
},
{
"epoch": 0.4344874405974202,
"grad_norm": 0.7758208513259888,
"learning_rate": 0.00019107153505765306,
"loss": 0.9474,
"step": 80
},
{
"epoch": 0.48879837067209775,
"grad_norm": 1.2394300699234009,
"learning_rate": 0.000188545602565321,
"loss": 0.9932,
"step": 90
},
{
"epoch": 0.5431093007467753,
"grad_norm": 0.829031229019165,
"learning_rate": 0.0001857266066366567,
"loss": 0.9204,
"step": 100
},
{
"epoch": 0.5974202308214528,
"grad_norm": 0.7629134654998779,
"learning_rate": 0.0001826238774315995,
"loss": 0.9457,
"step": 110
},
{
"epoch": 0.6517311608961304,
"grad_norm": 0.8157823085784912,
"learning_rate": 0.00017924768419510904,
"loss": 0.8539,
"step": 120
},
{
"epoch": 0.7060420909708078,
"grad_norm": 0.7475631237030029,
"learning_rate": 0.0001756092012685749,
"loss": 0.82,
"step": 130
},
{
"epoch": 0.7603530210454854,
"grad_norm": 0.6592528223991394,
"learning_rate": 0.000171720471105587,
"loss": 0.8846,
"step": 140
},
{
"epoch": 0.814663951120163,
"grad_norm": 0.6989027857780457,
"learning_rate": 0.00016759436441447545,
"loss": 0.8367,
"step": 150
},
{
"epoch": 0.8689748811948405,
"grad_norm": 0.7253873348236084,
"learning_rate": 0.00016324453755953773,
"loss": 0.8068,
"step": 160
},
{
"epoch": 0.923285811269518,
"grad_norm": 0.7640873193740845,
"learning_rate": 0.00015868538736194427,
"loss": 0.8169,
"step": 170
},
{
"epoch": 0.9775967413441955,
"grad_norm": 0.7669989466667175,
"learning_rate": 0.00015393200344991995,
"loss": 0.8355,
"step": 180
},
{
"epoch": 1.0271554650373387,
"grad_norm": 0.7532988786697388,
"learning_rate": 0.0001490001183159105,
"loss": 0.7339,
"step": 190
},
{
"epoch": 1.0814663951120163,
"grad_norm": 0.7974510192871094,
"learning_rate": 0.0001439060552460318,
"loss": 0.8186,
"step": 200
},
{
"epoch": 1.1357773251866938,
"grad_norm": 0.9017219543457031,
"learning_rate": 0.0001386666742941419,
"loss": 0.775,
"step": 210
},
{
"epoch": 1.1900882552613714,
"grad_norm": 0.8205109238624573,
"learning_rate": 0.00013329931647934883,
"loss": 0.7421,
"step": 220
},
{
"epoch": 1.2443991853360488,
"grad_norm": 0.866692066192627,
"learning_rate": 0.0001278217463916453,
"loss": 0.7113,
"step": 230
},
{
"epoch": 1.2987101154107263,
"grad_norm": 0.8832337856292725,
"learning_rate": 0.00012225209339563145,
"loss": 0.7545,
"step": 240
},
{
"epoch": 1.353021045485404,
"grad_norm": 1.0796443223953247,
"learning_rate": 0.00011660879162692675,
"loss": 0.7085,
"step": 250
},
{
"epoch": 1.4073319755600815,
"grad_norm": 0.9231683015823364,
"learning_rate": 0.00011091051897986678,
"loss": 0.7168,
"step": 260
},
{
"epoch": 1.461642905634759,
"grad_norm": 0.8881363272666931,
"learning_rate": 0.00010517613528842097,
"loss": 0.7606,
"step": 270
},
{
"epoch": 1.5159538357094364,
"grad_norm": 0.8930597901344299,
"learning_rate": 9.942461990493625e-05,
"loss": 0.6926,
"step": 280
},
{
"epoch": 1.570264765784114,
"grad_norm": 1.0270030498504639,
"learning_rate": 9.367500888330545e-05,
"loss": 0.7571,
"step": 290
},
{
"epoch": 1.6245756958587916,
"grad_norm": 0.8959159255027771,
"learning_rate": 8.79463319744677e-05,
"loss": 0.7786,
"step": 300
},
{
"epoch": 1.6788866259334692,
"grad_norm": 0.8595919013023376,
"learning_rate": 8.225754964277018e-05,
"loss": 0.6935,
"step": 310
},
{
"epoch": 1.7331975560081467,
"grad_norm": 0.953175961971283,
"learning_rate": 7.662749031165092e-05,
"loss": 0.6901,
"step": 320
},
{
"epoch": 1.787508486082824,
"grad_norm": 0.985431969165802,
"learning_rate": 7.107478804634325e-05,
"loss": 0.7101,
"step": 330
},
{
"epoch": 1.8418194161575017,
"grad_norm": 1.0016827583312988,
"learning_rate": 6.561782087985681e-05,
"loss": 0.707,
"step": 340
},
{
"epoch": 1.8961303462321792,
"grad_norm": 0.9732582569122314,
"learning_rate": 6.02746499863599e-05,
"loss": 0.7426,
"step": 350
},
{
"epoch": 1.9504412763068566,
"grad_norm": 0.9253762364387512,
"learning_rate": 5.506295990328385e-05,
"loss": 0.7273,
"step": 360
},
{
"epoch": 2.0,
"grad_norm": 2.792293071746826,
"learning_rate": 5.000000000000002e-05,
"loss": 0.7256,
"step": 370
},
{
"epoch": 2.0543109300746774,
"grad_norm": 0.9254827499389648,
"learning_rate": 4.510252738679136e-05,
"loss": 0.6432,
"step": 380
},
{
"epoch": 2.108621860149355,
"grad_norm": 1.0876941680908203,
"learning_rate": 4.038675145307747e-05,
"loss": 0.6256,
"step": 390
},
{
"epoch": 2.1629327902240325,
"grad_norm": 0.916249692440033,
"learning_rate": 3.5868280218455796e-05,
"loss": 0.6442,
"step": 400
},
{
"epoch": 2.2172437202987103,
"grad_norm": 0.9240853190422058,
"learning_rate": 3.1562068674124344e-05,
"loss": 0.5883,
"step": 410
},
{
"epoch": 2.2715546503733877,
"grad_norm": 1.2008038759231567,
"learning_rate": 2.7482369285662378e-05,
"loss": 0.6987,
"step": 420
},
{
"epoch": 2.325865580448065,
"grad_norm": 1.2723044157028198,
"learning_rate": 2.364268482099218e-05,
"loss": 0.708,
"step": 430
},
{
"epoch": 2.380176510522743,
"grad_norm": 0.9695908427238464,
"learning_rate": 2.0055723659649904e-05,
"loss": 0.6782,
"step": 440
},
{
"epoch": 2.43448744059742,
"grad_norm": 1.044391393661499,
"learning_rate": 1.6733357731279377e-05,
"loss": 0.5803,
"step": 450
},
{
"epoch": 2.4887983706720975,
"grad_norm": 0.9964624643325806,
"learning_rate": 1.368658322256311e-05,
"loss": 0.6112,
"step": 460
},
{
"epoch": 2.5431093007467753,
"grad_norm": 1.004639744758606,
"learning_rate": 1.0925484182639467e-05,
"loss": 0.6322,
"step": 470
},
{
"epoch": 2.5974202308214527,
"grad_norm": 1.1456069946289062,
"learning_rate": 8.45919914746337e-06,
"loss": 0.5633,
"step": 480
},
{
"epoch": 2.6517311608961305,
"grad_norm": 1.1862763166427612,
"learning_rate": 6.2958908935752955e-06,
"loss": 0.5859,
"step": 490
},
{
"epoch": 2.706042090970808,
"grad_norm": 1.1233826875686646,
"learning_rate": 4.442719421385922e-06,
"loss": 0.6147,
"step": 500
},
{
"epoch": 2.7603530210454856,
"grad_norm": 1.0159374475479126,
"learning_rate": 2.905818257394799e-06,
"loss": 0.5829,
"step": 510
},
{
"epoch": 2.814663951120163,
"grad_norm": 1.053791880607605,
"learning_rate": 1.6902741537767609e-06,
"loss": 0.5938,
"step": 520
},
{
"epoch": 2.8689748811948403,
"grad_norm": 1.0928566455841064,
"learning_rate": 8.00110252525299e-07,
"loss": 0.6136,
"step": 530
},
{
"epoch": 2.923285811269518,
"grad_norm": 1.1599104404449463,
"learning_rate": 2.382727698752474e-07,
"loss": 0.6389,
"step": 540
},
{
"epoch": 2.9775967413441955,
"grad_norm": 1.2020913362503052,
"learning_rate": 6.621245075910665e-09,
"loss": 0.6719,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 552,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4322859040948224.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}