{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.10614772224679346,
"eval_steps": 500,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0017691287041132243,
"grad_norm": 1.9742480441813004,
"learning_rate": 2e-08,
"loss": 3.2598,
"step": 1
},
{
"epoch": 0.0035382574082264487,
"grad_norm": 2.038016027037428,
"learning_rate": 4e-08,
"loss": 3.4531,
"step": 2
},
{
"epoch": 0.005307386112339673,
"grad_norm": 2.225136431627388,
"learning_rate": 6e-08,
"loss": 3.4746,
"step": 3
},
{
"epoch": 0.007076514816452897,
"grad_norm": 2.0178713550531033,
"learning_rate": 8e-08,
"loss": 3.293,
"step": 4
},
{
"epoch": 0.008845643520566122,
"grad_norm": 2.042908378033956,
"learning_rate": 1e-07,
"loss": 3.2383,
"step": 5
},
{
"epoch": 0.010614772224679346,
"grad_norm": 2.1329682870055464,
"learning_rate": 1.2e-07,
"loss": 3.2441,
"step": 6
},
{
"epoch": 0.01238390092879257,
"grad_norm": 2.1377356269998873,
"learning_rate": 1.4e-07,
"loss": 3.293,
"step": 7
},
{
"epoch": 0.014153029632905795,
"grad_norm": 2.0514676704041146,
"learning_rate": 1.6e-07,
"loss": 3.4004,
"step": 8
},
{
"epoch": 0.01592215833701902,
"grad_norm": 1.94775665931649,
"learning_rate": 1.8e-07,
"loss": 3.4355,
"step": 9
},
{
"epoch": 0.017691287041132243,
"grad_norm": 2.0977166835164653,
"learning_rate": 2e-07,
"loss": 3.3711,
"step": 10
},
{
"epoch": 0.019460415745245468,
"grad_norm": 2.192564032711453,
"learning_rate": 2.1999999999999998e-07,
"loss": 3.3398,
"step": 11
},
{
"epoch": 0.021229544449358692,
"grad_norm": 2.1498795965856914,
"learning_rate": 2.4e-07,
"loss": 3.1562,
"step": 12
},
{
"epoch": 0.022998673153471916,
"grad_norm": 2.109291509736264,
"learning_rate": 2.6e-07,
"loss": 3.3652,
"step": 13
},
{
"epoch": 0.02476780185758514,
"grad_norm": 2.044792800372603,
"learning_rate": 2.8e-07,
"loss": 3.2461,
"step": 14
},
{
"epoch": 0.026536930561698365,
"grad_norm": 2.167283994785129,
"learning_rate": 3e-07,
"loss": 3.3301,
"step": 15
},
{
"epoch": 0.02830605926581159,
"grad_norm": 1.995664320722997,
"learning_rate": 3.2e-07,
"loss": 3.3867,
"step": 16
},
{
"epoch": 0.03007518796992481,
"grad_norm": 2.00843521276344,
"learning_rate": 3.4000000000000003e-07,
"loss": 3.2363,
"step": 17
},
{
"epoch": 0.03184431667403804,
"grad_norm": 2.107294400286055,
"learning_rate": 3.6e-07,
"loss": 3.3809,
"step": 18
},
{
"epoch": 0.03361344537815126,
"grad_norm": 1.9965688324131208,
"learning_rate": 3.7999999999999996e-07,
"loss": 3.2637,
"step": 19
},
{
"epoch": 0.03538257408226449,
"grad_norm": 2.1690567393421936,
"learning_rate": 4e-07,
"loss": 3.2461,
"step": 20
},
{
"epoch": 0.03715170278637771,
"grad_norm": 1.9509465820725813,
"learning_rate": 4.1999999999999995e-07,
"loss": 3.3359,
"step": 21
},
{
"epoch": 0.038920831490490936,
"grad_norm": 2.180359699431997,
"learning_rate": 4.3999999999999997e-07,
"loss": 3.498,
"step": 22
},
{
"epoch": 0.040689960194604156,
"grad_norm": 1.9303585281557267,
"learning_rate": 4.6e-07,
"loss": 3.4453,
"step": 23
},
{
"epoch": 0.042459088898717384,
"grad_norm": 2.105287899781242,
"learning_rate": 4.8e-07,
"loss": 3.4531,
"step": 24
},
{
"epoch": 0.044228217602830605,
"grad_norm": 1.996519659869237,
"learning_rate": 5e-07,
"loss": 3.3633,
"step": 25
},
{
"epoch": 0.04599734630694383,
"grad_norm": 2.0672497293218903,
"learning_rate": 5.2e-07,
"loss": 3.4746,
"step": 26
},
{
"epoch": 0.047766475011057054,
"grad_norm": 2.0187116926490165,
"learning_rate": 5.4e-07,
"loss": 3.3105,
"step": 27
},
{
"epoch": 0.04953560371517028,
"grad_norm": 2.185446736666104,
"learning_rate": 5.6e-07,
"loss": 3.1953,
"step": 28
},
{
"epoch": 0.0513047324192835,
"grad_norm": 1.9785091042817515,
"learning_rate": 5.8e-07,
"loss": 3.2207,
"step": 29
},
{
"epoch": 0.05307386112339673,
"grad_norm": 1.983411961208081,
"learning_rate": 6e-07,
"loss": 3.1953,
"step": 30
},
{
"epoch": 0.05484298982750995,
"grad_norm": 1.8887794910668352,
"learning_rate": 6.2e-07,
"loss": 3.127,
"step": 31
},
{
"epoch": 0.05661211853162318,
"grad_norm": 2.024592500623624,
"learning_rate": 6.4e-07,
"loss": 3.3652,
"step": 32
},
{
"epoch": 0.0583812472357364,
"grad_norm": 2.033056092327317,
"learning_rate": 6.6e-07,
"loss": 3.4629,
"step": 33
},
{
"epoch": 0.06015037593984962,
"grad_norm": 2.1137985890313646,
"learning_rate": 6.800000000000001e-07,
"loss": 3.4277,
"step": 34
},
{
"epoch": 0.06191950464396285,
"grad_norm": 2.135970317417631,
"learning_rate": 7e-07,
"loss": 3.5664,
"step": 35
},
{
"epoch": 0.06368863334807608,
"grad_norm": 1.9525141602052385,
"learning_rate": 7.2e-07,
"loss": 3.3164,
"step": 36
},
{
"epoch": 0.0654577620521893,
"grad_norm": 1.9679140574444143,
"learning_rate": 7.4e-07,
"loss": 3.1348,
"step": 37
},
{
"epoch": 0.06722689075630252,
"grad_norm": 2.0697308820659295,
"learning_rate": 7.599999999999999e-07,
"loss": 3.3574,
"step": 38
},
{
"epoch": 0.06899601946041574,
"grad_norm": 2.0879787228782463,
"learning_rate": 7.799999999999999e-07,
"loss": 3.3477,
"step": 39
},
{
"epoch": 0.07076514816452897,
"grad_norm": 2.0051097367804234,
"learning_rate": 8e-07,
"loss": 3.3477,
"step": 40
},
{
"epoch": 0.0725342768686422,
"grad_norm": 2.039846964044792,
"learning_rate": 8.199999999999999e-07,
"loss": 3.2109,
"step": 41
},
{
"epoch": 0.07430340557275542,
"grad_norm": 2.13344976323939,
"learning_rate": 8.399999999999999e-07,
"loss": 3.3848,
"step": 42
},
{
"epoch": 0.07607253427686864,
"grad_norm": 2.2830511277961585,
"learning_rate": 8.599999999999999e-07,
"loss": 3.1602,
"step": 43
},
{
"epoch": 0.07784166298098187,
"grad_norm": 1.9427108734819927,
"learning_rate": 8.799999999999999e-07,
"loss": 3.1914,
"step": 44
},
{
"epoch": 0.07961079168509509,
"grad_norm": 1.9926391710215448,
"learning_rate": 9e-07,
"loss": 3.3711,
"step": 45
},
{
"epoch": 0.08137992038920831,
"grad_norm": 2.2237278323731107,
"learning_rate": 9.2e-07,
"loss": 3.4746,
"step": 46
},
{
"epoch": 0.08314904909332153,
"grad_norm": 2.123759872019136,
"learning_rate": 9.399999999999999e-07,
"loss": 3.3926,
"step": 47
},
{
"epoch": 0.08491817779743477,
"grad_norm": 2.138037893897646,
"learning_rate": 9.6e-07,
"loss": 3.377,
"step": 48
},
{
"epoch": 0.08668730650154799,
"grad_norm": 2.074234748374453,
"learning_rate": 9.8e-07,
"loss": 3.3457,
"step": 49
},
{
"epoch": 0.08845643520566121,
"grad_norm": 2.162562414477262,
"learning_rate": 1e-06,
"loss": 3.2148,
"step": 50
},
{
"epoch": 0.09022556390977443,
"grad_norm": 2.091661753228539,
"learning_rate": 1.02e-06,
"loss": 3.3613,
"step": 51
},
{
"epoch": 0.09199469261388767,
"grad_norm": 2.1815638012188963,
"learning_rate": 1.04e-06,
"loss": 3.2949,
"step": 52
},
{
"epoch": 0.09376382131800089,
"grad_norm": 2.127146363547092,
"learning_rate": 1.06e-06,
"loss": 3.4297,
"step": 53
},
{
"epoch": 0.09553295002211411,
"grad_norm": 2.201243987413546,
"learning_rate": 1.08e-06,
"loss": 3.1445,
"step": 54
},
{
"epoch": 0.09730207872622733,
"grad_norm": 2.1006629919292075,
"learning_rate": 1.1e-06,
"loss": 3.4238,
"step": 55
},
{
"epoch": 0.09907120743034056,
"grad_norm": 2.2056247586234115,
"learning_rate": 1.12e-06,
"loss": 3.3516,
"step": 56
},
{
"epoch": 0.10084033613445378,
"grad_norm": 2.300409874538962,
"learning_rate": 1.1399999999999999e-06,
"loss": 3.4766,
"step": 57
},
{
"epoch": 0.102609464838567,
"grad_norm": 2.10637739408632,
"learning_rate": 1.16e-06,
"loss": 3.3477,
"step": 58
},
{
"epoch": 0.10437859354268023,
"grad_norm": 2.047319466977983,
"learning_rate": 1.18e-06,
"loss": 3.3965,
"step": 59
},
{
"epoch": 0.10614772224679346,
"grad_norm": 2.0670306109309102,
"learning_rate": 1.2e-06,
"loss": 3.2266,
"step": 60
}
],
"logging_steps": 1,
"max_steps": 565,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 15477887729664.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}