Training in progress, step 15600, checkpoint

Browse files

Files changed (5) hide show

.gitattributes +1 -0
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +273 -3

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dce9c5324820ff451aba14ba419aeec1add1a412f3edb32c6c7c0cf2adea8138
 size 3541119728

 version https://git-lfs.github.com/spec/v1
+oid sha256:70537360c9daddf04205b6fbd293c0d4965ec40c67ef261daf546af624afd98f
 size 3541119728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42d8ed1310dd3a3e758a6e193c344d11e872913dc2ecb72c8250191cb9dd1811
 size 778374186

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ca05e267a448d87fef33633929234240f69ebde46a8d89d8a7bbe11cbc11f6c
 size 778374186

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88acc8857c9b71c170e2e7e131e921953f71be549574de8a4567e54277800a43
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8955cd9d24ecd092d5a24dfa8ee9d34839e14159c86f280833a6a8e4cb640de6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7301447165917035,
   "eval_steps": 500,
-  "global_step": 15300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -13778,6 +13778,276 @@
       "mean_token_accuracy": 0.8891868680715561,
       "num_tokens": 25427005.0,
       "step": 15300
     }
   ],
   "logging_steps": 10,
@@ -13797,7 +14067,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.726368297825444e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7444612796621292,
   "eval_steps": 500,
+  "global_step": 15600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.8891868680715561,
       "num_tokens": 25427005.0,
       "step": 15300
+    },
+    {
+      "epoch": 0.7306219353607177,
+      "grad_norm": 0.3419613242149353,
+      "learning_rate": 1.2694345025053687e-05,
+      "loss": 0.6515,
+      "mean_token_accuracy": 0.8683709055185318,
+      "num_tokens": 25442614.0,
+      "step": 15310
+    },
+    {
+      "epoch": 0.7310991541297319,
+      "grad_norm": 0.35376232862472534,
+      "learning_rate": 1.2689572894297304e-05,
+      "loss": 0.6847,
+      "mean_token_accuracy": 0.8579560115933418,
+      "num_tokens": 25460244.0,
+      "step": 15320
+    },
+    {
+      "epoch": 0.7315763728987461,
+      "grad_norm": 0.4027968943119049,
+      "learning_rate": 1.2684800763540922e-05,
+      "loss": 0.5904,
+      "mean_token_accuracy": 0.8813899368047714,
+      "num_tokens": 25477478.0,
+      "step": 15330
+    },
+    {
+      "epoch": 0.7320535916677603,
+      "grad_norm": 0.33690837025642395,
+      "learning_rate": 1.268002863278454e-05,
+      "loss": 0.6076,
+      "mean_token_accuracy": 0.8772727206349373,
+      "num_tokens": 25494023.0,
+      "step": 15340
+    },
+    {
+      "epoch": 0.7325308104367745,
+      "grad_norm": 0.3748989999294281,
+      "learning_rate": 1.2675256502028157e-05,
+      "loss": 0.6861,
+      "mean_token_accuracy": 0.8666884452104568,
+      "num_tokens": 25510507.0,
+      "step": 15350
+    },
+    {
+      "epoch": 0.7330080292057887,
+      "grad_norm": 0.3961426317691803,
+      "learning_rate": 1.2670484371271774e-05,
+      "loss": 0.5477,
+      "mean_token_accuracy": 0.8997573867440224,
+      "num_tokens": 25525876.0,
+      "step": 15360
+    },
+    {
+      "epoch": 0.7334852479748029,
+      "grad_norm": 0.32060977816581726,
+      "learning_rate": 1.266571224051539e-05,
+      "loss": 0.6086,
+      "mean_token_accuracy": 0.8730768218636513,
+      "num_tokens": 25542626.0,
+      "step": 15370
+    },
+    {
+      "epoch": 0.733962466743817,
+      "grad_norm": 0.4424884617328644,
+      "learning_rate": 1.2660940109759007e-05,
+      "loss": 0.6637,
+      "mean_token_accuracy": 0.8715329870581627,
+      "num_tokens": 25559552.0,
+      "step": 15380
+    },
+    {
+      "epoch": 0.7344396855128312,
+      "grad_norm": 0.2700168192386627,
+      "learning_rate": 1.2656167979002624e-05,
+      "loss": 0.6507,
+      "mean_token_accuracy": 0.8775774970650673,
+      "num_tokens": 25575310.0,
+      "step": 15390
+    },
+    {
+      "epoch": 0.7349169042818454,
+      "grad_norm": 0.34019699692726135,
+      "learning_rate": 1.2651395848246244e-05,
+      "loss": 0.6687,
+      "mean_token_accuracy": 0.8798381179571152,
+      "num_tokens": 25590414.0,
+      "step": 15400
+    },
+    {
+      "epoch": 0.7353941230508596,
+      "grad_norm": 0.41453129053115845,
+      "learning_rate": 1.2646623717489861e-05,
+      "loss": 0.692,
+      "mean_token_accuracy": 0.8671859934926033,
+      "num_tokens": 25608759.0,
+      "step": 15410
+    },
+    {
+      "epoch": 0.7358713418198738,
+      "grad_norm": 0.37873607873916626,
+      "learning_rate": 1.2641851586733478e-05,
+      "loss": 0.7208,
+      "mean_token_accuracy": 0.8635074034333229,
+      "num_tokens": 25626985.0,
+      "step": 15420
+    },
+    {
+      "epoch": 0.736348560588888,
+      "grad_norm": 0.3016092777252197,
+      "learning_rate": 1.2637079455977094e-05,
+      "loss": 0.6058,
+      "mean_token_accuracy": 0.8779332295060158,
+      "num_tokens": 25642693.0,
+      "step": 15430
+    },
+    {
+      "epoch": 0.7368257793579022,
+      "grad_norm": 0.3086267411708832,
+      "learning_rate": 1.2632307325220713e-05,
+      "loss": 0.6249,
+      "mean_token_accuracy": 0.8778651550412178,
+      "num_tokens": 25659131.0,
+      "step": 15440
+    },
+    {
+      "epoch": 0.7373029981269164,
+      "grad_norm": 0.3954660892486572,
+      "learning_rate": 1.262753519446433e-05,
+      "loss": 0.6073,
+      "mean_token_accuracy": 0.8788123086094857,
+      "num_tokens": 25675282.0,
+      "step": 15450
+    },
+    {
+      "epoch": 0.7377802168959305,
+      "grad_norm": 0.3375210165977478,
+      "learning_rate": 1.2622763063707946e-05,
+      "loss": 0.5759,
+      "mean_token_accuracy": 0.8832358941435814,
+      "num_tokens": 25690284.0,
+      "step": 15460
+    },
+    {
+      "epoch": 0.7382574356649447,
+      "grad_norm": 0.429108202457428,
+      "learning_rate": 1.2617990932951564e-05,
+      "loss": 0.5676,
+      "mean_token_accuracy": 0.8806645110249519,
+      "num_tokens": 25705824.0,
+      "step": 15470
+    },
+    {
+      "epoch": 0.7387346544339589,
+      "grad_norm": 0.3869950771331787,
+      "learning_rate": 1.2613218802195181e-05,
+      "loss": 0.56,
+      "mean_token_accuracy": 0.8864919826388359,
+      "num_tokens": 25721448.0,
+      "step": 15480
+    },
+    {
+      "epoch": 0.7392118732029731,
+      "grad_norm": 0.2914048731327057,
+      "learning_rate": 1.26084466714388e-05,
+      "loss": 0.6239,
+      "mean_token_accuracy": 0.880633682012558,
+      "num_tokens": 25736900.0,
+      "step": 15490
+    },
+    {
+      "epoch": 0.7396890919719873,
+      "grad_norm": 0.3728204667568207,
+      "learning_rate": 1.2603674540682416e-05,
+      "loss": 0.6358,
+      "mean_token_accuracy": 0.8782069548964501,
+      "num_tokens": 25753432.0,
+      "step": 15500
+    },
+    {
+      "epoch": 0.7401663107410015,
+      "grad_norm": 0.3584674596786499,
+      "learning_rate": 1.2598902409926033e-05,
+      "loss": 0.5266,
+      "mean_token_accuracy": 0.887596707046032,
+      "num_tokens": 25769120.0,
+      "step": 15510
+    },
+    {
+      "epoch": 0.7406435295100157,
+      "grad_norm": 0.4318288564682007,
+      "learning_rate": 1.259413027916965e-05,
+      "loss": 0.5954,
+      "mean_token_accuracy": 0.8761422768235206,
+      "num_tokens": 25785477.0,
+      "step": 15520
+    },
+    {
+      "epoch": 0.7411207482790299,
+      "grad_norm": 0.3693118989467621,
+      "learning_rate": 1.2589358148413266e-05,
+      "loss": 0.6766,
+      "mean_token_accuracy": 0.8750508233904839,
+      "num_tokens": 25803338.0,
+      "step": 15530
+    },
+    {
+      "epoch": 0.741597967048044,
+      "grad_norm": 0.30119234323501587,
+      "learning_rate": 1.2584586017656886e-05,
+      "loss": 0.6606,
+      "mean_token_accuracy": 0.8698265522718429,
+      "num_tokens": 25819899.0,
+      "step": 15540
+    },
+    {
+      "epoch": 0.7420751858170582,
+      "grad_norm": 0.702343761920929,
+      "learning_rate": 1.2579813886900503e-05,
+      "loss": 0.7339,
+      "mean_token_accuracy": 0.869141760468483,
+      "num_tokens": 25837531.0,
+      "step": 15550
+    },
+    {
+      "epoch": 0.7425524045860724,
+      "grad_norm": 0.35476893186569214,
+      "learning_rate": 1.257504175614412e-05,
+      "loss": 0.6443,
+      "mean_token_accuracy": 0.8738871991634369,
+      "num_tokens": 25854586.0,
+      "step": 15560
+    },
+    {
+      "epoch": 0.7430296233550866,
+      "grad_norm": 0.4192853569984436,
+      "learning_rate": 1.2570269625387736e-05,
+      "loss": 0.6971,
+      "mean_token_accuracy": 0.8595242589712143,
+      "num_tokens": 25871022.0,
+      "step": 15570
+    },
+    {
+      "epoch": 0.7435068421241008,
+      "grad_norm": 0.3494696319103241,
+      "learning_rate": 1.2565497494631353e-05,
+      "loss": 0.6859,
+      "mean_token_accuracy": 0.853353051841259,
+      "num_tokens": 25888407.0,
+      "step": 15580
+    },
+    {
+      "epoch": 0.743984060893115,
+      "grad_norm": 0.3698543906211853,
+      "learning_rate": 1.2560725363874971e-05,
+      "loss": 0.7607,
+      "mean_token_accuracy": 0.8620315045118332,
+      "num_tokens": 25906131.0,
+      "step": 15590
+    },
+    {
+      "epoch": 0.7444612796621292,
+      "grad_norm": 0.3582072854042053,
+      "learning_rate": 1.2555953233118588e-05,
+      "loss": 0.5744,
+      "mean_token_accuracy": 0.8893922328948974,
+      "num_tokens": 25920582.0,
+      "step": 15600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5.837477802884506e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null