File size: 1,943 Bytes

ce7f69c
 
 
 
 
 
 
 
 
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
f55b33a
ce7f69c
f55b33a
ce7f69c
 
 
 
 
f55b33a
 
 
 
 
ce7f69c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55b33a
ce7f69c

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.992,
  "eval_steps": 500,
  "global_step": 62,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 1.192562460899353,
      "learning_rate": 1.9853538358476933e-05,
      "loss": 0.2839,
      "step": 10
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.5562169551849365,
      "learning_rate": 1.736741137876405e-05,
      "loss": 0.2157,
      "step": 20
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9338874816894531,
      "learning_rate": 1.2542183341934873e-05,
      "loss": 0.1568,
      "step": 30
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4656245708465576,
      "learning_rate": 6.909830056250527e-06,
      "loss": 0.0839,
      "step": 40
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7528181076049805,
      "learning_rate": 2.2585838936091753e-06,
      "loss": 0.1057,
      "step": 50
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.33693528175354,
      "learning_rate": 6.51826465144978e-08,
      "loss": 0.1226,
      "step": 60
    },
    {
      "epoch": 0.992,
      "step": 62,
      "total_flos": 1.8193643319066624e+16,
      "train_loss": 0.15735646361304867,
      "train_runtime": 1055.5718,
      "train_samples_per_second": 0.474,
      "train_steps_per_second": 0.059
    }
  ],
  "logging_steps": 10,
  "max_steps": 62,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8193643319066624e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}