zenz-v1-checkpoints / trainer_state.json
Miwa-Keita's picture
Upload 10 files
174f19e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 50000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0151,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 9.960000000000001e-06,
"loss": 0.0149,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 9.940000000000001e-06,
"loss": 0.0148,
"step": 300
},
{
"epoch": 0.01,
"learning_rate": 9.920000000000002e-06,
"loss": 0.0148,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 9.9e-06,
"loss": 0.0155,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 9.88e-06,
"loss": 0.0154,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 9.86e-06,
"loss": 0.0146,
"step": 700
},
{
"epoch": 0.02,
"learning_rate": 9.84e-06,
"loss": 0.0148,
"step": 800
},
{
"epoch": 0.02,
"learning_rate": 9.820000000000001e-06,
"loss": 0.0158,
"step": 900
},
{
"epoch": 0.02,
"learning_rate": 9.800000000000001e-06,
"loss": 0.0163,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 9.780000000000001e-06,
"loss": 0.0152,
"step": 1100
},
{
"epoch": 0.02,
"learning_rate": 9.760000000000001e-06,
"loss": 0.0147,
"step": 1200
},
{
"epoch": 0.03,
"learning_rate": 9.74e-06,
"loss": 0.016,
"step": 1300
},
{
"epoch": 0.03,
"learning_rate": 9.72e-06,
"loss": 0.0145,
"step": 1400
},
{
"epoch": 0.03,
"learning_rate": 9.7e-06,
"loss": 0.0149,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 9.68e-06,
"loss": 0.0151,
"step": 1600
},
{
"epoch": 0.03,
"learning_rate": 9.66e-06,
"loss": 0.0149,
"step": 1700
},
{
"epoch": 0.04,
"learning_rate": 9.640000000000001e-06,
"loss": 0.015,
"step": 1800
},
{
"epoch": 0.04,
"learning_rate": 9.620000000000001e-06,
"loss": 0.0154,
"step": 1900
},
{
"epoch": 0.04,
"learning_rate": 9.600000000000001e-06,
"loss": 0.0159,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 9.58e-06,
"loss": 0.0156,
"step": 2100
},
{
"epoch": 0.04,
"learning_rate": 9.56e-06,
"loss": 0.0143,
"step": 2200
},
{
"epoch": 0.05,
"learning_rate": 9.54e-06,
"loss": 0.0151,
"step": 2300
},
{
"epoch": 0.05,
"learning_rate": 9.52e-06,
"loss": 0.015,
"step": 2400
},
{
"epoch": 0.05,
"learning_rate": 9.5e-06,
"loss": 0.0149,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 9.48e-06,
"loss": 0.0148,
"step": 2600
},
{
"epoch": 0.05,
"learning_rate": 9.460000000000001e-06,
"loss": 0.0156,
"step": 2700
},
{
"epoch": 0.06,
"learning_rate": 9.440000000000001e-06,
"loss": 0.0144,
"step": 2800
},
{
"epoch": 0.06,
"learning_rate": 9.42e-06,
"loss": 0.0156,
"step": 2900
},
{
"epoch": 0.06,
"learning_rate": 9.4e-06,
"loss": 0.0155,
"step": 3000
},
{
"epoch": 0.06,
"learning_rate": 9.38e-06,
"loss": 0.0148,
"step": 3100
},
{
"epoch": 0.06,
"learning_rate": 9.360000000000002e-06,
"loss": 0.0154,
"step": 3200
},
{
"epoch": 0.07,
"learning_rate": 9.340000000000002e-06,
"loss": 0.0147,
"step": 3300
},
{
"epoch": 0.07,
"learning_rate": 9.32e-06,
"loss": 0.0156,
"step": 3400
},
{
"epoch": 0.07,
"learning_rate": 9.3e-06,
"loss": 0.0147,
"step": 3500
},
{
"epoch": 0.07,
"learning_rate": 9.280000000000001e-06,
"loss": 0.0149,
"step": 3600
},
{
"epoch": 0.07,
"learning_rate": 9.260000000000001e-06,
"loss": 0.0147,
"step": 3700
},
{
"epoch": 0.08,
"learning_rate": 9.240000000000001e-06,
"loss": 0.0146,
"step": 3800
},
{
"epoch": 0.08,
"learning_rate": 9.220000000000002e-06,
"loss": 0.0152,
"step": 3900
},
{
"epoch": 0.08,
"learning_rate": 9.200000000000002e-06,
"loss": 0.015,
"step": 4000
},
{
"epoch": 0.08,
"learning_rate": 9.180000000000002e-06,
"loss": 0.0156,
"step": 4100
},
{
"epoch": 0.08,
"learning_rate": 9.16e-06,
"loss": 0.0149,
"step": 4200
},
{
"epoch": 0.09,
"learning_rate": 9.14e-06,
"loss": 0.0151,
"step": 4300
},
{
"epoch": 0.09,
"learning_rate": 9.12e-06,
"loss": 0.0149,
"step": 4400
},
{
"epoch": 0.09,
"learning_rate": 9.100000000000001e-06,
"loss": 0.0156,
"step": 4500
},
{
"epoch": 0.09,
"learning_rate": 9.080000000000001e-06,
"loss": 0.0154,
"step": 4600
},
{
"epoch": 0.09,
"learning_rate": 9.060000000000001e-06,
"loss": 0.015,
"step": 4700
},
{
"epoch": 0.1,
"learning_rate": 9.040000000000002e-06,
"loss": 0.0151,
"step": 4800
},
{
"epoch": 0.1,
"learning_rate": 9.020000000000002e-06,
"loss": 0.0148,
"step": 4900
},
{
"epoch": 0.1,
"learning_rate": 9e-06,
"loss": 0.0153,
"step": 5000
},
{
"epoch": 0.1,
"learning_rate": 8.98e-06,
"loss": 0.0157,
"step": 5100
},
{
"epoch": 0.1,
"learning_rate": 8.96e-06,
"loss": 0.0149,
"step": 5200
},
{
"epoch": 0.11,
"learning_rate": 8.94e-06,
"loss": 0.0149,
"step": 5300
},
{
"epoch": 0.11,
"learning_rate": 8.920000000000001e-06,
"loss": 0.0142,
"step": 5400
},
{
"epoch": 0.11,
"learning_rate": 8.900000000000001e-06,
"loss": 0.0147,
"step": 5500
},
{
"epoch": 0.11,
"learning_rate": 8.880000000000001e-06,
"loss": 0.0156,
"step": 5600
},
{
"epoch": 0.11,
"learning_rate": 8.860000000000002e-06,
"loss": 0.015,
"step": 5700
},
{
"epoch": 0.12,
"learning_rate": 8.84e-06,
"loss": 0.015,
"step": 5800
},
{
"epoch": 0.12,
"learning_rate": 8.82e-06,
"loss": 0.0151,
"step": 5900
},
{
"epoch": 0.12,
"learning_rate": 8.8e-06,
"loss": 0.0152,
"step": 6000
},
{
"epoch": 0.12,
"learning_rate": 8.78e-06,
"loss": 0.015,
"step": 6100
},
{
"epoch": 0.12,
"learning_rate": 8.76e-06,
"loss": 0.016,
"step": 6200
},
{
"epoch": 0.13,
"learning_rate": 8.740000000000001e-06,
"loss": 0.0158,
"step": 6300
},
{
"epoch": 0.13,
"learning_rate": 8.720000000000001e-06,
"loss": 0.0147,
"step": 6400
},
{
"epoch": 0.13,
"learning_rate": 8.700000000000001e-06,
"loss": 0.0147,
"step": 6500
},
{
"epoch": 0.13,
"learning_rate": 8.68e-06,
"loss": 0.015,
"step": 6600
},
{
"epoch": 0.13,
"learning_rate": 8.66e-06,
"loss": 0.0144,
"step": 6700
},
{
"epoch": 0.14,
"learning_rate": 8.64e-06,
"loss": 0.0147,
"step": 6800
},
{
"epoch": 0.14,
"learning_rate": 8.62e-06,
"loss": 0.0152,
"step": 6900
},
{
"epoch": 0.14,
"learning_rate": 8.6e-06,
"loss": 0.0152,
"step": 7000
},
{
"epoch": 0.14,
"learning_rate": 8.580000000000001e-06,
"loss": 0.0148,
"step": 7100
},
{
"epoch": 0.14,
"learning_rate": 8.560000000000001e-06,
"loss": 0.0146,
"step": 7200
},
{
"epoch": 0.15,
"learning_rate": 8.540000000000001e-06,
"loss": 0.0148,
"step": 7300
},
{
"epoch": 0.15,
"learning_rate": 8.52e-06,
"loss": 0.0147,
"step": 7400
},
{
"epoch": 0.15,
"learning_rate": 8.5e-06,
"loss": 0.0149,
"step": 7500
},
{
"epoch": 0.15,
"learning_rate": 8.48e-06,
"loss": 0.0153,
"step": 7600
},
{
"epoch": 0.15,
"learning_rate": 8.46e-06,
"loss": 0.0145,
"step": 7700
},
{
"epoch": 0.16,
"learning_rate": 8.44e-06,
"loss": 0.0149,
"step": 7800
},
{
"epoch": 0.16,
"learning_rate": 8.42e-06,
"loss": 0.015,
"step": 7900
},
{
"epoch": 0.16,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0149,
"step": 8000
},
{
"epoch": 0.16,
"learning_rate": 8.380000000000001e-06,
"loss": 0.0148,
"step": 8100
},
{
"epoch": 0.16,
"learning_rate": 8.36e-06,
"loss": 0.0153,
"step": 8200
},
{
"epoch": 0.17,
"learning_rate": 8.34e-06,
"loss": 0.0152,
"step": 8300
},
{
"epoch": 0.17,
"learning_rate": 8.32e-06,
"loss": 0.0148,
"step": 8400
},
{
"epoch": 0.17,
"learning_rate": 8.3e-06,
"loss": 0.0151,
"step": 8500
},
{
"epoch": 0.17,
"learning_rate": 8.28e-06,
"loss": 0.0147,
"step": 8600
},
{
"epoch": 0.17,
"learning_rate": 8.26e-06,
"loss": 0.0157,
"step": 8700
},
{
"epoch": 0.18,
"learning_rate": 8.24e-06,
"loss": 0.015,
"step": 8800
},
{
"epoch": 0.18,
"learning_rate": 8.220000000000001e-06,
"loss": 0.0148,
"step": 8900
},
{
"epoch": 0.18,
"learning_rate": 8.2e-06,
"loss": 0.0148,
"step": 9000
},
{
"epoch": 0.18,
"learning_rate": 8.18e-06,
"loss": 0.015,
"step": 9100
},
{
"epoch": 0.18,
"learning_rate": 8.16e-06,
"loss": 0.0157,
"step": 9200
},
{
"epoch": 0.19,
"learning_rate": 8.14e-06,
"loss": 0.0151,
"step": 9300
},
{
"epoch": 0.19,
"learning_rate": 8.120000000000002e-06,
"loss": 0.0149,
"step": 9400
},
{
"epoch": 0.19,
"learning_rate": 8.1e-06,
"loss": 0.0144,
"step": 9500
},
{
"epoch": 0.19,
"learning_rate": 8.08e-06,
"loss": 0.0139,
"step": 9600
},
{
"epoch": 0.19,
"learning_rate": 8.06e-06,
"loss": 0.0147,
"step": 9700
},
{
"epoch": 0.2,
"learning_rate": 8.040000000000001e-06,
"loss": 0.0148,
"step": 9800
},
{
"epoch": 0.2,
"learning_rate": 8.020000000000001e-06,
"loss": 0.0149,
"step": 9900
},
{
"epoch": 0.2,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0147,
"step": 10000
},
{
"epoch": 0.2,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0151,
"step": 10100
},
{
"epoch": 0.2,
"learning_rate": 7.960000000000002e-06,
"loss": 0.0153,
"step": 10200
},
{
"epoch": 0.21,
"learning_rate": 7.94e-06,
"loss": 0.0154,
"step": 10300
},
{
"epoch": 0.21,
"learning_rate": 7.92e-06,
"loss": 0.015,
"step": 10400
},
{
"epoch": 0.21,
"learning_rate": 7.9e-06,
"loss": 0.0148,
"step": 10500
},
{
"epoch": 0.21,
"learning_rate": 7.88e-06,
"loss": 0.0152,
"step": 10600
},
{
"epoch": 0.21,
"learning_rate": 7.860000000000001e-06,
"loss": 0.0143,
"step": 10700
},
{
"epoch": 0.22,
"learning_rate": 7.840000000000001e-06,
"loss": 0.0153,
"step": 10800
},
{
"epoch": 0.22,
"learning_rate": 7.820000000000001e-06,
"loss": 0.0154,
"step": 10900
},
{
"epoch": 0.22,
"learning_rate": 7.800000000000002e-06,
"loss": 0.0148,
"step": 11000
},
{
"epoch": 0.22,
"learning_rate": 7.78e-06,
"loss": 0.0154,
"step": 11100
},
{
"epoch": 0.22,
"learning_rate": 7.76e-06,
"loss": 0.0146,
"step": 11200
},
{
"epoch": 0.23,
"learning_rate": 7.74e-06,
"loss": 0.0149,
"step": 11300
},
{
"epoch": 0.23,
"learning_rate": 7.72e-06,
"loss": 0.0149,
"step": 11400
},
{
"epoch": 0.23,
"learning_rate": 7.7e-06,
"loss": 0.0151,
"step": 11500
},
{
"epoch": 0.23,
"learning_rate": 7.680000000000001e-06,
"loss": 0.0146,
"step": 11600
},
{
"epoch": 0.23,
"learning_rate": 7.660000000000001e-06,
"loss": 0.0148,
"step": 11700
},
{
"epoch": 0.24,
"learning_rate": 7.640000000000001e-06,
"loss": 0.0147,
"step": 11800
},
{
"epoch": 0.24,
"learning_rate": 7.620000000000001e-06,
"loss": 0.0145,
"step": 11900
},
{
"epoch": 0.24,
"learning_rate": 7.600000000000001e-06,
"loss": 0.0149,
"step": 12000
},
{
"epoch": 0.24,
"learning_rate": 7.58e-06,
"loss": 0.015,
"step": 12100
},
{
"epoch": 0.24,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.015,
"step": 12200
},
{
"epoch": 0.25,
"learning_rate": 7.540000000000001e-06,
"loss": 0.0148,
"step": 12300
},
{
"epoch": 0.25,
"learning_rate": 7.520000000000001e-06,
"loss": 0.0144,
"step": 12400
},
{
"epoch": 0.25,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0148,
"step": 12500
},
{
"epoch": 0.25,
"learning_rate": 7.48e-06,
"loss": 0.0151,
"step": 12600
},
{
"epoch": 0.25,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.0146,
"step": 12700
},
{
"epoch": 0.26,
"learning_rate": 7.440000000000001e-06,
"loss": 0.0154,
"step": 12800
},
{
"epoch": 0.26,
"learning_rate": 7.420000000000001e-06,
"loss": 0.0153,
"step": 12900
},
{
"epoch": 0.26,
"learning_rate": 7.4e-06,
"loss": 0.0145,
"step": 13000
},
{
"epoch": 0.26,
"learning_rate": 7.3800000000000005e-06,
"loss": 0.0152,
"step": 13100
},
{
"epoch": 0.26,
"learning_rate": 7.360000000000001e-06,
"loss": 0.0155,
"step": 13200
},
{
"epoch": 0.27,
"learning_rate": 7.340000000000001e-06,
"loss": 0.0149,
"step": 13300
},
{
"epoch": 0.27,
"learning_rate": 7.32e-06,
"loss": 0.0148,
"step": 13400
},
{
"epoch": 0.27,
"learning_rate": 7.3e-06,
"loss": 0.015,
"step": 13500
},
{
"epoch": 0.27,
"learning_rate": 7.280000000000001e-06,
"loss": 0.015,
"step": 13600
},
{
"epoch": 0.27,
"learning_rate": 7.260000000000001e-06,
"loss": 0.0149,
"step": 13700
},
{
"epoch": 0.28,
"learning_rate": 7.24e-06,
"loss": 0.0154,
"step": 13800
},
{
"epoch": 0.28,
"learning_rate": 7.22e-06,
"loss": 0.0148,
"step": 13900
},
{
"epoch": 0.28,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.0149,
"step": 14000
},
{
"epoch": 0.28,
"learning_rate": 7.180000000000001e-06,
"loss": 0.0147,
"step": 14100
},
{
"epoch": 0.28,
"learning_rate": 7.16e-06,
"loss": 0.0148,
"step": 14200
},
{
"epoch": 0.29,
"learning_rate": 7.14e-06,
"loss": 0.0136,
"step": 14300
},
{
"epoch": 0.29,
"learning_rate": 7.1200000000000004e-06,
"loss": 0.0146,
"step": 14400
},
{
"epoch": 0.29,
"learning_rate": 7.100000000000001e-06,
"loss": 0.0155,
"step": 14500
},
{
"epoch": 0.29,
"learning_rate": 7.08e-06,
"loss": 0.0152,
"step": 14600
},
{
"epoch": 0.29,
"learning_rate": 7.06e-06,
"loss": 0.0153,
"step": 14700
},
{
"epoch": 0.3,
"learning_rate": 7.04e-06,
"loss": 0.0145,
"step": 14800
},
{
"epoch": 0.3,
"learning_rate": 7.0200000000000006e-06,
"loss": 0.0149,
"step": 14900
},
{
"epoch": 0.3,
"learning_rate": 7e-06,
"loss": 0.0144,
"step": 15000
},
{
"epoch": 0.3,
"learning_rate": 6.98e-06,
"loss": 0.0146,
"step": 15100
},
{
"epoch": 0.3,
"learning_rate": 6.96e-06,
"loss": 0.0149,
"step": 15200
},
{
"epoch": 0.31,
"learning_rate": 6.9400000000000005e-06,
"loss": 0.0155,
"step": 15300
},
{
"epoch": 0.31,
"learning_rate": 6.92e-06,
"loss": 0.0148,
"step": 15400
},
{
"epoch": 0.31,
"learning_rate": 6.9e-06,
"loss": 0.0152,
"step": 15500
},
{
"epoch": 0.31,
"learning_rate": 6.88e-06,
"loss": 0.015,
"step": 15600
},
{
"epoch": 0.31,
"learning_rate": 6.860000000000001e-06,
"loss": 0.0147,
"step": 15700
},
{
"epoch": 0.32,
"learning_rate": 6.8400000000000014e-06,
"loss": 0.0139,
"step": 15800
},
{
"epoch": 0.32,
"learning_rate": 6.820000000000001e-06,
"loss": 0.0149,
"step": 15900
},
{
"epoch": 0.32,
"learning_rate": 6.800000000000001e-06,
"loss": 0.0149,
"step": 16000
},
{
"epoch": 0.32,
"learning_rate": 6.780000000000001e-06,
"loss": 0.0152,
"step": 16100
},
{
"epoch": 0.32,
"learning_rate": 6.760000000000001e-06,
"loss": 0.0154,
"step": 16200
},
{
"epoch": 0.33,
"learning_rate": 6.740000000000001e-06,
"loss": 0.0152,
"step": 16300
},
{
"epoch": 0.33,
"learning_rate": 6.720000000000001e-06,
"loss": 0.0152,
"step": 16400
},
{
"epoch": 0.33,
"learning_rate": 6.700000000000001e-06,
"loss": 0.0145,
"step": 16500
},
{
"epoch": 0.33,
"learning_rate": 6.680000000000001e-06,
"loss": 0.0145,
"step": 16600
},
{
"epoch": 0.33,
"learning_rate": 6.660000000000001e-06,
"loss": 0.0143,
"step": 16700
},
{
"epoch": 0.34,
"learning_rate": 6.640000000000001e-06,
"loss": 0.0147,
"step": 16800
},
{
"epoch": 0.34,
"learning_rate": 6.620000000000001e-06,
"loss": 0.0142,
"step": 16900
},
{
"epoch": 0.34,
"learning_rate": 6.600000000000001e-06,
"loss": 0.0149,
"step": 17000
},
{
"epoch": 0.34,
"learning_rate": 6.5800000000000005e-06,
"loss": 0.0148,
"step": 17100
},
{
"epoch": 0.34,
"learning_rate": 6.560000000000001e-06,
"loss": 0.014,
"step": 17200
},
{
"epoch": 0.35,
"learning_rate": 6.540000000000001e-06,
"loss": 0.0148,
"step": 17300
},
{
"epoch": 0.35,
"learning_rate": 6.520000000000001e-06,
"loss": 0.015,
"step": 17400
},
{
"epoch": 0.35,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0154,
"step": 17500
},
{
"epoch": 0.35,
"learning_rate": 6.480000000000001e-06,
"loss": 0.0147,
"step": 17600
},
{
"epoch": 0.35,
"learning_rate": 6.460000000000001e-06,
"loss": 0.0146,
"step": 17700
},
{
"epoch": 0.36,
"learning_rate": 6.440000000000001e-06,
"loss": 0.0146,
"step": 17800
},
{
"epoch": 0.36,
"learning_rate": 6.42e-06,
"loss": 0.0142,
"step": 17900
},
{
"epoch": 0.36,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0155,
"step": 18000
},
{
"epoch": 0.36,
"learning_rate": 6.380000000000001e-06,
"loss": 0.0141,
"step": 18100
},
{
"epoch": 0.36,
"learning_rate": 6.360000000000001e-06,
"loss": 0.0151,
"step": 18200
},
{
"epoch": 0.37,
"learning_rate": 6.34e-06,
"loss": 0.0145,
"step": 18300
},
{
"epoch": 0.37,
"learning_rate": 6.3200000000000005e-06,
"loss": 0.0144,
"step": 18400
},
{
"epoch": 0.37,
"learning_rate": 6.300000000000001e-06,
"loss": 0.015,
"step": 18500
},
{
"epoch": 0.37,
"learning_rate": 6.280000000000001e-06,
"loss": 0.0153,
"step": 18600
},
{
"epoch": 0.37,
"learning_rate": 6.26e-06,
"loss": 0.0137,
"step": 18700
},
{
"epoch": 0.38,
"learning_rate": 6.24e-06,
"loss": 0.0145,
"step": 18800
},
{
"epoch": 0.38,
"learning_rate": 6.220000000000001e-06,
"loss": 0.0149,
"step": 18900
},
{
"epoch": 0.38,
"learning_rate": 6.200000000000001e-06,
"loss": 0.0147,
"step": 19000
},
{
"epoch": 0.38,
"learning_rate": 6.18e-06,
"loss": 0.015,
"step": 19100
},
{
"epoch": 0.38,
"learning_rate": 6.16e-06,
"loss": 0.014,
"step": 19200
},
{
"epoch": 0.39,
"learning_rate": 6.1400000000000005e-06,
"loss": 0.0142,
"step": 19300
},
{
"epoch": 0.39,
"learning_rate": 6.120000000000001e-06,
"loss": 0.0148,
"step": 19400
},
{
"epoch": 0.39,
"learning_rate": 6.1e-06,
"loss": 0.0147,
"step": 19500
},
{
"epoch": 0.39,
"learning_rate": 6.08e-06,
"loss": 0.0142,
"step": 19600
},
{
"epoch": 0.39,
"learning_rate": 6.0600000000000004e-06,
"loss": 0.0154,
"step": 19700
},
{
"epoch": 0.4,
"learning_rate": 6.040000000000001e-06,
"loss": 0.0144,
"step": 19800
},
{
"epoch": 0.4,
"learning_rate": 6.02e-06,
"loss": 0.0145,
"step": 19900
},
{
"epoch": 0.4,
"learning_rate": 6e-06,
"loss": 0.0144,
"step": 20000
},
{
"epoch": 0.4,
"learning_rate": 5.98e-06,
"loss": 0.0145,
"step": 20100
},
{
"epoch": 0.4,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.0141,
"step": 20200
},
{
"epoch": 0.41,
"learning_rate": 5.94e-06,
"loss": 0.0139,
"step": 20300
},
{
"epoch": 0.41,
"learning_rate": 5.92e-06,
"loss": 0.014,
"step": 20400
},
{
"epoch": 0.41,
"learning_rate": 5.9e-06,
"loss": 0.0152,
"step": 20500
},
{
"epoch": 0.41,
"learning_rate": 5.8800000000000005e-06,
"loss": 0.0149,
"step": 20600
},
{
"epoch": 0.41,
"learning_rate": 5.86e-06,
"loss": 0.0139,
"step": 20700
},
{
"epoch": 0.42,
"learning_rate": 5.84e-06,
"loss": 0.0146,
"step": 20800
},
{
"epoch": 0.42,
"learning_rate": 5.82e-06,
"loss": 0.015,
"step": 20900
},
{
"epoch": 0.42,
"learning_rate": 5.8e-06,
"loss": 0.0151,
"step": 21000
},
{
"epoch": 0.42,
"learning_rate": 5.78e-06,
"loss": 0.0153,
"step": 21100
},
{
"epoch": 0.42,
"learning_rate": 5.76e-06,
"loss": 0.0148,
"step": 21200
},
{
"epoch": 0.43,
"learning_rate": 5.74e-06,
"loss": 0.015,
"step": 21300
},
{
"epoch": 0.43,
"learning_rate": 5.72e-06,
"loss": 0.0151,
"step": 21400
},
{
"epoch": 0.43,
"learning_rate": 5.7e-06,
"loss": 0.0151,
"step": 21500
},
{
"epoch": 0.43,
"learning_rate": 5.68e-06,
"loss": 0.0144,
"step": 21600
},
{
"epoch": 0.43,
"learning_rate": 5.66e-06,
"loss": 0.0148,
"step": 21700
},
{
"epoch": 0.44,
"learning_rate": 5.64e-06,
"loss": 0.0145,
"step": 21800
},
{
"epoch": 0.44,
"learning_rate": 5.620000000000001e-06,
"loss": 0.0147,
"step": 21900
},
{
"epoch": 0.44,
"learning_rate": 5.600000000000001e-06,
"loss": 0.0151,
"step": 22000
},
{
"epoch": 0.44,
"learning_rate": 5.580000000000001e-06,
"loss": 0.0138,
"step": 22100
},
{
"epoch": 0.44,
"learning_rate": 5.560000000000001e-06,
"loss": 0.0147,
"step": 22200
},
{
"epoch": 0.45,
"learning_rate": 5.540000000000001e-06,
"loss": 0.0146,
"step": 22300
},
{
"epoch": 0.45,
"learning_rate": 5.5200000000000005e-06,
"loss": 0.0147,
"step": 22400
},
{
"epoch": 0.45,
"learning_rate": 5.500000000000001e-06,
"loss": 0.014,
"step": 22500
},
{
"epoch": 0.45,
"learning_rate": 5.480000000000001e-06,
"loss": 0.0144,
"step": 22600
},
{
"epoch": 0.45,
"learning_rate": 5.460000000000001e-06,
"loss": 0.0141,
"step": 22700
},
{
"epoch": 0.46,
"learning_rate": 5.4400000000000004e-06,
"loss": 0.0143,
"step": 22800
},
{
"epoch": 0.46,
"learning_rate": 5.420000000000001e-06,
"loss": 0.0146,
"step": 22900
},
{
"epoch": 0.46,
"learning_rate": 5.400000000000001e-06,
"loss": 0.014,
"step": 23000
},
{
"epoch": 0.46,
"learning_rate": 5.380000000000001e-06,
"loss": 0.0147,
"step": 23100
},
{
"epoch": 0.46,
"learning_rate": 5.36e-06,
"loss": 0.0147,
"step": 23200
},
{
"epoch": 0.47,
"learning_rate": 5.3400000000000005e-06,
"loss": 0.0145,
"step": 23300
},
{
"epoch": 0.47,
"learning_rate": 5.320000000000001e-06,
"loss": 0.0146,
"step": 23400
},
{
"epoch": 0.47,
"learning_rate": 5.300000000000001e-06,
"loss": 0.0142,
"step": 23500
},
{
"epoch": 0.47,
"learning_rate": 5.28e-06,
"loss": 0.0142,
"step": 23600
},
{
"epoch": 0.47,
"learning_rate": 5.2600000000000005e-06,
"loss": 0.0138,
"step": 23700
},
{
"epoch": 0.48,
"learning_rate": 5.240000000000001e-06,
"loss": 0.0138,
"step": 23800
},
{
"epoch": 0.48,
"learning_rate": 5.220000000000001e-06,
"loss": 0.0144,
"step": 23900
},
{
"epoch": 0.48,
"learning_rate": 5.2e-06,
"loss": 0.0146,
"step": 24000
},
{
"epoch": 0.48,
"learning_rate": 5.18e-06,
"loss": 0.0147,
"step": 24100
},
{
"epoch": 0.48,
"learning_rate": 5.1600000000000006e-06,
"loss": 0.0158,
"step": 24200
},
{
"epoch": 0.49,
"learning_rate": 5.140000000000001e-06,
"loss": 0.015,
"step": 24300
},
{
"epoch": 0.49,
"learning_rate": 5.12e-06,
"loss": 0.0146,
"step": 24400
},
{
"epoch": 0.49,
"learning_rate": 5.1e-06,
"loss": 0.0154,
"step": 24500
},
{
"epoch": 0.49,
"learning_rate": 5.0800000000000005e-06,
"loss": 0.0143,
"step": 24600
},
{
"epoch": 0.49,
"learning_rate": 5.060000000000001e-06,
"loss": 0.0142,
"step": 24700
},
{
"epoch": 0.5,
"learning_rate": 5.04e-06,
"loss": 0.0147,
"step": 24800
},
{
"epoch": 0.5,
"learning_rate": 5.02e-06,
"loss": 0.0145,
"step": 24900
},
{
"epoch": 0.5,
"learning_rate": 5e-06,
"loss": 0.0143,
"step": 25000
},
{
"epoch": 0.5,
"learning_rate": 4.980000000000001e-06,
"loss": 0.0142,
"step": 25100
},
{
"epoch": 0.5,
"learning_rate": 4.960000000000001e-06,
"loss": 0.0148,
"step": 25200
},
{
"epoch": 0.51,
"learning_rate": 4.94e-06,
"loss": 0.0146,
"step": 25300
},
{
"epoch": 0.51,
"learning_rate": 4.92e-06,
"loss": 0.0148,
"step": 25400
},
{
"epoch": 0.51,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.0141,
"step": 25500
},
{
"epoch": 0.51,
"learning_rate": 4.880000000000001e-06,
"loss": 0.0149,
"step": 25600
},
{
"epoch": 0.51,
"learning_rate": 4.86e-06,
"loss": 0.0144,
"step": 25700
},
{
"epoch": 0.52,
"learning_rate": 4.84e-06,
"loss": 0.0141,
"step": 25800
},
{
"epoch": 0.52,
"learning_rate": 4.8200000000000004e-06,
"loss": 0.0142,
"step": 25900
},
{
"epoch": 0.52,
"learning_rate": 4.800000000000001e-06,
"loss": 0.0143,
"step": 26000
},
{
"epoch": 0.52,
"learning_rate": 4.78e-06,
"loss": 0.0145,
"step": 26100
},
{
"epoch": 0.52,
"learning_rate": 4.76e-06,
"loss": 0.0145,
"step": 26200
},
{
"epoch": 0.53,
"learning_rate": 4.74e-06,
"loss": 0.014,
"step": 26300
},
{
"epoch": 0.53,
"learning_rate": 4.7200000000000005e-06,
"loss": 0.0153,
"step": 26400
},
{
"epoch": 0.53,
"learning_rate": 4.7e-06,
"loss": 0.0151,
"step": 26500
},
{
"epoch": 0.53,
"learning_rate": 4.680000000000001e-06,
"loss": 0.0143,
"step": 26600
},
{
"epoch": 0.53,
"learning_rate": 4.66e-06,
"loss": 0.0144,
"step": 26700
},
{
"epoch": 0.54,
"learning_rate": 4.6400000000000005e-06,
"loss": 0.0142,
"step": 26800
},
{
"epoch": 0.54,
"learning_rate": 4.620000000000001e-06,
"loss": 0.0138,
"step": 26900
},
{
"epoch": 0.54,
"learning_rate": 4.600000000000001e-06,
"loss": 0.0145,
"step": 27000
},
{
"epoch": 0.54,
"learning_rate": 4.58e-06,
"loss": 0.0137,
"step": 27100
},
{
"epoch": 0.54,
"learning_rate": 4.56e-06,
"loss": 0.0142,
"step": 27200
},
{
"epoch": 0.55,
"learning_rate": 4.540000000000001e-06,
"loss": 0.0142,
"step": 27300
},
{
"epoch": 0.55,
"learning_rate": 4.520000000000001e-06,
"loss": 0.0139,
"step": 27400
},
{
"epoch": 0.55,
"learning_rate": 4.5e-06,
"loss": 0.015,
"step": 27500
},
{
"epoch": 0.55,
"learning_rate": 4.48e-06,
"loss": 0.0143,
"step": 27600
},
{
"epoch": 0.55,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.0154,
"step": 27700
},
{
"epoch": 0.56,
"learning_rate": 4.440000000000001e-06,
"loss": 0.0151,
"step": 27800
},
{
"epoch": 0.56,
"learning_rate": 4.42e-06,
"loss": 0.0142,
"step": 27900
},
{
"epoch": 0.56,
"learning_rate": 4.4e-06,
"loss": 0.0141,
"step": 28000
},
{
"epoch": 0.56,
"learning_rate": 4.38e-06,
"loss": 0.0146,
"step": 28100
},
{
"epoch": 0.56,
"learning_rate": 4.360000000000001e-06,
"loss": 0.0129,
"step": 28200
},
{
"epoch": 0.57,
"learning_rate": 4.34e-06,
"loss": 0.0144,
"step": 28300
},
{
"epoch": 0.57,
"learning_rate": 4.32e-06,
"loss": 0.0142,
"step": 28400
},
{
"epoch": 0.57,
"learning_rate": 4.3e-06,
"loss": 0.0144,
"step": 28500
},
{
"epoch": 0.57,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.014,
"step": 28600
},
{
"epoch": 0.57,
"learning_rate": 4.26e-06,
"loss": 0.0139,
"step": 28700
},
{
"epoch": 0.58,
"learning_rate": 4.24e-06,
"loss": 0.0133,
"step": 28800
},
{
"epoch": 0.58,
"learning_rate": 4.22e-06,
"loss": 0.0143,
"step": 28900
},
{
"epoch": 0.58,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.0144,
"step": 29000
},
{
"epoch": 0.58,
"learning_rate": 4.18e-06,
"loss": 0.0142,
"step": 29100
},
{
"epoch": 0.58,
"learning_rate": 4.16e-06,
"loss": 0.0143,
"step": 29200
},
{
"epoch": 0.59,
"learning_rate": 4.14e-06,
"loss": 0.0146,
"step": 29300
},
{
"epoch": 0.59,
"learning_rate": 4.12e-06,
"loss": 0.0141,
"step": 29400
},
{
"epoch": 0.59,
"learning_rate": 4.1e-06,
"loss": 0.0146,
"step": 29500
},
{
"epoch": 0.59,
"learning_rate": 4.08e-06,
"loss": 0.0144,
"step": 29600
},
{
"epoch": 0.59,
"learning_rate": 4.060000000000001e-06,
"loss": 0.0139,
"step": 29700
},
{
"epoch": 0.6,
"learning_rate": 4.04e-06,
"loss": 0.0142,
"step": 29800
},
{
"epoch": 0.6,
"learning_rate": 4.0200000000000005e-06,
"loss": 0.0147,
"step": 29900
},
{
"epoch": 0.6,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0146,
"step": 30000
},
{
"epoch": 0.6,
"learning_rate": 3.980000000000001e-06,
"loss": 0.0143,
"step": 30100
},
{
"epoch": 0.6,
"learning_rate": 3.96e-06,
"loss": 0.0137,
"step": 30200
},
{
"epoch": 0.61,
"learning_rate": 3.94e-06,
"loss": 0.0145,
"step": 30300
},
{
"epoch": 0.61,
"learning_rate": 3.920000000000001e-06,
"loss": 0.0143,
"step": 30400
},
{
"epoch": 0.61,
"learning_rate": 3.900000000000001e-06,
"loss": 0.0147,
"step": 30500
},
{
"epoch": 0.61,
"learning_rate": 3.88e-06,
"loss": 0.0145,
"step": 30600
},
{
"epoch": 0.61,
"learning_rate": 3.86e-06,
"loss": 0.0153,
"step": 30700
},
{
"epoch": 0.62,
"learning_rate": 3.8400000000000005e-06,
"loss": 0.0141,
"step": 30800
},
{
"epoch": 0.62,
"learning_rate": 3.820000000000001e-06,
"loss": 0.0144,
"step": 30900
},
{
"epoch": 0.62,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.0146,
"step": 31000
},
{
"epoch": 0.62,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.0139,
"step": 31100
},
{
"epoch": 0.62,
"learning_rate": 3.7600000000000004e-06,
"loss": 0.0139,
"step": 31200
},
{
"epoch": 0.63,
"learning_rate": 3.74e-06,
"loss": 0.0144,
"step": 31300
},
{
"epoch": 0.63,
"learning_rate": 3.7200000000000004e-06,
"loss": 0.014,
"step": 31400
},
{
"epoch": 0.63,
"learning_rate": 3.7e-06,
"loss": 0.0146,
"step": 31500
},
{
"epoch": 0.63,
"learning_rate": 3.6800000000000003e-06,
"loss": 0.0137,
"step": 31600
},
{
"epoch": 0.63,
"learning_rate": 3.66e-06,
"loss": 0.015,
"step": 31700
},
{
"epoch": 0.64,
"learning_rate": 3.6400000000000003e-06,
"loss": 0.014,
"step": 31800
},
{
"epoch": 0.64,
"learning_rate": 3.62e-06,
"loss": 0.0142,
"step": 31900
},
{
"epoch": 0.64,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.0132,
"step": 32000
},
{
"epoch": 0.64,
"learning_rate": 3.58e-06,
"loss": 0.0142,
"step": 32100
},
{
"epoch": 0.64,
"learning_rate": 3.5600000000000002e-06,
"loss": 0.014,
"step": 32200
},
{
"epoch": 0.65,
"learning_rate": 3.54e-06,
"loss": 0.0137,
"step": 32300
},
{
"epoch": 0.65,
"learning_rate": 3.52e-06,
"loss": 0.0147,
"step": 32400
},
{
"epoch": 0.65,
"learning_rate": 3.5e-06,
"loss": 0.0147,
"step": 32500
},
{
"epoch": 0.65,
"learning_rate": 3.48e-06,
"loss": 0.0146,
"step": 32600
},
{
"epoch": 0.65,
"learning_rate": 3.46e-06,
"loss": 0.0142,
"step": 32700
},
{
"epoch": 0.66,
"learning_rate": 3.44e-06,
"loss": 0.0145,
"step": 32800
},
{
"epoch": 0.66,
"learning_rate": 3.4200000000000007e-06,
"loss": 0.0144,
"step": 32900
},
{
"epoch": 0.66,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.0144,
"step": 33000
},
{
"epoch": 0.66,
"learning_rate": 3.3800000000000007e-06,
"loss": 0.0141,
"step": 33100
},
{
"epoch": 0.66,
"learning_rate": 3.3600000000000004e-06,
"loss": 0.0144,
"step": 33200
},
{
"epoch": 0.67,
"learning_rate": 3.3400000000000006e-06,
"loss": 0.0137,
"step": 33300
},
{
"epoch": 0.67,
"learning_rate": 3.3200000000000004e-06,
"loss": 0.0147,
"step": 33400
},
{
"epoch": 0.67,
"learning_rate": 3.3000000000000006e-06,
"loss": 0.0149,
"step": 33500
},
{
"epoch": 0.67,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.0142,
"step": 33600
},
{
"epoch": 0.67,
"learning_rate": 3.2600000000000006e-06,
"loss": 0.0146,
"step": 33700
},
{
"epoch": 0.68,
"learning_rate": 3.2400000000000003e-06,
"loss": 0.0145,
"step": 33800
},
{
"epoch": 0.68,
"learning_rate": 3.2200000000000005e-06,
"loss": 0.0143,
"step": 33900
},
{
"epoch": 0.68,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.0141,
"step": 34000
},
{
"epoch": 0.68,
"learning_rate": 3.1800000000000005e-06,
"loss": 0.0145,
"step": 34100
},
{
"epoch": 0.68,
"learning_rate": 3.1600000000000002e-06,
"loss": 0.0145,
"step": 34200
},
{
"epoch": 0.69,
"learning_rate": 3.1400000000000004e-06,
"loss": 0.0143,
"step": 34300
},
{
"epoch": 0.69,
"learning_rate": 3.12e-06,
"loss": 0.0143,
"step": 34400
},
{
"epoch": 0.69,
"learning_rate": 3.1000000000000004e-06,
"loss": 0.0146,
"step": 34500
},
{
"epoch": 0.69,
"learning_rate": 3.08e-06,
"loss": 0.0137,
"step": 34600
},
{
"epoch": 0.69,
"learning_rate": 3.0600000000000003e-06,
"loss": 0.0135,
"step": 34700
},
{
"epoch": 0.7,
"learning_rate": 3.04e-06,
"loss": 0.0146,
"step": 34800
},
{
"epoch": 0.7,
"learning_rate": 3.0200000000000003e-06,
"loss": 0.013,
"step": 34900
},
{
"epoch": 0.7,
"learning_rate": 3e-06,
"loss": 0.0143,
"step": 35000
},
{
"epoch": 0.7,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.0135,
"step": 35100
},
{
"epoch": 0.7,
"learning_rate": 2.96e-06,
"loss": 0.0133,
"step": 35200
},
{
"epoch": 0.71,
"learning_rate": 2.9400000000000002e-06,
"loss": 0.0137,
"step": 35300
},
{
"epoch": 0.71,
"learning_rate": 2.92e-06,
"loss": 0.0151,
"step": 35400
},
{
"epoch": 0.71,
"learning_rate": 2.9e-06,
"loss": 0.0138,
"step": 35500
},
{
"epoch": 0.71,
"learning_rate": 2.88e-06,
"loss": 0.014,
"step": 35600
},
{
"epoch": 0.71,
"learning_rate": 2.86e-06,
"loss": 0.014,
"step": 35700
},
{
"epoch": 0.72,
"learning_rate": 2.84e-06,
"loss": 0.0141,
"step": 35800
},
{
"epoch": 0.72,
"learning_rate": 2.82e-06,
"loss": 0.0135,
"step": 35900
},
{
"epoch": 0.72,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.0142,
"step": 36000
},
{
"epoch": 0.72,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.014,
"step": 36100
},
{
"epoch": 0.72,
"learning_rate": 2.7600000000000003e-06,
"loss": 0.0144,
"step": 36200
},
{
"epoch": 0.73,
"learning_rate": 2.7400000000000004e-06,
"loss": 0.0142,
"step": 36300
},
{
"epoch": 0.73,
"learning_rate": 2.7200000000000002e-06,
"loss": 0.0147,
"step": 36400
},
{
"epoch": 0.73,
"learning_rate": 2.7000000000000004e-06,
"loss": 0.0139,
"step": 36500
},
{
"epoch": 0.73,
"learning_rate": 2.68e-06,
"loss": 0.0145,
"step": 36600
},
{
"epoch": 0.73,
"learning_rate": 2.6600000000000004e-06,
"loss": 0.0139,
"step": 36700
},
{
"epoch": 0.74,
"learning_rate": 2.64e-06,
"loss": 0.0137,
"step": 36800
},
{
"epoch": 0.74,
"learning_rate": 2.6200000000000003e-06,
"loss": 0.0142,
"step": 36900
},
{
"epoch": 0.74,
"learning_rate": 2.6e-06,
"loss": 0.0141,
"step": 37000
},
{
"epoch": 0.74,
"learning_rate": 2.5800000000000003e-06,
"loss": 0.0144,
"step": 37100
},
{
"epoch": 0.74,
"learning_rate": 2.56e-06,
"loss": 0.0144,
"step": 37200
},
{
"epoch": 0.75,
"learning_rate": 2.5400000000000002e-06,
"loss": 0.014,
"step": 37300
},
{
"epoch": 0.75,
"learning_rate": 2.52e-06,
"loss": 0.0144,
"step": 37400
},
{
"epoch": 0.75,
"learning_rate": 2.5e-06,
"loss": 0.0144,
"step": 37500
},
{
"epoch": 0.75,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.0132,
"step": 37600
},
{
"epoch": 0.75,
"learning_rate": 2.46e-06,
"loss": 0.014,
"step": 37700
},
{
"epoch": 0.76,
"learning_rate": 2.4400000000000004e-06,
"loss": 0.0134,
"step": 37800
},
{
"epoch": 0.76,
"learning_rate": 2.42e-06,
"loss": 0.0145,
"step": 37900
},
{
"epoch": 0.76,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.0139,
"step": 38000
},
{
"epoch": 0.76,
"learning_rate": 2.38e-06,
"loss": 0.0145,
"step": 38100
},
{
"epoch": 0.76,
"learning_rate": 2.3600000000000003e-06,
"loss": 0.0143,
"step": 38200
},
{
"epoch": 0.77,
"learning_rate": 2.3400000000000005e-06,
"loss": 0.0143,
"step": 38300
},
{
"epoch": 0.77,
"learning_rate": 2.3200000000000002e-06,
"loss": 0.014,
"step": 38400
},
{
"epoch": 0.77,
"learning_rate": 2.3000000000000004e-06,
"loss": 0.0146,
"step": 38500
},
{
"epoch": 0.77,
"learning_rate": 2.28e-06,
"loss": 0.0142,
"step": 38600
},
{
"epoch": 0.77,
"learning_rate": 2.2600000000000004e-06,
"loss": 0.0138,
"step": 38700
},
{
"epoch": 0.78,
"learning_rate": 2.24e-06,
"loss": 0.0141,
"step": 38800
},
{
"epoch": 0.78,
"learning_rate": 2.2200000000000003e-06,
"loss": 0.0142,
"step": 38900
},
{
"epoch": 0.78,
"learning_rate": 2.2e-06,
"loss": 0.0141,
"step": 39000
},
{
"epoch": 0.78,
"learning_rate": 2.1800000000000003e-06,
"loss": 0.0148,
"step": 39100
},
{
"epoch": 0.78,
"learning_rate": 2.16e-06,
"loss": 0.0135,
"step": 39200
},
{
"epoch": 0.79,
"learning_rate": 2.1400000000000003e-06,
"loss": 0.0136,
"step": 39300
},
{
"epoch": 0.79,
"learning_rate": 2.12e-06,
"loss": 0.0143,
"step": 39400
},
{
"epoch": 0.79,
"learning_rate": 2.1000000000000002e-06,
"loss": 0.0139,
"step": 39500
},
{
"epoch": 0.79,
"learning_rate": 2.08e-06,
"loss": 0.0137,
"step": 39600
},
{
"epoch": 0.79,
"learning_rate": 2.06e-06,
"loss": 0.0141,
"step": 39700
},
{
"epoch": 0.8,
"learning_rate": 2.04e-06,
"loss": 0.0138,
"step": 39800
},
{
"epoch": 0.8,
"learning_rate": 2.02e-06,
"loss": 0.014,
"step": 39900
},
{
"epoch": 0.8,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0132,
"step": 40000
},
{
"epoch": 0.8,
"learning_rate": 1.98e-06,
"loss": 0.0137,
"step": 40100
},
{
"epoch": 0.8,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.0141,
"step": 40200
},
{
"epoch": 0.81,
"learning_rate": 1.94e-06,
"loss": 0.0141,
"step": 40300
},
{
"epoch": 0.81,
"learning_rate": 1.9200000000000003e-06,
"loss": 0.0137,
"step": 40400
},
{
"epoch": 0.81,
"learning_rate": 1.9000000000000002e-06,
"loss": 0.0141,
"step": 40500
},
{
"epoch": 0.81,
"learning_rate": 1.8800000000000002e-06,
"loss": 0.0148,
"step": 40600
},
{
"epoch": 0.81,
"learning_rate": 1.8600000000000002e-06,
"loss": 0.0143,
"step": 40700
},
{
"epoch": 0.82,
"learning_rate": 1.8400000000000002e-06,
"loss": 0.0145,
"step": 40800
},
{
"epoch": 0.82,
"learning_rate": 1.8200000000000002e-06,
"loss": 0.0141,
"step": 40900
},
{
"epoch": 0.82,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.0146,
"step": 41000
},
{
"epoch": 0.82,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.0141,
"step": 41100
},
{
"epoch": 0.82,
"learning_rate": 1.76e-06,
"loss": 0.0147,
"step": 41200
},
{
"epoch": 0.83,
"learning_rate": 1.74e-06,
"loss": 0.0136,
"step": 41300
},
{
"epoch": 0.83,
"learning_rate": 1.72e-06,
"loss": 0.0147,
"step": 41400
},
{
"epoch": 0.83,
"learning_rate": 1.7000000000000002e-06,
"loss": 0.0131,
"step": 41500
},
{
"epoch": 0.83,
"learning_rate": 1.6800000000000002e-06,
"loss": 0.0143,
"step": 41600
},
{
"epoch": 0.83,
"learning_rate": 1.6600000000000002e-06,
"loss": 0.0149,
"step": 41700
},
{
"epoch": 0.84,
"learning_rate": 1.6400000000000002e-06,
"loss": 0.0135,
"step": 41800
},
{
"epoch": 0.84,
"learning_rate": 1.6200000000000002e-06,
"loss": 0.0146,
"step": 41900
},
{
"epoch": 0.84,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.015,
"step": 42000
},
{
"epoch": 0.84,
"learning_rate": 1.5800000000000001e-06,
"loss": 0.0148,
"step": 42100
},
{
"epoch": 0.84,
"learning_rate": 1.56e-06,
"loss": 0.0145,
"step": 42200
},
{
"epoch": 0.85,
"learning_rate": 1.54e-06,
"loss": 0.0135,
"step": 42300
},
{
"epoch": 0.85,
"learning_rate": 1.52e-06,
"loss": 0.0138,
"step": 42400
},
{
"epoch": 0.85,
"learning_rate": 1.5e-06,
"loss": 0.014,
"step": 42500
},
{
"epoch": 0.85,
"learning_rate": 1.48e-06,
"loss": 0.0146,
"step": 42600
},
{
"epoch": 0.85,
"learning_rate": 1.46e-06,
"loss": 0.0137,
"step": 42700
},
{
"epoch": 0.86,
"learning_rate": 1.44e-06,
"loss": 0.0137,
"step": 42800
},
{
"epoch": 0.86,
"learning_rate": 1.42e-06,
"loss": 0.0135,
"step": 42900
},
{
"epoch": 0.86,
"learning_rate": 1.4000000000000001e-06,
"loss": 0.0138,
"step": 43000
},
{
"epoch": 0.86,
"learning_rate": 1.3800000000000001e-06,
"loss": 0.0139,
"step": 43100
},
{
"epoch": 0.86,
"learning_rate": 1.3600000000000001e-06,
"loss": 0.0138,
"step": 43200
},
{
"epoch": 0.87,
"learning_rate": 1.34e-06,
"loss": 0.0137,
"step": 43300
},
{
"epoch": 0.87,
"learning_rate": 1.32e-06,
"loss": 0.0139,
"step": 43400
},
{
"epoch": 0.87,
"learning_rate": 1.3e-06,
"loss": 0.014,
"step": 43500
},
{
"epoch": 0.87,
"learning_rate": 1.28e-06,
"loss": 0.0139,
"step": 43600
},
{
"epoch": 0.87,
"learning_rate": 1.26e-06,
"loss": 0.0141,
"step": 43700
},
{
"epoch": 0.88,
"learning_rate": 1.2400000000000002e-06,
"loss": 0.0145,
"step": 43800
},
{
"epoch": 0.88,
"learning_rate": 1.2200000000000002e-06,
"loss": 0.0153,
"step": 43900
},
{
"epoch": 0.88,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.0144,
"step": 44000
},
{
"epoch": 0.88,
"learning_rate": 1.1800000000000001e-06,
"loss": 0.0143,
"step": 44100
},
{
"epoch": 0.88,
"learning_rate": 1.1600000000000001e-06,
"loss": 0.0137,
"step": 44200
},
{
"epoch": 0.89,
"learning_rate": 1.14e-06,
"loss": 0.0139,
"step": 44300
},
{
"epoch": 0.89,
"learning_rate": 1.12e-06,
"loss": 0.0139,
"step": 44400
},
{
"epoch": 0.89,
"learning_rate": 1.1e-06,
"loss": 0.013,
"step": 44500
},
{
"epoch": 0.89,
"learning_rate": 1.08e-06,
"loss": 0.0146,
"step": 44600
},
{
"epoch": 0.89,
"learning_rate": 1.06e-06,
"loss": 0.0145,
"step": 44700
},
{
"epoch": 0.9,
"learning_rate": 1.04e-06,
"loss": 0.0136,
"step": 44800
},
{
"epoch": 0.9,
"learning_rate": 1.02e-06,
"loss": 0.0141,
"step": 44900
},
{
"epoch": 0.9,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0143,
"step": 45000
},
{
"epoch": 0.9,
"learning_rate": 9.800000000000001e-07,
"loss": 0.0138,
"step": 45100
},
{
"epoch": 0.9,
"learning_rate": 9.600000000000001e-07,
"loss": 0.0145,
"step": 45200
},
{
"epoch": 0.91,
"learning_rate": 9.400000000000001e-07,
"loss": 0.0141,
"step": 45300
},
{
"epoch": 0.91,
"learning_rate": 9.200000000000001e-07,
"loss": 0.0141,
"step": 45400
},
{
"epoch": 0.91,
"learning_rate": 9.000000000000001e-07,
"loss": 0.0142,
"step": 45500
},
{
"epoch": 0.91,
"learning_rate": 8.8e-07,
"loss": 0.0139,
"step": 45600
},
{
"epoch": 0.91,
"learning_rate": 8.6e-07,
"loss": 0.0136,
"step": 45700
},
{
"epoch": 0.92,
"learning_rate": 8.400000000000001e-07,
"loss": 0.0138,
"step": 45800
},
{
"epoch": 0.92,
"learning_rate": 8.200000000000001e-07,
"loss": 0.0135,
"step": 45900
},
{
"epoch": 0.92,
"learning_rate": 8.000000000000001e-07,
"loss": 0.014,
"step": 46000
},
{
"epoch": 0.92,
"learning_rate": 7.8e-07,
"loss": 0.0147,
"step": 46100
},
{
"epoch": 0.92,
"learning_rate": 7.6e-07,
"loss": 0.0133,
"step": 46200
},
{
"epoch": 0.93,
"learning_rate": 7.4e-07,
"loss": 0.0147,
"step": 46300
},
{
"epoch": 0.93,
"learning_rate": 7.2e-07,
"loss": 0.014,
"step": 46400
},
{
"epoch": 0.93,
"learning_rate": 7.000000000000001e-07,
"loss": 0.0138,
"step": 46500
},
{
"epoch": 0.93,
"learning_rate": 6.800000000000001e-07,
"loss": 0.0137,
"step": 46600
},
{
"epoch": 0.93,
"learning_rate": 6.6e-07,
"loss": 0.0138,
"step": 46700
},
{
"epoch": 0.94,
"learning_rate": 6.4e-07,
"loss": 0.0148,
"step": 46800
},
{
"epoch": 0.94,
"learning_rate": 6.200000000000001e-07,
"loss": 0.0141,
"step": 46900
},
{
"epoch": 0.94,
"learning_rate": 6.000000000000001e-07,
"loss": 0.0136,
"step": 47000
},
{
"epoch": 0.94,
"learning_rate": 5.800000000000001e-07,
"loss": 0.0144,
"step": 47100
},
{
"epoch": 0.94,
"learning_rate": 5.6e-07,
"loss": 0.0137,
"step": 47200
},
{
"epoch": 0.95,
"learning_rate": 5.4e-07,
"loss": 0.0136,
"step": 47300
},
{
"epoch": 0.95,
"learning_rate": 5.2e-07,
"loss": 0.0144,
"step": 47400
},
{
"epoch": 0.95,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0138,
"step": 47500
},
{
"epoch": 0.95,
"learning_rate": 4.800000000000001e-07,
"loss": 0.0148,
"step": 47600
},
{
"epoch": 0.95,
"learning_rate": 4.6000000000000004e-07,
"loss": 0.0143,
"step": 47700
},
{
"epoch": 0.96,
"learning_rate": 4.4e-07,
"loss": 0.0135,
"step": 47800
},
{
"epoch": 0.96,
"learning_rate": 4.2000000000000006e-07,
"loss": 0.0149,
"step": 47900
},
{
"epoch": 0.96,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.0143,
"step": 48000
},
{
"epoch": 0.96,
"learning_rate": 3.8e-07,
"loss": 0.0131,
"step": 48100
},
{
"epoch": 0.96,
"learning_rate": 3.6e-07,
"loss": 0.0134,
"step": 48200
},
{
"epoch": 0.97,
"learning_rate": 3.4000000000000003e-07,
"loss": 0.0141,
"step": 48300
},
{
"epoch": 0.97,
"learning_rate": 3.2e-07,
"loss": 0.015,
"step": 48400
},
{
"epoch": 0.97,
"learning_rate": 3.0000000000000004e-07,
"loss": 0.0142,
"step": 48500
},
{
"epoch": 0.97,
"learning_rate": 2.8e-07,
"loss": 0.0142,
"step": 48600
},
{
"epoch": 0.97,
"learning_rate": 2.6e-07,
"loss": 0.0141,
"step": 48700
},
{
"epoch": 0.98,
"learning_rate": 2.4000000000000003e-07,
"loss": 0.0148,
"step": 48800
},
{
"epoch": 0.98,
"learning_rate": 2.2e-07,
"loss": 0.0134,
"step": 48900
},
{
"epoch": 0.98,
"learning_rate": 2.0000000000000002e-07,
"loss": 0.0147,
"step": 49000
},
{
"epoch": 0.98,
"learning_rate": 1.8e-07,
"loss": 0.0138,
"step": 49100
},
{
"epoch": 0.98,
"learning_rate": 1.6e-07,
"loss": 0.0142,
"step": 49200
},
{
"epoch": 0.99,
"learning_rate": 1.4e-07,
"loss": 0.0141,
"step": 49300
},
{
"epoch": 0.99,
"learning_rate": 1.2000000000000002e-07,
"loss": 0.0139,
"step": 49400
},
{
"epoch": 0.99,
"learning_rate": 1.0000000000000001e-07,
"loss": 0.0141,
"step": 49500
},
{
"epoch": 0.99,
"learning_rate": 8e-08,
"loss": 0.0146,
"step": 49600
},
{
"epoch": 0.99,
"learning_rate": 6.000000000000001e-08,
"loss": 0.0142,
"step": 49700
},
{
"epoch": 1.0,
"learning_rate": 4e-08,
"loss": 0.0145,
"step": 49800
},
{
"epoch": 1.0,
"learning_rate": 2e-08,
"loss": 0.0146,
"step": 49900
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 0.0142,
"step": 50000
}
],
"max_steps": 50000,
"num_train_epochs": 1,
"total_flos": 1.371783168e+17,
"trial_name": null,
"trial_params": null
}