File size: 2,674 Bytes
7b551c0
 
 
21f2c3f
 
 
7b551c0
 
 
 
 
21f2c3f
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
7b551c0
 
21f2c3f
 
 
 
 
 
 
7b551c0
 
 
21f2c3f
7b551c0
 
21f2c3f
 
 
7b551c0
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.08815232722143865,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01763046544428773,
      "grad_norm": 1.5435048341751099,
      "learning_rate": 4.9706158909261876e-05,
      "loss": 0.8407,
      "step": 500
    },
    {
      "epoch": 0.01763046544428773,
      "eval_accuracy": 0.8252411503248731,
      "eval_loss": 0.7565935850143433,
      "eval_runtime": 2895.6955,
      "eval_samples_per_second": 32.989,
      "eval_steps_per_second": 0.516,
      "step": 500
    },
    {
      "epoch": 0.03526093088857546,
      "grad_norm": 1.5116485357284546,
      "learning_rate": 4.9412317818523744e-05,
      "loss": 0.5964,
      "step": 1000
    },
    {
      "epoch": 0.03526093088857546,
      "eval_accuracy": 0.8373293281429335,
      "eval_loss": 0.6963507533073425,
      "eval_runtime": 2899.1751,
      "eval_samples_per_second": 32.949,
      "eval_steps_per_second": 0.515,
      "step": 1000
    },
    {
      "epoch": 0.05289139633286319,
      "grad_norm": 1.4373358488082886,
      "learning_rate": 4.911847672778562e-05,
      "loss": 0.5661,
      "step": 1500
    },
    {
      "epoch": 0.05289139633286319,
      "eval_accuracy": 0.8443953465863471,
      "eval_loss": 0.6656736731529236,
      "eval_runtime": 2944.9636,
      "eval_samples_per_second": 32.437,
      "eval_steps_per_second": 0.507,
      "step": 1500
    },
    {
      "epoch": 0.07052186177715092,
      "grad_norm": 1.216012716293335,
      "learning_rate": 4.882463563704749e-05,
      "loss": 0.5402,
      "step": 2000
    },
    {
      "epoch": 0.07052186177715092,
      "eval_accuracy": 0.8482718545347777,
      "eval_loss": 0.6440214514732361,
      "eval_runtime": 2944.5243,
      "eval_samples_per_second": 32.442,
      "eval_steps_per_second": 0.507,
      "step": 2000
    },
    {
      "epoch": 0.08815232722143865,
      "grad_norm": 1.0847452878952026,
      "learning_rate": 4.853079454630936e-05,
      "loss": 0.5237,
      "step": 2500
    },
    {
      "epoch": 0.08815232722143865,
      "eval_accuracy": 0.8508165457808422,
      "eval_loss": 0.6308088898658752,
      "eval_runtime": 2933.5042,
      "eval_samples_per_second": 32.564,
      "eval_steps_per_second": 0.509,
      "step": 2500
    }
  ],
  "logging_steps": 500,
  "max_steps": 85080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 3.70943641780224e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}