add docs
Browse files
    	
        README.md
    CHANGED
    
    | 
         @@ -422,6 +422,12 @@ log_sweep_max_lr: 
     | 
|
| 422 | 
         
             
            optimizer:
         
     | 
| 423 | 
         
             
            # specify weight decay
         
     | 
| 424 | 
         
             
            weight_decay:
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 425 | 
         | 
| 426 | 
         
             
            # whether to use bettertransformers
         
     | 
| 427 | 
         
             
            flash_optimum:
         
     | 
| 
         | 
|
| 422 | 
         
             
            optimizer:
         
     | 
| 423 | 
         
             
            # specify weight decay
         
     | 
| 424 | 
         
             
            weight_decay:
         
     | 
| 425 | 
         
            +
            # AdamW hyperparameters
         
     | 
| 426 | 
         
            +
            adam_beta1:
         
     | 
| 427 | 
         
            +
            adam_beta2:
         
     | 
| 428 | 
         
            +
            adam_epsilon:
         
     | 
| 429 | 
         
            +
            # Gradient clipping max norm
         
     | 
| 430 | 
         
            +
            max_grad_norm:
         
     | 
| 431 | 
         | 
| 432 | 
         
             
            # whether to use bettertransformers
         
     | 
| 433 | 
         
             
            flash_optimum:
         
     |