File size: 309 Bytes
89556fb
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
    # NOTE(review): this is a bare keyword-argument fragment; the enclosing call
    # is not visible here (presumably a trainer/config constructor — confirm).
    warmup_steps = 5,
    num_train_epochs = 3,
    learning_rate = 5e-5,

    # Optimizer is selected by name; the target-module entries are raw-string
    # regex patterns. The '.' before "attn"/"mlp" is an unescaped wildcard
    # (any character), not a literal dot — presumably intentional; verify
    # against how the consumer matches module names.
    optim="galore_adafactor",
    optim_target_modules=[r".*.attn.*", r".*.mlp.*"],

    weight_decay = 0.03, # L2 reg (author's note)
    lr_scheduler_type = "linear",  # author's note: reduce_lr_on_plateau (presumably an alternative considered)
    gradient_accumulation_steps = 4,
    # NOTE(review): confirm the callee accepts `use_liger` under this exact name
    # for the library version in use.
    use_liger = True,