File size: 309 Bytes
89556fb |
1 2 3 4 5 6 7 8 9 10 11 12 |
# Training hyperparameters, written as a keyword-argument list.
# NOTE(review): the call/constructor that receives these arguments is not
# part of this file -- presumably a Hugging Face `TrainingArguments`-style
# config; confirm against the caller.
# NOTE(review): every line ends with a trailing comma. Inside a call that is
# just an argument separator, but if these lines are ever executed as
# standalone statements each name is bound to a one-element tuple
# (e.g. `(5,)`), not the bare value.

# Number of warmup steps (presumably for the learning-rate schedule).
warmup_steps = 5,
# Number of training epochs.
num_train_epochs = 3,
# Learning rate (presumably the peak value reached after warmup).
learning_rate = 5e-5,
# Optimizer selected by name.
optim="galore_adafactor",
# Raw-string patterns naming the modules the optimizer should target.
# NOTE(review): if these are interpreted as regular expressions, the
# unescaped "." matches any character, not only a literal dot.
optim_target_modules=[r".*.attn.*", r".*.mlp.*"],
# Weight decay coefficient; the author annotates it as L2 regularization.
weight_decay = 0.03, #L2 reg
# Scheduler name in use is "linear"; the trailing comment records
# "reduce_lr_on_plateau", presumably an alternative that was considered.
lr_scheduler_type = "linear", #reduce_lr_on_plateau
# Presumably gradients are accumulated over 4 steps before each optimizer
# update -- confirm with the consumer of this setting.
gradient_accumulation_steps = 4,
# NOTE(review): presumably enables Liger kernels. The accepted keyword name
# depends on the library/version in use (some expose `use_liger_kernel`
# instead) -- verify against the installed version.
use_liger = True,
|