###### TRAINING
# Training configuration constants. Edit the values below to configure a run.

# Model parameters
EPOCHS = 30
BATCH_SIZE = 8
MAX_LENGTH = 2000
LEARNING_RATE = 3e-4
N_UNFROZEN_LAYERS = 8
UNFREEZE_QUERY = True
UNFREEZE_KEY = True
UNFREEZE_VALUE = True

### Masking parameters - must use either variable or fixed masking rate
# Variable masking rate (choice 1)
# If True, the variable masking settings below are used; if False, the fixed
# MASK_PERCENTAGE (choice 2) is used instead.
VAR_MASK_RATE = True
# NOTE(review): presumably the lower/upper bounds of the variable masking rate - confirm.
MASK_LOW = 0.15
MASK_HIGH = 0.40
# NOTE(review): presumably the number of steps in the masking schedule - confirm.
MASK_STEPS = 20
# Type of scheduler to use. Options are: "cosine", "loglinear", "stepwise"
MASK_SCHEDULER = "cosine"

# Fixed masking rate (choice 2)
# If VAR_MASK_RATE = False, code will use this fixed masking rate
MASK_PERCENTAGE = 0.15

# To continue training a model you already started, fill in the following parameters
# Set to False if you want to finetune from a checkpoint
FINETUNE_FROM_SCRATCH = True
# Only set the path if FINETUNE_FROM_SCRATCH = False
PATH_TO_STARTING_CKPT = ''

# File paths - do not change unless you move the training data
TRAIN_PATH = '../data/splits/train_df.csv'
VAL_PATH = '../data/splits/val_df.csv'
TEST_PATH = '../data/splits/test_df.csv'

# WandB parameters
# Fill these in with your own WandB account info.
# WARNING: do not commit a real API key to version control.
WANDB_PROJECT = ''
WANDB_ENTITY = ''
WANDB_API_KEY = ''

# GPU parameters
# Kept as a string (e.g. "0" or "0,1").
# NOTE(review): presumably exported as the CUDA_VISIBLE_DEVICES environment variable by the training script - confirm.
CUDA_VISIBLE_DEVICES = "0"