---
# Parameters used by the pipeline stages to train or predict.
#
# NOTE(review): the original file had lost its line breaks, which turned the
# whole document into a single comment. Nesting every key under `train` is
# reconstructed from the key order; confirm against the consuming stage.

train:
  squad_v2: false
  model_checkpoint: 'distilbert-base-uncased'
  batch_size: 16
  # The maximum length of a feature (question and context).
  max_length: 384
  doc_stride: 128
  # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
  # read a bare `2e-5` as a string, not a float.
  learning_rate: 2.0e-5
  num_train_epochs: 1
  weight_decay: 0.01
  dataset_name: 'squad'