flan-t5-custom-handler / training_config.py
MjolnirThor's picture
Update training_config.py
a96669d verified
raw
history blame contribute delete
630 Bytes
from transformers import TrainingArguments
# Hyperparameters and run settings for fine-tuning, collected in a single
# TrainingArguments object that is created when this module is imported.
training_args = TrainingArguments(
    # Output locations for checkpoints and logs.
    output_dir="./results",
    logging_dir='./logs',
    # Optimisation schedule.
    num_train_epochs=3,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Log every 50 steps; evaluate and checkpoint on the same 500-step
    # cadence, keeping at most 3 checkpoints on disk.
    logging_steps=50,
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy`; switch once the pinned library version is confirmed.
    evaluation_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=3,
    load_best_model_at_end=True,
    # Mixed precision and gradient checkpointing are both enabled.
    # NOTE(review): fp16 presumably requires a GPU/accelerator at runtime;
    # confirm the target hardware.
    fp16=True,
    gradient_checkpointing=True,
    # Allow GPU usage. `use_cpu` replaces the deprecated `no_cuda` flag, so
    # only `use_cpu` is passed; `no_cuda` keeps its default of False.
    use_cpu=False,
)