cconsti committed
Commit 75b0bbd · verified · Parent: 60093e8

Update train.py

Files changed (1): train.py (+7, -4)
train.py CHANGED
@@ -23,6 +23,7 @@ if "test" not in dataset:
 model_name = "t5-large"
 tokenizer = T5Tokenizer.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
+model.gradient_checkpointing_enable()

 # Define tokenization function
 def tokenize_function(examples):
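
For context on the line added above: gradient checkpointing trades compute for memory by discarding intermediate activations during the forward pass and recomputing them during the backward pass. Roughly speaking, model.gradient_checkpointing_enable() applies this to each T5 block. A minimal sketch of the same idea in plain PyTorch (the toy modules and shapes here are hypothetical, for illustration only):

import torch
from torch.utils.checkpoint import checkpoint

# Hypothetical two-stage model; checkpointing stage1 means its internal
# activations are recomputed during backward() instead of being stored.
stage1 = torch.nn.Sequential(torch.nn.Linear(512, 512), torch.nn.ReLU())
stage2 = torch.nn.Linear(512, 10)

x = torch.randn(4, 512, requires_grad=True)
h = checkpoint(stage1, x, use_reentrant=False)  # forward pass; activations discarded
loss = stage2(h).sum()
loss.backward()  # stage1 runs a second time here to rebuild its activations
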
@@ -46,14 +47,16 @@ print("Dataset successfully split and tokenized.")

 # Define training arguments
 training_args = TrainingArguments(
-    output_dir=output_dir,
-    per_device_train_batch_size=2,  # Lowered to avoid memory issues
-    per_device_eval_batch_size=2,
+    output_dir="/tmp/t5-finetuned",
+    per_device_train_batch_size=1,  # Reduce to 1 (was 2)
+    per_device_eval_batch_size=1,  # ✅ Reduce to 1
     num_train_epochs=1,  # Test run (increase for full fine-tuning)
+    gradient_accumulation_steps=4,  # ✅ Helps simulate larger batch size
     logging_steps=50,
     evaluation_strategy="epoch",
     save_strategy="epoch",
-    push_to_hub=False  # Change to True to upload the model to HF Hub
+    push_to_hub=False,
+    fp16=True
 )

 # Set up Trainer
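
Taken together, these settings trade speed for memory while keeping the effective batch size reasonable: with per_device_train_batch_size=1 and gradient_accumulation_steps=4, gradients from four micro-batches are summed before each optimizer step, for an effective batch size of 1 × 4 = 4 per device. A minimal sketch of what Trainer does internally under these settings (the toy model and data are hypothetical; this is not the actual Trainer code):

import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
accum_steps = 4  # corresponds to gradient_accumulation_steps=4

optimizer.zero_grad()
for step in range(8):  # 8 micro-batches of size 1 -> 2 optimizer updates
    x, y = torch.randn(1, 8), torch.randn(1, 1)
    # Scale the loss so the summed gradients match one batch of size 4
    loss = torch.nn.functional.mse_loss(model(x), y) / accum_steps
    loss.backward()  # gradients accumulate in param.grad across micro-batches
    if (step + 1) % accum_steps == 0:
        optimizer.step()  # one update per accum_steps micro-batches
        optimizer.zero_grad()

One caveat on fp16=True: mixed precision needs a CUDA-capable GPU (the Trainer will error out on a CPU-only machine), and fp16 is known to be numerically unstable with some T5 checkpoints, so bf16=True may be the safer choice on hardware that supports it.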