cconsti committed on
Commit
6b4b78f
Β·
verified Β·
1 Parent(s): 0d716fc

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +13 -10
train.py CHANGED
@@ -6,8 +6,10 @@ from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, Train
6
  os.environ["HF_HOME"] = "/app/hf_cache"
7
  os.environ["HF_DATASETS_CACHE"] = "/app/hf_cache"
8
  os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
9
- output_dir = "/tmp/t5-finetuned"
10
- os.makedirs(output_dir, exist_ok=True)
 
 
11
  # Load dataset
12
  dataset = load_dataset("tatsu-lab/alpaca")
13
  dataset["train"] = dataset["train"].select(range(2000))
@@ -48,19 +50,20 @@ print("Dataset successfully split and tokenized.")
48
 
49
  # Define training arguments
50
  training_args = TrainingArguments(
51
- output_dir="/tmp/t5-finetuned",
52
- per_device_train_batch_size=1,
53
- per_device_eval_batch_size=1,
54
- num_train_epochs=1, # βœ… Train for 1 epoch only
55
- gradient_accumulation_steps=2, # βœ… Reduce steps to speed up
56
- logging_steps=100, # βœ… Log less frequently
57
- save_steps=500, # βœ… Save less frequently
58
  evaluation_strategy="epoch",
59
  save_strategy="epoch",
60
- push_to_hub=False,
 
 
 
 
 
 
61
  fp16=True
62
  )
63
 
 
64
  # Set up Trainer
65
  trainer = Trainer(
66
  model=model,
 
6
  os.environ["HF_HOME"] = "/app/hf_cache"
7
  os.environ["HF_DATASETS_CACHE"] = "/app/hf_cache"
8
  os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
9
+ save_dir = "./models/t5-finetuned"
10
+ os.makedirs(save_dir, exist_ok=True) # Ensure the directory exists
11
+ trainer.save_model(save_dir)  # NOTE(review): `trainer` is not defined yet at this point in the file — this call must move to after the Trainer is constructed and trained, or it raises NameError
12
+
13
  # Load dataset
14
  dataset = load_dataset("tatsu-lab/alpaca")
15
  dataset["train"] = dataset["train"].select(range(2000))
 
50
 
51
  # Define training arguments
52
  training_args = TrainingArguments(
53
+ output_dir="./results",
 
 
 
 
 
 
54
  evaluation_strategy="epoch",
55
  save_strategy="epoch",
56
+ learning_rate=5e-6, # Reduce from 5e-5 to 5e-6
57
+ per_device_train_batch_size=8, # Keep batch size reasonable
58
+ per_device_eval_batch_size=8,
59
+ num_train_epochs=3,
60
+ weight_decay=0.01,
61
+ logging_dir="./logs",
62
+ logging_steps=10,
63
  fp16=True
64
  )
65
 
66
+
67
  # Set up Trainer
68
  trainer = Trainer(
69
  model=model,