Spaces:

cconsti
/

trial1

Runtime error

App Files Community

cconsti committed on Jan 31

Commit

3cef3e3

verified ·

1 Parent(s): 0299858

Create train.py

Browse files

Files changed (1) hide show

train.py +50 -0

train.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import torch
+from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
+from datasets import load_dataset
+# Training data pulled from the Hub; substitute the id of your own dataset.
+dataset = load_dataset("mbzuai/NLP-Cover-Letters")  # Example dataset
+# One checkpoint name drives both loads below, so the tokenizer and the model
+# always come from the same pretrained checkpoint.
+model_name = "t5-large"
+# Right-hand side is evaluated left to right: tokenizer first, then model.
+tokenizer, model = (
+    T5Tokenizer.from_pretrained(model_name),
+    T5ForConditionalGeneration.from_pretrained(model_name),
+)
+# Tokenization function
+def tokenize_function(examples):
+    """Tokenize one batch of examples for sequence-to-sequence training.
+
+    Written for ``dataset.map(tokenize_function, batched=True)``, which hands
+    the function a mapping from column name to a list of values rather than a
+    list of per-row dicts.
+
+    Args:
+        examples: Batch mapping with an "input" column (source texts) and an
+            "output" column (target texts). NOTE(review): these column names
+            are assumed from the original code; confirm against the dataset.
+
+    Returns:
+        The tokenizer encoding of the inputs, padded/truncated to 512 tokens,
+        with an added "labels" entry holding the target token ids in which
+        padding positions are replaced by -100.
+    """
+    # Read whole columns: iterating a batched mapping yields its keys (column
+    # names), so per-row indexing like ex["input"] would index into a string.
+    inputs = examples["input"]
+    targets = examples["output"]
+    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
+    labels = tokenizer(targets, max_length=512, truncation=True, padding="max_length")
+    # -100 is the label value the loss ignores, so padded target positions do
+    # not contribute to training.
+    pad_id = tokenizer.pad_token_id
+    model_inputs["labels"] = [
+        [-100 if token_id == pad_id else token_id for token_id in label_ids]
+        for label_ids in labels["input_ids"]
+    ]
+    return model_inputs
+# Apply tokenization to every split. batched=True makes map() call
+# tokenize_function with a batch (column name -> list of values) per call.
+tokenized_datasets = dataset.map(tokenize_function, batched=True)
+# Training arguments
+# NOTE(review): newer transformers releases name the evaluation argument
+# `eval_strategy`; confirm which spelling the installed version accepts.
+training_args = TrainingArguments(
+    output_dir="./t5-finetuned",
+    per_device_train_batch_size=2,  # Smaller batch to avoid memory errors
+    per_device_eval_batch_size=2,  # Smaller eval batch
+    save_total_limit=1,  # Keep only 1 checkpoint
+    num_train_epochs=1,  # Quick test with 1 epoch
+    logging_steps=50,  # More frequent logging
+    evaluation_strategy="epoch",  # Evaluate only at the end of the epoch
+    save_strategy="epoch",  # Save only at the end of the epoch
+    push_to_hub=False,  # No automatic uploads during training; the final push below is explicit
+    hub_model_id="your-hf-username/t5-cover-letter",  # Destination repo for the explicit push
+)
+# Trainer setup
+# NOTE(review): assumes the dataset provides both "train" and "test" splits;
+# confirm, or create a held-out split before this point.
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_datasets["train"],
+    eval_dataset=tokenized_datasets["test"],
+)
+# Train the model
+trainer.train()
+# Save and push model to Hugging Face Hub. The first positional argument of
+# Trainer.push_to_hub is the commit message, not the repository id, so the
+# destination is configured through hub_model_id above.
+trainer.push_to_hub()