Spaces:

rivapereira123
/

firstaid

Sleeping

rivapereira123 committed on Jul 16

Commit

b7f414a

verified ·

1 Parent(s): b52b1ab

Update finetune_flan_t5.py

Files changed (1) hide show

finetune_flan_t5.py CHANGED Viewed

@@ -14,25 +14,32 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 # Preprocess dataset
 def preprocess(example):
     # NOTE(review): the "review" comments in this function were added while
     # reading the diff; they are not part of the commit being shown.
     #
     # Pre-commit version of the preprocessing step. It turns one dataset row
     # into a tokenized record: the text under example["instruction"] becomes
     # the model input and the text under example["output"] becomes "labels".
     # `tokenizer` is defined outside this hunk (presumably a Hugging Face
     # tokenizer matching the model loaded above -- confirm in the full file).
-    input_text = example["instruction"]
     target_text = example["output"]
     # Input side: called with max_length=512, truncation on, and padding to
     # max_length.
     tokenized = tokenizer(
         input_text,
         max_length=512,
         truncation=True,
         padding="max_length"
     )
     # NOTE(review): the target text is tokenized inside the
     # as_target_tokenizer() context (max_length=128) and only its
     # "input_ids" are kept as labels. This context manager is understood to
     # be deprecated in newer transformers releases -- confirm against the
     # installed version.
-    with tokenizer.as_target_tokenizer():
-        tokenized["labels"] = tokenizer(
-            target_text,
-            max_length=128,
-            truncation=True,
-            padding="max_length"
-        )["input_ids"]
     return tokenized
 tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
 # Define training arguments
 training_args = TrainingArguments(
     output_dir="./flan-t5-medical",

 # Preprocess dataset
 def preprocess(example):
     # NOTE(review): the "review" comments in this function were added while
     # reading the diff; they are not part of the commit being shown.
     #
     # Post-commit version of the preprocessing step. It turns one dataset row
     # into a tokenized record: the text under example["input"] becomes the
     # model input and the text under example["output"] becomes "labels".
     # `tokenizer` is defined outside this hunk (presumably a Hugging Face
     # tokenizer matching the model loaded above -- confirm in the full file).
     # Returns the object produced by the first tokenizer call, with a
     # "labels" entry added to it.
+    input_text = example["input"]
     target_text = example["output"]
+    # Tokenize inputs
     tokenized = tokenizer(
         input_text,
         max_length=512,
         truncation=True,
         padding="max_length"
     )
+    # Tokenize targets
+    tokenized_target = tokenizer(
+        target_text,
+        max_length=128,
+        truncation=True,
+        padding="max_length"
+    )
     # NOTE(review): only the target's "input_ids" are kept. Because the
     # target call pads to max_length, the padded positions presumably hold
     # the tokenizer's pad token id. If the training loss is meant to ignore
     # padding, those ids would normally need replacing with -100. The
     # Trainer / data collator setup is not visible here -- TODO confirm.
+    tokenized["labels"] = tokenized_target["input_ids"]
     return tokenized
+# Apply preprocessing
 tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
 # Define training arguments
 training_args = TrainingArguments(
     output_dir="./flan-t5-medical",