rivapereira123 committed
Commit 1515e9b · verified · 1 Parent(s): 8a20cad

Update finetune_flan_t5.py

Files changed (1)
  1. finetune_flan_t5.py +7 -4
finetune_flan_t5.py CHANGED
@@ -16,9 +16,12 @@ model_name = "google/flan-t5-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-# 3. Formatting function for SFTTrainer
-def format_instruction(example):
-    return f"### Instruction:\n{example['input']}\n\n### Response:\n{example['output']}"
+# 3. CORRECTED Formatting function - must return a list
+def format_instruction(examples):
+    texts = []
+    for input_text, output_text in zip(examples["input"], examples["output"]):
+        texts.append(f"### Instruction:\n{input_text}\n\n### Response:\n{output_text}")
+    return {"text": texts}  # Return dict with "text" key containing list
 
 # 4. Training arguments
 training_args = TrainingArguments(
@@ -34,7 +37,7 @@ training_args = TrainingArguments(
     report_to="none"
 )
 
-# 5. Initialize SFTTrainer correctly
+# 5. Initialize SFTTrainer
 trainer = SFTTrainer(
     model=model,
     tokenizer=tokenizer,
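
For reference, a minimal sketch of how the corrected batched formatter might be wired into the trainer. This is not part of the commit: the data file name is hypothetical, and it assumes the dataset has "input"/"output" columns and a trl version where SFTTrainer still accepts the tokenizer and dataset_text_field arguments.

from datasets import load_dataset

# Hypothetical training data with "input" and "output" columns
dataset = load_dataset("json", data_files="train.json", split="train")

# Batched map adds the "text" column produced by format_instruction
dataset = dataset.map(format_instruction, batched=True)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",  # column created by format_instruction
)
trainer.train()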