Spaces:
Sleeping
Sleeping
Update finetune_flan_t5.py
Browse files- finetune_flan_t5.py +7 -4
finetune_flan_t5.py
CHANGED
@@ -16,9 +16,12 @@ model_name = "google/flan-t5-base"
|
|
16 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
17 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
18 |
|
19 |
-
# 3. Formatting function
|
20 |
-
def format_instruction(
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
# 4. Training arguments
|
24 |
training_args = TrainingArguments(
|
@@ -34,7 +37,7 @@ training_args = TrainingArguments(
|
|
34 |
report_to="none"
|
35 |
)
|
36 |
|
37 |
-
# 5. Initialize SFTTrainer
|
38 |
trainer = SFTTrainer(
|
39 |
model=model,
|
40 |
tokenizer=tokenizer,
|
|
|
16 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
17 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
18 |
|
19 |
+
# 3. Corrected formatting function - builds a list of prompts, returned under the "text" key
|
20 |
+
def format_instruction(examples):
    """Build instruction/response prompts for a batch of examples.

    Args:
        examples: Mapping with parallel sequences under the "input" and
            "output" keys. Presumably the columnar batch that
            ``Dataset.map(batched=True)`` passes in; the caller is not
            visible here, so confirm before relying on it.

    Returns:
        A dict with a single "text" key holding one formatted prompt
        string per (input, output) pair, in the original order.
    """
    # NOTE: zip() stops at the shorter column, so a length mismatch between
    # "input" and "output" is truncated silently rather than raising.
    return {
        "text": [
            f"### Instruction:\n{input_text}\n\n### Response:\n{output_text}"
            for input_text, output_text in zip(examples["input"], examples["output"])
        ]
    }
|
25 |
|
26 |
# 4. Training arguments
|
27 |
training_args = TrainingArguments(
|
|
|
37 |
report_to="none"
|
38 |
)
|
39 |
|
40 |
+
# 5. Initialize SFTTrainer
|
41 |
trainer = SFTTrainer(
|
42 |
model=model,
|
43 |
tokenizer=tokenizer,
|