Spaces:
Sleeping
Sleeping
Update finetune_flan_t5.py
Browse files- finetune_flan_t5.py +15 -8
finetune_flan_t5.py
CHANGED
@@ -14,25 +14,32 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
14 |
|
15 |
# Preprocess dataset
|
16 |
def preprocess(example):
|
17 |
-
input_text = example["
|
18 |
target_text = example["output"]
|
|
|
|
|
19 |
tokenized = tokenizer(
|
20 |
input_text,
|
21 |
max_length=512,
|
22 |
truncation=True,
|
23 |
padding="max_length"
|
24 |
)
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
32 |
return tokenized
|
33 |
|
|
|
34 |
tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
|
35 |
|
|
|
36 |
# Define training arguments
|
37 |
training_args = TrainingArguments(
|
38 |
output_dir="./flan-t5-medical",
|
|
|
14 |
|
15 |
# Preprocess dataset
|
16 |
def preprocess(example):
    """Tokenize one dataset example into model inputs and training labels.

    Reads ``example["input"]`` as the source text and ``example["output"]``
    as the target text. Both are tokenized with the module-level
    ``tokenizer``, truncated and padded to a fixed length (512 tokens for
    the input, 128 for the target).

    Args:
        example: A single dataset row with ``"input"`` and ``"output"``
            entries. The function is applied per example (the
            ``dataset.map`` call does not pass ``batched=True``), so the
            tokenizer output is a flat list of token ids.

    Returns:
        The tokenizer output for the input text, with an added ``"labels"``
        entry holding the target token ids. Padding positions in the
        labels are set to -100.
    """
    input_text = example["input"]
    target_text = example["output"]

    # Tokenize inputs
    tokenized = tokenizer(
        input_text,
        max_length=512,
        truncation=True,
        padding="max_length",
    )

    # Tokenize targets
    tokenized_target = tokenizer(
        target_text,
        max_length=128,
        truncation=True,
        padding="max_length",
    )

    # Targets are padded to a fixed length, so most label positions are pad
    # tokens. Replace them with -100, the index ignored by the seq2seq
    # cross-entropy loss; otherwise the model is trained to predict padding.
    pad_token_id = tokenizer.pad_token_id
    tokenized["labels"] = [
        -100 if token_id == pad_token_id else token_id
        for token_id in tokenized_target["input_ids"]
    ]

    return tokenized
|
38 |
|
39 |
+
# Apply preprocessing
|
40 |
tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
|
41 |
|
42 |
+
|
43 |
# Define training arguments
|
44 |
training_args = TrainingArguments(
|
45 |
output_dir="./flan-t5-medical",
|