Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -34,16 +34,22 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
|
# The tokenizer (loaded above) may ship without a pad token — presumably the
# Falcon family, per comments later in this file; TODO confirm. Reuse EOS so
# padding="max_length" in preprocess_function has a token to pad with.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
|
36 |
|
37 |
# Tokenize Data
def preprocess_function(examples):
    """Format one prompt/response pair and tokenize it for causal-LM training.

    Expects a mapping with optional "prompt" and "response" keys; missing keys
    default to the empty string. Returns the tokenizer encoding with a
    "labels" entry that duplicates "input_ids".
    """
    text = "Medical Q&A: {} {}".format(
        examples.get("prompt", ""),
        examples.get("response", ""),
    )
    encoded = tokenizer(text, padding="max_length", truncation=True, max_length=512)
    # Causal LM convention: labels are a copy of the inputs so later mutation
    # of one does not touch the other.
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Load Model with LoRA (Optimized for Falcon)
|
49 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
# The tokenizer (loaded above) may ship without a pad token — presumably the
# Falcon family, per comments later in this file; TODO confirm. Reuse EOS so
# padding="max_length" in preprocess_function has a token to pad with.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
37 |
# Tokenize Data (fixed)
def preprocess_function(examples):
    """Tokenize "Medical Q&A: <prompt> <response>" text for causal-LM training.

    Compatible with dataset.map(batched=True): `examples` is then a dict of
    column-name -> list of values and one encoding row is produced per input
    row. Also works on a single (non-batched) example dict.

    Returns the tokenizer encoding (input_ids, attention_mask, ...) plus a
    "labels" entry that copies input_ids.
    """
    prompt = examples.get("prompt", "")
    response = examples.get("response", "")

    if isinstance(prompt, list):
        # batched=True hands us list columns; build one string per example.
        # (Formatting the whole lists into a single f-string produced one
        # garbage string per batch — part of the original runtime error.)
        texts = [f"Medical Q&A: {p} {r}" for p, r in zip(prompt, response)]
    else:
        texts = f"Medical Q&A: {prompt} {response}"

    model_inputs = tokenizer(texts, padding="max_length", truncation=True, max_length=512)

    # Causal-LM convention: labels mirror input_ids; copy so the two lists are
    # independent. NOTE(review): padding positions are not masked to -100, so
    # loss is also computed on pad/eos tokens — confirm that is intended.
    model_inputs["labels"] = model_inputs["input_ids"].copy()

    # Do NOT wrap the values in an extra list ({key: [val]}): with
    # batched=True the encodings are already lists of rows, and the extra
    # nesting collapsed every batch into a single malformed row.
    return model_inputs
|
49 |
+
|
50 |
# Apply tokenization across the whole dataset; drop the raw text columns so
# only the model inputs remain.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset.column_names,
)
|
52 |
+
|
53 |
|
54 |
# Load Model with LoRA (Optimized for Falcon)
|
55 |
model = AutoModelForCausalLM.from_pretrained(
|