# Page header captured together with this file (not part of the program):
#   Spaces: Runtime error
#   File size: 4,854 Bytes
#   6a281d7
from unsloth import FastLanguageModel
import torch
import pandas as pd
from datasets import Dataset
import numpy as np
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
# ---- Base model settings -------------------------------------------------
# Any sequence length may be chosen; RoPE scaling is handled internally.
max_seq_length = 4096
# None lets the loader auto-detect; Float16 for Tesla T4/V100, Bfloat16 for Ampere+.
dtype = None
# 4-bit quantization reduces memory usage; may be set to False.
load_in_4bit = True

# Load the 4-bit TinyLlama checkpoint ("unsloth/tinyllama" is the 16-bit variant).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# ---- LoRA adapter settings -----------------------------------------------
lora_settings = dict(
    r=32,  # Any rank > 0; suggested values: 8, 16, 32, 64, 128.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=32,
    lora_dropout=0,  # Only dropout = 0 is currently supported.
    bias="none",  # Only bias = "none" is currently supported.
    use_gradient_checkpointing=False,  # Switch to True if you run out of memory.
    random_state=3407,
    use_rslora=False,  # Rank-stabilized LoRA is available but not enabled.
    loftq_config=None,  # LoftQ is available but not enabled.
)

# Wrap the base model with the LoRA adapters configured above.
model = FastLanguageModel.get_peft_model(model, **lora_settings)
# Prompt template with three positional `{}` slots. Callers in this file fill
# them, in order, with: the instruction, the input (the riddle / user text),
# and the response (the answer when building training text, or "" when
# prompting the model for a completion).
alpaca_prompt = """Below is an instruction that describes a task, paired with an output that provides correct output for that task. Write a response that produces correct solution to the problem
### Instruction:
{}
### Input:
{}
### Response:
{}"""
# End-of-sequence marker taken from the loaded tokenizer; it is appended to
# every formatted training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a batch of riddle/answer rows into prompt-formatted training text.

    Args:
        examples: Batched mapping with parallel ``"Riddle"`` and ``"Answer"``
            sequences (the shape ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        A dict ``{"text": [...]}`` holding one formatted string per input row,
        each terminated with ``EOS_TOKEN``.
    """
    # A single fixed instruction is shared by every row. It must NOT be zipped
    # with the columns: zipping a str iterates it character by character, which
    # would give each row a one-letter "instruction" and silently cap the
    # output at len(instruction) rows.
    instruction = "The problem has the following answer. Understand step-by-step how it is solved to produce the correct solution and then produce the correct solution"

    # EOS_TOKEN must be appended, otherwise generation will go on forever.
    texts = [
        alpaca_prompt.format(instruction, riddle, answer) + EOS_TOKEN
        for riddle, answer in zip(examples["Riddle"], examples["Answer"])
    ]
    return {"text": texts}
# Load the riddle/answer pairs and hold out 20% of the rows (fixed seed).
df = pd.read_csv('math_riddles.csv')
train, test = train_test_split(df, test_size=0.2, random_state=42)

# preserve_index=False keeps the pandas index of each split from being
# materialized as an extra `__index_level_0__` column, so both splits get
# exactly the same schema and no index column has to be removed by name.
train_ds = Dataset.from_pandas(train, preserve_index=False)
test_ds = Dataset.from_pandas(test, preserve_index=False)

# Despite the variable names, no tokenization happens here: each split is only
# rendered into a single "text" column. Dropping every original column (rather
# than a hard-coded list) treats train and test identically.
tokenized_train = train_ds.map(
    formatting_prompts_func,
    batched=True,
    remove_columns=train_ds.column_names,
)
tokenized_test = test_ds.map(
    formatting_prompts_func,
    batched=True,
    remove_columns=test_ds.column_names,
)
# Probe bf16 support once; exactly one of fp16/bf16 ends up enabled.
use_bf16 = is_bfloat16_supported()

# Optimisation hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    warmup_ratio=0.1,
    num_train_epochs=3,
    learning_rate=2e-5,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.1,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",  # Change this to report to WandB etc.
)

# Supervised fine-tuning over the formatted "text" column of the train split.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=24,
    packing=True,  # Packs short sequences together to save time.
    args=training_args,
)

# Run training and keep the returned statistics object.
trainer_stats = trainer.train()
# Define inference function
def inference(instruction, user_input):
    """Generate the model's response for one instruction/input pair.

    Args:
        instruction: Text placed in the prompt's instruction slot.
        user_input: Text placed in the prompt's input slot.

    Returns:
        The newly generated text only (prompt and special tokens removed),
        with surrounding whitespace stripped.

    NOTE(review): Unsloth's examples call ``FastLanguageModel.for_inference(model)``
    once before generating; consider doing that after training finishes.
    """
    # Leave the response slot blank so the model completes it.
    prompt = alpaca_prompt.format(instruction, user_input, "")

    # Put the inputs on whatever device the model lives on instead of
    # assuming "cuda".
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=64,
        use_cache=True,
    )

    # generate() returns prompt + continuation; slice the prompt tokens off
    # rather than splitting the decoded text on "### Response:", which picks
    # the wrong span whenever the user's own text contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[:, prompt_length:]

    # skip_special_tokens keeps markers such as the EOS token out of the
    # text shown to the user.
    result = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0].strip()

    print(result)  # Echo the response to the process log for debugging.
    return result
# Create Gradio interface
import gradio as gr

# Two text boxes (instruction + problem) feed `inference`; its return value is
# shown as plain text. The default values give a ready-to-run example.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Instruction", value="Solve the problem"),
        gr.Textbox(label="Input", value="There is a three digit number.The second digit is four times as big as the third digit, while the first digit is three less than the second digit.What is the number?"),
    ],
    outputs="text",
    title="Language Model Interface",
    description="Enter an instruction and input to generate a response from the model.",
)

# The stray trailing "|" after this call was a syntax error and is removed.
# NOTE(review): share=True requests a public share link; confirm it is still
# wanted in the environment this app is deployed to.
demo.launch(share=True)