from unsloth import FastLanguageModel
import torch
import pandas as pd
from datasets import Dataset
import numpy as np
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/tinyllama-bnb-4bit", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = False, # @@@ IF YOU GET OUT OF MEMORY - set to True @@@
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
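
# Optional sanity check (assumes the object returned by get_peft_model behaves like a
# standard PEFT PeftModel): report how many parameters the LoRA adapters actually train.
model.print_trainable_parameters()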

alpaca_prompt = """Below is an instruction that describes a task, paired with an output that provides correct output for that task. Write a response that produces correct solution to the problem

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    # The same instruction string is reused for every riddle/answer pair.
    instruction = "Understand step by step how the problem is solved, then produce the correct solution."
    inputs  = examples["Riddle"]
    outputs = examples["Answer"]
    texts = []
    for riddle, answer in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, riddle, answer) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }



df = pd.read_csv('math_riddles.csv')
train, test = train_test_split(df, test_size=0.2, random_state=42)
train_ds = Dataset.from_pandas(train.reset_index(drop=True))
test_ds = Dataset.from_pandas(test.reset_index(drop=True))

# Apply the Alpaca formatting to both splits and drop the raw columns
tokenized_train = train_ds.map(formatting_prompts_func, batched=True,
                               remove_columns=['Riddle', 'Answer'])

tokenized_test = test_ds.map(formatting_prompts_func, batched=True,
                             remove_columns=['Riddle', 'Answer'])
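
# Quick sanity check (assumes the CSV yields at least one training row): print one
# formatted example to confirm the Alpaca template and EOS token were applied.
print(tokenized_train[0]["text"])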



trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = tokenized_train,
    eval_dataset = tokenized_test,   # held-out split; evaluated only if an eval strategy is set
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 24,
    packing = True, # Packs short sequences together to save time!
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 1,
        warmup_ratio = 0.1,
        num_train_epochs = 3,
        learning_rate = 2e-5,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.1,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

trainer_stats = trainer.train()
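
# trainer.train() returns a TrainOutput; its metrics dict holds the final training
# loss and runtime, giving a quick summary of the run.
print(trainer_stats.metrics)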

# Inference: enable Unsloth's native faster generation mode, then define a helper
FastLanguageModel.for_inference(model)

def inference(instruction, user_input):
    prompt = alpaca_prompt.format(
        instruction,
        user_input,
        ""  # Leave the response blank so the model fills it in
    )

    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(
        **inputs,
        max_new_tokens=64,
        use_cache=True
    )

    # Decode the full output (prompt + generated text)
    result = tokenizer.batch_decode(outputs)[0]
    print(result)  # Log the raw output for debugging

    # Keep only the generated response (the text after the prompt)
    response_prefix = "### Response:"
    if response_prefix in result:
        result = result.split(response_prefix)[1].strip()

    return result
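
# Quick manual test of the helper before wiring up the UI (the riddle below is
# illustrative, not taken from math_riddles.csv).
sample_riddle = "I am a two digit number. My tens digit is twice my ones digit. The sum of my digits is 9. What number am I?"
print(inference("Solve the problem", sample_riddle))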

# Create Gradio interface
import gradio as gr
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Instruction", value="Solve the problem"),
        gr.Textbox(label="Input", value="There is a three digit number.The second digit is four times as big as the third digit, while the first digit is three less than the second digit.What is the number?")
    ],
    outputs="text",
    title="Language Model Interface",
    description="Enter an instruction and input to generate a response from the model."
)


demo.launch(share=True)