from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
from datasets import load_dataset
import torch

# Load SmolLM-135M-Instruct model with 4-bit quantization
model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)

# Prepare PEFT config for efficient fine-tuning
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)
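
# Quick sanity check (added note): report how many parameters LoRA actually trains,
# which should be only a small fraction of the 135M base model.
model.print_trainable_parameters()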

# Load dataset. Example assumption: 'financial_data.jsonl' contains records shaped like
# {'text': 'query ||| response'}; fold a system prompt into the text during data prep if needed.
dataset = load_dataset("json", data_files="financial_data.jsonl", split="train")
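
# Illustrative check (assumption: the file above exists and follows the layout described).
# A placeholder record would look like:
#   {"text": "How is free cash flow calculated? ||| Operating cash flow minus capital expenditures."}
# Print the first record to confirm the "query ||| response" formatting before training.
print(dataset[0]["text"])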

# Training arguments (adjusted for smaller model: larger batch size for speed)
training_args = TrainingArguments(
    output_dir="./finetuned_smollm135m",
    num_train_epochs=3,
    per_device_train_batch_size=8,  # Increased for smaller model
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    fp16=torch.cuda.is_available(),
    save_steps=500,
    logging_steps=100,
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    warmup_steps=100,
)

# Trainer
# Note: passing dataset_text_field, max_seq_length, and tokenizer directly to SFTTrainer
# matches older trl releases; newer releases expect these settings via SFTConfig instead.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # Adjust to match your dataset's text column
    tokenizer=tokenizer,
    max_seq_length=512,
)

trainer.train()

# Save fine-tuned model
trainer.model.save_pretrained("./finetuned_smollm135m")
tokenizer.save_pretrained("./finetuned_smollm135m")
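
# Usage sketch (assumption: the training run above completed and the adapter was saved).
# Reload the base model, attach the saved LoRA adapter, and generate a sample reply.
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
inference_model = PeftModel.from_pretrained(base_model, "./finetuned_smollm135m")
inference_model.eval()

# Placeholder prompt in the same "query ||| response" layout used for training
prompt = "What does a rising debt-to-equity ratio indicate? |||"
inputs = tokenizer(prompt, return_tensors="pt").to(inference_model.device)
with torch.no_grad():
    outputs = inference_model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))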