from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset
import torch
# Load SmolLM-135M-Instruct model with 4-bit quantization
model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
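# Note: 4-bit loading via bitsandbytes generally requires a CUDA GPU; on CPU-only
# machines, drop quantization_config and load the model in full precision instead.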
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
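# Recommended prep for k-bit (4-bit) training: freezes the quantized base weights,
# casts norm layers to fp32, and enables gradient checkpointing / input gradients.
model = prepare_model_for_kbit_training(model)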
# Prepare PEFT config for efficient fine-tuning
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
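# SmolLM follows a Llama-style decoder layout, so the attention projections are named
# q_proj / k_proj / v_proj / o_proj; lora_alpha=32 over r=16 gives a 2x adapter scaling.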
model = get_peft_model(model, peft_config)
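model.print_trainable_parameters()  # Sanity check: only the LoRA adapter weights should be trainable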
# Load the dataset (example assumes 'financial_data.jsonl' with records of the form
# {'text': 'query ||| response'}; fold a system prompt into the text during data prep if needed)
dataset = load_dataset("json", data_files="financial_data.jsonl", split="train")
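# Hypothetical record shape for illustration: {"text": "<user query> ||| <assistant response>"}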
# Training arguments (adjusted for smaller model: larger batch size for speed)
training_args = TrainingArguments(
    output_dir="./finetuned_smollm135m",
    num_train_epochs=3,
    per_device_train_batch_size=8,  # Increased for smaller model
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    fp16=torch.cuda.is_available(),
    save_steps=500,
    logging_steps=100,
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    warmup_steps=100,
)
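# Effective batch size is 8 * 2 = 16 sequences per optimizer step; paged_adamw_8bit
# is a bitsandbytes optimizer and, like 4-bit loading, assumes a CUDA GPU.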
# Trainer
trainer = SFTTrainer(
    model=model,  # already wrapped with LoRA above, so no separate peft_config is needed here
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",  # Adjust based on your dataset
    tokenizer=tokenizer,
    max_seq_length=512,
)
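# Note: this call signature matches older TRL releases; in newer TRL versions,
# dataset_text_field and max_seq_length are set on an SFTConfig passed as `args`,
# and the tokenizer is passed via `processing_class`.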
trainer.train()
# Save fine-tuned model
trainer.model.save_pretrained("./finetuned_smollm135m")
tokenizer.save_pretrained("./finetuned_smollm135m")
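# Minimal inference sketch (assumes the adapter directory saved above; the example query
# is hypothetical and mirrors the 'query ||| response' format used for training).
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
finetuned = PeftModel.from_pretrained(base_model, "./finetuned_smollm135m")
prompt = "How should I categorize a quarterly dividend payment? ||| "  # hypothetical query
inputs = tokenizer(prompt, return_tensors="pt").to(finetuned.device)
outputs = finetuned.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))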