from datasets import load_dataset
from transformers import TrainingArguments, BitsAndBytesConfig
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel, LoraConfig
from trl import SFTTrainer

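# Load the fine-tuning CSV of political promises; SFTTrainer reads the column named in dataset_text_field.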
tune_data = load_dataset("csv", data_files='Political_Promises_Fine_Tuning.csv')

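# Optimizer and learning-rate schedule.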
optim = "paged_adamw_32bit"
learning_rate = 3e-4
weight_decay = .01
lr_scheduler_type = 'cosine'
warmup_ratio = .03

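# Memory and throughput: accumulate gradients, train in bfloat16, recompute activations with checkpointing.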
gradient_accumulation_steps = 4
bf16 = True
gradient_checkpointing = True

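# Regularization: label smoothing on the loss and NEFTune noise added to the embeddings during fine-tuning.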
label_smoothing_factor = .1
neftune_noise_alpha = 5

# find_executable_batch_size is a decorator for wrapping a training loop, not a
# function that returns a batch size; set fixed starting values here (adjust for
# your GPU) and let auto_find_batch_size in TrainingArguments handle OOM retries.
per_device_train_batch_size = 8
per_device_eval_batch_size = 8

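# Gradient clipping, length-based batch grouping, and epoch count.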
max_grad_norm = 2
group_by_length = True
max_train_epochs = 3

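# LoRA hyperparameters: adapter rank, scaling factor, and dropout.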
r = 64
lora_alpha = 8
lora_dropout = 0.1

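# 4-bit quantization (QLoRA) settings for the base model.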
use_4bit = True
bnb_4bit_compute_dtype = 'bfloat16'  # keep consistent with the bf16=True training flag
bnb_4bit_quant_type = 'nf4'
use_nested_quant = False

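# Maximum sequence length and example packing for SFTTrainer.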
max_seq_length = 128
packing = True

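# Assemble the Hugging Face TrainingArguments from the settings above.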
train_params = TrainingArguments(
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    gradient_accumulation_steps=gradient_accumulation_steps,
    bf16=bf16,
    gradient_checkpointing=gradient_checkpointing,
    label_smoothing_factor=label_smoothing_factor,
    neftune_noise_alpha=neftune_noise_alpha,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    auto_find_batch_size=True,  # shrinks the batch size and retries on OOM (uses Accelerate's find_executable_batch_size)
    max_grad_norm=max_grad_norm,
    group_by_length=group_by_length,
    num_train_epochs=max_train_epochs,
    output_dir='./model_outputs',
    save_steps=50,
    logging_steps=10
)

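# bitsandbytes 4-bit quantization config.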
quantize_params = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_use_double_quant=use_nested_quant,
)

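# LoRA adapter config, reusing the hyperparameters defined above.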
lora_params = LoraConfig(
    r=r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    task_type='CAUSAL_LM'  # causal language modeling adapter
)

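# Load the quantized Llama 2 base model and its tokenizer.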
model = LlamaForCausalLM.from_pretrained(
    pretrained_model_name_or_path='meta-llama/Llama-2-7b-hf',  # Transformers-format checkpoint
    quantization_config=quantize_params,
    device_map='auto'
)
model.config.use_cache = False  # KV caching is incompatible with gradient checkpointing

tokenizer = LlamaTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf')
tokenizer.pad_token = tokenizer.eos_token  # Llama has no pad token by default

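# Supervised fine-tuning trainer from TRL, combining the model, data, LoRA config, and training args.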
sft = SFTTrainer(
    model=model,
    args=train_params,
    train_dataset=tune_data['train'],
    tokenizer=tokenizer,
    peft_config=lora_params,
    max_seq_length=max_seq_length,
    dataset_text_field='text',
    packing=packing
)

sft.train()
sft.model.save_pretrained('/path/to/llama-2-it')  # writes the LoRA adapter weights, not a full model
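
# A minimal sketch of loading the saved adapter back for inference (this is what
# the PeftModel import is for); the paths mirror the placeholders used above.
base_model = LlamaForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf', device_map='auto')
tuned_model = PeftModel.from_pretrained(base_model, '/path/to/llama-2-it')
tuned_model = tuned_model.merge_and_unload()  # optionally fold the adapter into the base weights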