Asuncom committed on
Commit f51e896 · verified · 1 Parent(s): 8131062

Rename README.md to 微调llama3.md

Files changed (2)
  1. README.md +0 -3
  2. 微调llama3.md +294 -0
README.md DELETED
@@ -1,3 +0,0 @@
- ---
- license: apache-2.0
- ---
 
 
 
 
微调llama3.md ADDED
@@ -0,0 +1,294 @@
---
license: apache-2.0
---
```python
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
```

```python
!pip install --upgrade pip
```

```python
!pip install --no-deps "xformers<0.0.26" "trl<0.9.0" peft accelerate bitsandbytes
```

```python
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",      # New Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",           # Llama-3 15 trillion tokens model 2x faster!
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",        # Phi-3 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",             # Gemma 2.2x faster!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
```
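
The commented-out `token` argument above is only needed for gated checkpoints. As an alternative sketch (using `huggingface_hub`'s interactive login rather than a hard-coded string), you can authenticate once per runtime:

```python
# Optional: authenticate once instead of passing token = "hf_..." to from_pretrained.
# Only needed for gated models such as meta-llama/Llama-2-7b-hf.
from huggingface_hub import notebook_login

notebook_login()  # prompts for a Hugging Face access token in the notebook UI
```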

```python
# ========================================================
# Test before training
# ========================================================
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "请把现代汉语翻译成古文", # instruction: "Translate the modern Chinese into Classical Chinese"
        "其品行廉正,所以至死也不放松对自己的要求。", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
```

```python
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
```
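
As a quick check that only the LoRA adapters are trainable, the wrapped model can report its parameter counts (this assumes the object returned by `get_peft_model` exposes PEFT's usual helper):

```python
# With r = 16 on these modules, only a small fraction (roughly tens of millions)
# of the 8B parameters should be reported as trainable.
model.print_trainable_parameters()
```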

```python
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset
dataset = load_dataset("Asuncom/shiji-qishiliezhuan", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
```
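
Before training, it can help to print one mapped example to confirm the template was filled and the EOS token appended (this assumes Asuncom/shiji-qishiliezhuan has the instruction/input/output columns used above):

```python
# Inspect the resulting columns and a single formatted training example.
print(dataset.column_names)
print(dataset[0]["text"])
```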

```python
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 100,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
```
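
For a rough sense of scale, these settings give an effective batch size of 2 * 4 = 8 sequences per optimizer step, so `max_steps = 100` touches about 800 examples; a back-of-the-envelope check, not something the trainer itself reports:

```python
# Effective batch = per-device batch size * gradient accumulation steps.
effective_batch = 2 * 4
print(f"Effective batch size: {effective_batch}")
print(f"Approx. examples seen over 100 steps: {effective_batch * 100}")
```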

```python
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
```

```python
import wandb

# Initialize a W&B run in offline mode
wandb.init(mode="offline", project="asuncom", entity="asuncom")
```
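
Because the run is created in offline mode, nothing is uploaded during training; the logs can be pushed later with `wandb sync` (the `wandb/offline-run-*` pattern below is the library's default offline location, adjust it if yours differs):

```python
# Run this after training, once you have network access and have run `wandb login`.
!wandb sync wandb/offline-run-*
```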

```python
trainer_stats = trainer.train()
```

```python
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
```

```python
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "请把现代汉语翻译成古文", # instruction: "Translate the modern Chinese into Classical Chinese"
        "其品行廉正,所以至死也不放松对自己的要求。", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
```

```python
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
model.push_to_hub("Asuncom/Llama-3-8B-bnb-4bit-shiji", token = "hf_huggingface密钥XqWUItzvbAkNeKb") # Online saving
tokenizer.push_to_hub("Asuncom/Llama-3-8B-bnb-4bit-shiji", token = "hf_gUYYWvhuggingface密钥zvbAkNeKb") # Online saving
```
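
The hard-coded `hf_...` strings above will end up in the repository history if this notebook is committed; a safer sketch (the `HF_TOKEN` variable name here is just a convention) reads the token from the environment instead:

```python
import os

# Set HF_TOKEN in the environment (or Colab secrets) before running this cell.
hf_token = os.environ.get("HF_TOKEN")
model.push_to_hub("Asuncom/Llama-3-8B-bnb-4bit-shiji", token = hf_token)
tokenizer.push_to_hub("Asuncom/Llama-3-8B-bnb-4bit-shiji", token = hf_token)
```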

```python
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# alpaca_prompt = You MUST copy from above!

inputs = tokenizer(
[
    alpaca_prompt.format(
        "What is a famous tall tower in Paris?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
```

```python
# Merge to 16bit
if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if False: model.push_to_hub_merged("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, save_method = "merged_16bit", token = "hf_huggingface密钥XqWUItzvbAkNeKb")

# Merge to 4bit
if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
if False: model.push_to_hub_merged("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, save_method = "merged_4bit", token = "hf_gUYYWvhuggingface密钥zvbAkNeKb")

# Just LoRA adapters
if False: model.save_pretrained_merged("model", tokenizer, save_method = "lora",)
if False: model.push_to_hub_merged("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, save_method = "lora", token = "hf_gUYYWvvzxjWLhuggingface密钥eKb")
```

```python
# Save to 8bit Q8_0
if False: model.save_pretrained_gguf("model", tokenizer,)
# Remember to go to https://huggingface.co/settings/tokens for a token!
# And change hf to your username!
if False: model.push_to_hub_gguf("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, token = "")

# Save to 16bit GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
if False: model.push_to_hub_gguf("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, quantization_method = "f16", token = "")

# Save to q4_k_m GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
if True: model.push_to_hub_gguf("Asuncom/Llama-3-8B-bnb-4bit-shiji", tokenizer, quantization_method = "q4_k_m", token = "hf_xxxxx")

# Save to multiple GGUF options - much faster if you want multiple!
if False:
    model.push_to_hub_gguf(
        "Asuncom/Llama-3-8B-bnb-4bit-shiji", # Change hf to your username!
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = "hf_huggingface密钥XqWUItzvbAkNeKb", # Get a token at https://huggingface.co/settings/tokens
    )
```

```python
model.push_to_hub_gguf(
    "Asuncom/Llama-3-8B-bnb-4bit-shiji", # Change hf to your username!
    tokenizer,
    quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
    token = "hf_huggingface密钥XqWUItzvbAkNeKb", # Get a token at https://huggingface.co/settings/tokens
)
```

```
Saved GGUF to https://huggingface.co/Asuncom/Llama-3-8B-bnb-4bit-shiji
Unsloth: Uploading GGUF to Huggingface Hub...
Saved GGUF to https://huggingface.co/Asuncom/Llama-3-8B-bnb-4bit-shiji
Unsloth: Uploading GGUF to Huggingface Hub...
Saved GGUF to https://huggingface.co/Asuncom/Llama-3-8B-bnb-4bit-shiji
Unsloth: Uploading GGUF to Huggingface Hub...
Saved GGUF to https://huggingface.co/Asuncom/Llama-3-8B-bnb-4bit-shiji
```
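
To smoke-test the uploaded GGUF outside of Unsloth (not part of the original notebook), one option is to download the q4_k_m file and load it with llama-cpp-python; the exact GGUF filename below is an assumption, so check the files on the model page first:

```python
# pip install llama-cpp-python huggingface_hub
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

gguf_path = hf_hub_download(
    repo_id = "Asuncom/Llama-3-8B-bnb-4bit-shiji",
    filename = "unsloth.Q4_K_M.gguf",  # assumed filename; verify on the Hub
)
llm = Llama(model_path = gguf_path, n_ctx = 2048)

# alpaca_prompt is the template defined earlier in this notebook.
prompt = alpaca_prompt.format("请把现代汉语翻译成古文", "其品行廉正,所以至死也不放松对自己的要求。", "")
out = llm(prompt, max_tokens = 128)
print(out["choices"][0]["text"])
```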