Update README.md
README.md CHANGED
@@ -26,28 +26,47 @@ https://ai.meta.com/llama/
 
 ## How to use
 
+
+
 ```
 from huggingface_hub import notebook_login
 notebook_login()
 ```
 
-```python
-from peft import PeftModel, PeftConfig
-from transformers import AutoModelForCausalLM
-
-config = PeftConfig.from_pretrained("doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
-model = PeftModel.from_pretrained(model, "doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese")
 
+```python
+import torch
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+model_id = "meta-llama/Llama-2-70b-chat-hf"
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
+
+peft_name = "doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese"
+model = PeftModel.from_pretrained(
+    model,
+    peft_name,
+    is_trainable=True
+)
 model.eval()
+
 device = "cuda:0"
-
+
+text = "# Q: 日本一高い山は何ですか? # A: "
 inputs = tokenizer(text, return_tensors="pt").to(device)
 with torch.no_grad():
     outputs = model.generate(**inputs, max_new_tokens=100)
-
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
+
 ```
 ## Training procedure
 
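Two notes on the new snippet. `PeftModel.from_pretrained` defaults to `is_trainable=False`, so the `is_trainable=True` argument only matters if you intend to keep fine-tuning the adapter; the flow shown here calls `model.eval()` immediately, so for pure inference it can be dropped. The prompt string is Japanese for "Q: What is the highest mountain in Japan? A:". For reference, here is a minimal inference-only sketch of the updated example (assuming the same base model and adapter IDs, a GPU with enough memory for the 4-bit 70B weights, and that you have accepted the Llama 2 license on the Hub):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-2-70b-chat-hf"
peft_name = "doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese"

# NF4 4-bit quantization with nested (double) quantization and bfloat16
# compute, matching the config added in this commit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)

# is_trainable defaults to False, which is all inference needs.
model = PeftModel.from_pretrained(model, peft_name)
model.eval()

text = "# Q: 日本一高い山は何ですか? # A: "  # "Q: What is the highest mountain in Japan? A:"
# model.device points at the first shard, which is safer than hard-coding
# "cuda:0" when device_map="auto" spreads the model across GPUs.
inputs = tokenizer(text, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```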