kimura committed on
Commit 5185206 · 1 Parent(s): a1c300c

Update README.md

Files changed (1):
  1. README.md +28 -9
README.md CHANGED
@@ -26,28 +26,47 @@ https://ai.meta.com/llama/
 
 ## How to use
 
+
+
 ```
 from huggingface_hub import notebook_login
 notebook_login()
 ```
 
-```python
-from peft import PeftModel, PeftConfig
-from transformers import AutoModelForCausalLM
-
-config = PeftConfig.from_pretrained("doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
-model = PeftModel.from_pretrained(model, "doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese")
 
+```python
+import torch
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+model_id = "meta-llama/Llama-2-70b-chat-hf"
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
+
+peft_name = "doshisha-nlp/llama-2-70b-chat-4bit-alpaca-japanese"
+model = PeftModel.from_pretrained(
+    model,
+    peft_name,
+    is_trainable=True
+)
 model.eval()
+
 device = "cuda:0"
-text = "#Q:日本一高い山は何ですか? #A: "
+
+text = "# Q: 日本一高い山は何ですか? # A: "
 inputs = tokenizer(text, return_tensors="pt").to(device)
 with torch.no_grad():
     outputs = model.generate(**inputs, max_new_tokens=100)
-
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
+
 ```
 ## Training procedure
 
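For quick experiments, the loading-and-generation flow in the updated snippet can be wrapped in a small helper that applies the README's `# Q: … # A: ` prompt format. A minimal sketch, assuming `model` and `tokenizer` are already loaded as above; the `generate_answer` name and its defaults are illustrative, not part of this repository:

```python
# Minimal usage sketch (illustrative, not part of the commit).
# Assumes `model` and `tokenizer` were created by the snippet above.
import torch

def generate_answer(question: str, max_new_tokens: int = 100) -> str:
    # Follow the README's prompt format: "# Q: <question> # A: "
    prompt = f"# Q: {question} # A: "
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generate_answer("日本一高い山は何ですか?"))  # "What is the highest mountain in Japan?"
```

Note that with `device_map="auto"` the model may be sharded across devices, so sending inputs to `model.device` is generally safer than hard-coding `cuda:0`.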