Update README.md
Browse files
README.md
CHANGED
@@ -42,6 +42,18 @@ For more details, please consult the associated [Github repository](https://gith
|
|
42 |
# How to use it
|
43 |
|
44 |
```Python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
model = AutoModelForCausalLM.from_pretrained(
|
46 |
"meta-llama/Llama-2-13b-hf",
|
47 |
load_in_4bit=True,
|
@@ -55,13 +67,21 @@ model = PeftModel.from_pretrained(
|
|
55 |
"sag-uniroma2/u-depp-llama-2-13b"
|
56 |
)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
input_string = "He was most widely recognized for some of his books."
|
59 |
prompt = f"""
|
60 |
### Input:
|
61 |
{input_string}
|
62 |
### Answer:"""
|
63 |
|
64 |
-
inputs = tokenizer(prompt, return_tensors="pt", padding=
|
65 |
input_ids = inputs["input_ids"].to(model.device)
|
66 |
|
67 |
with torch.no_grad():
|
@@ -70,7 +90,7 @@ with torch.no_grad():
|
|
70 |
generation_config=generation_config,
|
71 |
return_dict_in_generate=True,
|
72 |
output_scores=True,
|
73 |
-
max_new_tokens=
|
74 |
use_cache=True,
|
75 |
)
|
76 |
s = gen_outputs.sequences[0]
|
|
|
42 |
# How to use it
|
43 |
|
44 |
```Python
|
45 |
+
import transformers
|
46 |
+
import torch
|
47 |
+
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
48 |
+
from peft import PeftModel
|
49 |
+
|
50 |
+
quant_config = BitsAndBytesConfig(
|
51 |
+
load_in_4bit=True,
|
52 |
+
bnb_4bit_use_double_quant=True,
|
53 |
+
bnb_4bit_quant_type="nf4",
|
54 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
55 |
+
)
|
56 |
+
|
57 |
model = AutoModelForCausalLM.from_pretrained(
|
58 |
"meta-llama/Llama-2-13b-hf",
|
59 |
load_in_4bit=True,
|
|
|
67 |
"sag-uniroma2/u-depp-llama-2-13b"
|
68 |
)
|
69 |
|
70 |
+
generation_config = GenerationConfig(
|
71 |
+
num_beams=4,
|
72 |
+
do_sample=False,
|
73 |
+
early_stopping=True,
|
74 |
+
)
|
75 |
+
|
76 |
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", trust_remote_code=True)
|
77 |
+
|
78 |
input_string = "He was most widely recognized for some of his books."
|
79 |
prompt = f"""
|
80 |
### Input:
|
81 |
{input_string}
|
82 |
### Answer:"""
|
83 |
|
84 |
+
inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
85 |
input_ids = inputs["input_ids"].to(model.device)
|
86 |
|
87 |
with torch.no_grad():
|
|
|
90 |
generation_config=generation_config,
|
91 |
return_dict_in_generate=True,
|
92 |
output_scores=True,
|
93 |
+
max_new_tokens=1024,
|
94 |
use_cache=True,
|
95 |
)
|
96 |
s = gen_outputs.sequences[0]
|