Commit
·
a05273e
1
Parent(s):
d346fb2
Update README.md
Browse files
README.md
CHANGED
@@ -119,7 +119,7 @@ bnb_4bit_quant_type = "nf4"
|
|
119 |
use_double_nested_quant = True
|
120 |
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
|
121 |
|
122 |
-
# BitsAndBytesConfig
|
123 |
bnb_config = BitsAndBytesConfig(
|
124 |
load_in_4bit=use_4bit,
|
125 |
bnb_4bit_use_double_quant=use_double_nested_quant,
|
@@ -149,6 +149,6 @@ input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cu
|
|
149 |
# Run the model to infer an output
|
150 |
outputs = model.generate(input_ids=input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.001, pad_token_id=1)
|
151 |
|
152 |
-
#
|
153 |
print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):])
|
154 |
```
|
|
|
119 |
use_double_nested_quant = True
|
120 |
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
|
121 |
|
122 |
+
# BitsAndBytesConfig 4-bit config
|
123 |
bnb_config = BitsAndBytesConfig(
|
124 |
load_in_4bit=use_4bit,
|
125 |
bnb_4bit_use_double_quant=use_double_nested_quant,
|
|
|
149 |
# Run the model to infer an output
|
150 |
outputs = model.generate(input_ids=input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.001, pad_token_id=1)
|
151 |
|
152 |
+
# Detokenize and display the generated output
|
153 |
print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):])
|
154 |
```
|