Update README.md
Browse files
README.md
CHANGED
@@ -9,13 +9,35 @@ library_name: transformers
|
|
9 |
pipeline_tag: text-generation
|
10 |
---
|
11 |
|
12 |
-
## Quantization
|
13 |
|
14 |
This repo contains a GPTQ 4-bit quantized version of the openchat/openchat-3.6-8b-20240522 model.
|
15 |
|
16 |
### Using with transformers
|
17 |
|
18 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
```
|
20 |
|
21 |
# Original Model Card
|
|
|
9 |
pipeline_tag: text-generation
|
10 |
---
|
11 |
|
12 |
+
## Quantization Details
|
13 |
|
14 |
This repo contains a GPTQ 4-bit quantized version of the openchat/openchat-3.6-8b-20240522 model.
|
15 |
|
16 |
### Using with transformers
|
17 |
|
18 |
```python
|
19 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
20 |
+
|
21 |
+
model_name_or_path = "thesven/openchat-3.6-8b-20240522-GPTQ"
|
22 |
+
|
23 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
24 |
+
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
|
25 |
+
device_map="auto",
|
26 |
+
trust_remote_code=False,
|
27 |
+
revision="main")
|
28 |
+
model.pad_token = model.config.eos_token_id
|
29 |
+
|
30 |
+
|
31 |
+
prompt_template=f'''
|
32 |
+
<<SYS>> You are a very creative story writer. Write a story on the following topic:<</SYS>>
|
33 |
+
[INST] Write a story about Ai[/INST]
|
34 |
+
[ASSISTANT]
|
35 |
+
'''
|
36 |
+
|
37 |
+
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
|
38 |
+
output = model.generate(inputs=input_ids, temperature=0.1, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
|
39 |
+
|
40 |
+
print(tokenizer.decode(output[0]))
|
41 |
```
|
42 |
|
43 |
# Original Model Card
|