Update README.md
Browse files
README.md
CHANGED
@@ -1,5 +1,63 @@
|
|
1 |
<img src="https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu3/Tulu3-logo.png" alt="Tulu 3 banner" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# Llama-3.1-Tulu-3-405B
|
4 |
|
5 |
Tülu 3 is a leading instruction-following model family, offering fully open-source data, code, and recipes designed to serve as a comprehensive guide for modern post-training techniques.
|
|
|
1 |
<img src="https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu3/Tulu3-logo.png" alt="Tulu 3 banner" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
|
2 |
|
3 |
+
### Dynamic FP8 quantization using llmcompressor
|
4 |
+
|
5 |
+
```python
from transformers import AutoTokenizer

from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot
from llmcompressor.transformers.compression.helpers import (
    calculate_offload_device_map,
)

# Quantization recipe: FP8 weights (static, per-channel) plus FP8 activations
# (dynamic, per-token). The lm_head is excluded and kept in full precision.
recipe = """
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            ignore: ["lm_head"]
            config_groups:
                group_0:
                    weights:
                        num_bits: 8
                        type: float
                        strategy: channel
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: token
                        dynamic: true
                        symmetric: true
                    targets: ["Linear"]
"""

model_stub = "allenai/Llama-3.1-Tulu-3-405B"
model_name = model_stub.split("/")[-1]

# Spread the 405B checkpoint across 8 GPUs (with CPU offload as needed).
# No Hessians are required for this data-free dynamic-activation recipe.
device_map = calculate_offload_device_map(
    model_stub, reserve_for_hessians=False, num_gpus=8, torch_dtype="auto"
)

model = SparseAutoModelForCausalLM.from_pretrained(
    model_stub, torch_dtype="auto", device_map=device_map
)

output_dir = f"./{model_name}-FP8-dynamic"

# One-shot (data-free) application of the recipe; saves the compressed
# checkpoint and tokenizer to output_dir.
oneshot(
    model=model,
    recipe=recipe,
    output_dir=output_dir,
    save_compressed=True,
    tokenizer=AutoTokenizer.from_pretrained(model_stub),
)
```
|
60 |
+
|
61 |
# Llama-3.1-Tulu-3-405B
|
62 |
|
63 |
Tülu 3 is a leading instruction-following model family, offering fully open-source data, code, and recipes designed to serve as a comprehensive guide for modern post-training techniques.
|