Update inference/infer.py
Browse files
- inference/infer.py +3 -1
inference/infer.py
CHANGED
|
@@ -78,7 +78,9 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 78 |
stage1_model,
|
| 79 |
torch_dtype=torch.bfloat16,
|
| 80 |
attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
|
| 81 |
-
)
|
|
|
|
|
|
|
| 82 |
|
| 83 |
codectool = CodecManipulator("xcodec", 0, 1)
|
| 84 |
codectool_stage2 = CodecManipulator("xcodec", 0, 8)
|
|
|
|
| 78 |
stage1_model,
|
| 79 |
torch_dtype=torch.bfloat16,
|
| 80 |
attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
|
| 81 |
+
)
|
| 82 |
+
model.to(device)
|
| 83 |
+
model.eval()
|
| 84 |
|
| 85 |
codectool = CodecManipulator("xcodec", 0, 1)
|
| 86 |
codectool_stage2 = CodecManipulator("xcodec", 0, 8)
|