Update README.md
Browse files
README.md
CHANGED
|
@@ -255,7 +255,7 @@ pip install fbgemm-gpu-genai
|
|
| 255 |
|
| 256 |
```Shell
|
| 257 |
export MODEL=Qwen/Qwen3-32B # or pytorch/Qwen3-32B-FP8
|
| 258 |
-
VLLM_DISABLE_COMPILE_CACHE=1
|
| 259 |
```
|
| 260 |
</details>
|
| 261 |
|
|
|
|
| 255 |
|
| 256 |
```Shell
|
| 257 |
export MODEL=Qwen/Qwen3-32B # or pytorch/Qwen3-32B-FP8
|
| 258 |
+
VLLM_DISABLE_COMPILE_CACHE=1 vllm bench latency --input-len 256 --output-len 256 --model $MODEL --batch-size 1
|
| 259 |
```
|
| 260 |
</details>
|
| 261 |
|