amd
/

deepseek-moe-16b-chat-FP8-KV

Model card Files and versions Community

bowenbaoamd committed on Dec 19, 2024

Commit

3c59ff8

·

verified ·

1 Parent(s): df42951

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -38,7 +38,7 @@ python3 quantize_quark.py \
         --output_dir deepseek-moe-16b-chat-FP8-KV \
         --quant_scheme w_fp8_a_fp8 \
         --kv_cache_dtype fp8 \
-        --num_calib_data 128 \
         --model_export quark_safetensors \
         --no_weight_matrix_merge \
         --multi_gpu

         --output_dir deepseek-moe-16b-chat-FP8-KV \
         --quant_scheme w_fp8_a_fp8 \
         --kv_cache_dtype fp8 \
+        --num_calib_data 128 \
         --model_export quark_safetensors \
         --no_weight_matrix_merge \
         --multi_gpu