Spaces:
Sleeping
Sleeping
Oleg Shulyakov
committed on
Commit
·
1e07946
1
Parent(s):
b3508c3
Use array append
Browse files
app.py
CHANGED
|
@@ -221,20 +221,24 @@ def quantize_model(
|
|
| 221 |
quantize_cmd = ["llama-quantize"]
|
| 222 |
|
| 223 |
if quant_embedding:
|
| 224 |
-
quantize_cmd
|
|
|
|
| 225 |
if quant_output:
|
| 226 |
-
quantize_cmd
|
|
|
|
| 227 |
|
| 228 |
if use_imatrix:
|
| 229 |
-
quantize_cmd
|
|
|
|
| 230 |
|
| 231 |
quantized_gguf = str(Path(outdir)/gguf_name)
|
| 232 |
-
quantize_cmd
|
|
|
|
| 233 |
|
| 234 |
if use_imatrix:
|
| 235 |
-
quantize_cmd
|
| 236 |
else:
|
| 237 |
-
quantize_cmd
|
| 238 |
|
| 239 |
print(f"Quantizing model with {quantize_cmd}")
|
| 240 |
result = subprocess.run(quantize_cmd, shell=False, capture_output=True)
|
|
|
|
| 221 |
quantize_cmd = ["llama-quantize"]
|
| 222 |
|
| 223 |
if quant_embedding:
|
| 224 |
+
quantize_cmd.append("--token-embedding-type")
|
| 225 |
+
quantize_cmd.append(embedding_tensor_method)
|
| 226 |
if quant_output:
|
| 227 |
+
quantize_cmd.append("--output-tensor-type")
|
| 228 |
+
quantize_cmd.append(output_tensor_method)
|
| 229 |
|
| 230 |
if use_imatrix:
|
| 231 |
+
quantize_cmd.append("--imatrix")
|
| 232 |
+
quantize_cmd.append(imatrix_path)
|
| 233 |
|
| 234 |
quantized_gguf = str(Path(outdir)/gguf_name)
|
| 235 |
+
quantize_cmd.append(fp16)
|
| 236 |
+
quantize_cmd.append(quantized_gguf)
|
| 237 |
|
| 238 |
if use_imatrix:
|
| 239 |
+
quantize_cmd.append(imatrix_q_method)
|
| 240 |
else:
|
| 241 |
+
quantize_cmd.append(q_method)
|
| 242 |
|
| 243 |
print(f"Quantizing model with {quantize_cmd}")
|
| 244 |
result = subprocess.run(quantize_cmd, shell=False, capture_output=True)
|