duzx16
committed on
Commit
·
63d66b0
1
Parent(s):
f55a108
Remove assert in load_cpu_kernel
Browse files- quantization.py +2 -4
quantization.py
CHANGED
|
@@ -442,7 +442,6 @@ class QuantizedEmbedding(Embedding): # TODO: backward, check empty_init
|
|
| 442 |
def load_cpu_kernel(**kwargs):
|
| 443 |
global cpu_kernels
|
| 444 |
cpu_kernels = CPUKernel(**kwargs)
|
| 445 |
-
assert cpu_kernels.load
|
| 446 |
|
| 447 |
|
| 448 |
def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
|
|
@@ -453,9 +452,8 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
|
|
| 453 |
dense_h_to_4h_quantization_cache = None
|
| 454 |
dense_4h_to_h_quantization_cache = None
|
| 455 |
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
except:
|
| 459 |
if kernels is None: # CUDA kernels failed
|
| 460 |
print("Cannot load cpu or cuda kernel, quantization failed:")
|
| 461 |
assert kernels is not None
|
|
|
|
| 442 |
def load_cpu_kernel(**kwargs):
|
| 443 |
global cpu_kernels
|
| 444 |
cpu_kernels = CPUKernel(**kwargs)
|
|
|
|
| 445 |
|
| 446 |
|
| 447 |
def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
|
|
|
|
| 452 |
dense_h_to_4h_quantization_cache = None
|
| 453 |
dense_4h_to_h_quantization_cache = None
|
| 454 |
|
| 455 |
+
load_cpu_kernel(**kwargs)
|
| 456 |
+
if not cpu_kernels.load:
|
|
|
|
| 457 |
if kernels is None: # CUDA kernels failed
|
| 458 |
print("Cannot load cpu or cuda kernel, quantization failed:")
|
| 459 |
assert kernels is not None
|