Falcon_OPT / AutoAWQ_kernels /awq_ext /pybind_awq_v2.cpp

Training in progress, step 500

25b4ce2 verified 7 months ago

389 Bytes

	#include <pybind11/pybind11.h>
	#include <torch/extension.h>
	#include "quantization_new/gemm/gemm_cuda.h"
	#include "quantization_new/gemv/gemv_cuda.h"

	// Python binding entry point for this extension.
	// TORCH_EXTENSION_NAME is not defined in this file; it is expected to be
	// provided by the extension build. `m` is the module object on which the
	// two functions below are registered.
	PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
	{
		// Registers gemm_forward_cuda_prefill under the same name in Python.
		// NOTE(review): presumably declared in quantization_new/gemm/gemm_cuda.h — confirm.
		m.def("gemm_forward_cuda_prefill", &gemm_forward_cuda_prefill, "New quantized GEMM kernel.");

		// Registers gemv_forward_cuda_decode under the same name in Python.
		// The docstring names GEMV (not GEMM) so that the Python-visible help
		// text matches the function actually bound here.
		// NOTE(review): presumably declared in quantization_new/gemv/gemv_cuda.h — confirm.
		m.def("gemv_forward_cuda_decode", &gemv_forward_cuda_decode, "New quantized GEMV kernel.");
	}