// Adapted from turboderp exllama: https://github.com/turboderp/exllama
// Bundle of matmul tuning switches, kept as a plain aggregate so it can be
// brace-initialized and copied freely. No member has a default initializer,
// so a default-constructed instance holds indeterminate values until the
// caller assigns every field.
//
// NOTE(review): the per-field descriptions below are inferred from the member
// names only -- confirm against the upstream exllama sources.
struct ExLlamaTuning
{
    // Presumably a size threshold that selects the "reconstruct" matmul path.
    int matmul_recons_thd;

    // Presumably toggles a fused remap step inside the matmul kernel.
    bool matmul_fused_remap;

    // Presumably disables the half2 code path when set.
    bool matmul_no_half2;
};