Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-02-24 23:24:13 +00:00)
Process up to 16 columns per kernel call for q8_k_r8
This brings PP-512 up to 389 t/s.
This commit is contained in:
@@ -7991,6 +7991,9 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) {
 mm.funcs[5] = mul_mat_q8_k_r8_q8_k<6>;
 mm.funcs[6] = mul_mat_q8_k_r8_q8_k<7>;
 mm.funcs[7] = mul_mat_q8_k_r8_q8_k<8>;
+#ifdef HAVE_FANCY_SIMD
+mm.func16 = mul_mat_q8_k_r8_q8_k<16>;
+#endif
 expected_typeB = GGML_TYPE_Q8_KR8;
 break;
 case GGML_TYPE_IQ4_K_R4:
Reference in New Issue
Block a user