mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 03:11:51 +00:00
q8_KV_r8: don't use nrc_y = 16 on Zen4
This is faster (350 t/s), although it is not clear why. That is much better than the 290 t/s we had before, but still slower than the 370 t/s for q8_k_r8.
This commit is contained in:
@@ -9367,9 +9367,9 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) {
|
|||||||
mm.funcs[5] = mul_mat_q8_KV_r8_q8_KV<6>;
|
mm.funcs[5] = mul_mat_q8_KV_r8_q8_KV<6>;
|
||||||
mm.funcs[6] = mul_mat_q8_KV_r8_q8_KV<7>;
|
mm.funcs[6] = mul_mat_q8_KV_r8_q8_KV<7>;
|
||||||
mm.funcs[7] = mul_mat_q8_KV_r8_q8_KV<8>;
|
mm.funcs[7] = mul_mat_q8_KV_r8_q8_KV<8>;
|
||||||
#ifdef HAVE_FANCY_SIMD
|
//#ifdef HAVE_FANCY_SIMD
|
||||||
mm.func16 = mul_mat_q8_KV_r8_q8_KV<16>;
|
// mm.func16 = mul_mat_q8_KV_r8_q8_KV<16>;
|
||||||
#endif
|
//#endif
|
||||||
expected_typeB = GGML_TYPE_Q8_KV;
|
expected_typeB = GGML_TYPE_Q8_KV;
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_IQ4_K_R4:
|
case GGML_TYPE_IQ4_K_R4:
|
||||||
|
|||||||
Reference in New Issue
Block a user