mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 03:11:51 +00:00
Prevent an assertion failure when using a quantized K cache with flash attention (FA) disabled
This commit is contained in:
@@ -1647,7 +1647,7 @@ static void ggml_cuda_op_mul_mat(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int64_t src1_col_stride = split && used_devices > 1 ? MUL_MAT_SRC1_COL_STRIDE : ne11;
|
const int64_t src1_col_stride = split && used_devices > 1 ? MUL_MAT_SRC1_COL_STRIDE : ne11;
|
||||||
if (!(split && used_devices > 1) && quantization_done && ne11 == 1 && ne12 > 1 && ne13 == 1) {
|
if (!(split && used_devices > 1) && quantization_done && ne11 == 1 && ne12 > 1 && ne13 == 1 && ne02 == ne12 && ne02 == dst->ne[2]) {
|
||||||
//printf("invoking fast path for %s x %s\n", src0->name, src1->name);
|
//printf("invoking fast path for %s x %s\n", src0->name, src1->name);
|
||||||
int id = ctx.device;
|
int id = ctx.device;
|
||||||
char * src0_dd_i = dev[id].src0_dd;
|
char * src0_dd_i = dev[id].src0_dd;
|
||||||
|
|||||||
Reference in New Issue
Block a user