mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 03:41:53 +00:00
Use fused gemv+add only for token generation (TG) (#933)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -2067,7 +2067,7 @@ static int ggml_cuda_mul_mat_q(ggml_backend_cuda_context & ctx, const ggml_tenso
|
|||||||
|
|
||||||
auto stream = ctx.stream();
|
auto stream = ctx.stream();
|
||||||
|
|
||||||
auto fusion = ctx.fusion;
|
auto fusion = ctx.fusion && src1->ne[1] == 1;
|
||||||
|
|
||||||
auto ne10_padded = GGML_PAD(src1->ne[0], MATRIX_ROW_PADDING);
|
auto ne10_padded = GGML_PAD(src1->ne[0], MATRIX_ROW_PADDING);
|
||||||
auto nb10_padded = ne10_padded*sizeof(block_q8_1)/QK8_1;
|
auto nb10_padded = ne10_padded*sizeof(block_q8_1)/QK8_1;
|
||||||
|
|||||||
Reference in New Issue
Block a user