This commit is contained in:
Iwan Kawrakow
2025-07-06 15:58:49 +03:00
parent 6c55ffa8ef
commit efc440fb29

View File

@@ -2719,6 +2719,7 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
if (use_quantized_src1) {
quantize_mmq_q8_1_id_cuda((const float *)src1->data, src1_quantized.get(), (const char *)(dev_row_mapping.get() + mapping_offset),
src1->ne[0], num_src1_rows, src1_padded_num_cols, src0_1->type, stream);
CUDA_CHECK(cudaGetLastError());
src1_row.data = src1_quantized.get();
}
else {
@@ -2745,13 +2746,6 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
dst_row.nb[2] = num_src1_rows*nb1;
dst_row.nb[3] = num_src1_rows*nb1;
//if (use_quantized_src1) {
// quantize_mmq_q8_1_cuda((const float *)src1_contiguous.get(), src1_quantized.get(), src1->ne[0], num_src1_rows, 1,
// src1_padded_num_cols, src0_1->type, stream);
// CUDA_CHECK(cudaGetLastError());
// src1_row.data = src1_quantized.get();
//}
dst_row.data = dst_up_contiguous.get();
if (use_quantized_src1) {
ggml_cuda_op_mul_mat_q(ctx, &src0_1_row, &src1_row, &dst_row, (const char *)src0_1_row.data, nullptr, src1_quantized.get(), (float *)dst_row.data,