Minor

2026-04-29 10:51:51 +00:00 · 2025-07-06 15:58:49 +03:00
parent 6c55ffa8ef
commit efc440fb29
1 changed file with 1 addition and 7 deletions
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -2719,6 +2719,7 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
            if (use_quantized_src1) {
                quantize_mmq_q8_1_id_cuda((const float *)src1->data, src1_quantized.get(), (const char *)(dev_row_mapping.get() + mapping_offset),
                        src1->ne[0], num_src1_rows, src1_padded_num_cols, src0_1->type, stream);
                CUDA_CHECK(cudaGetLastError());
                src1_row.data = src1_quantized.get();
            }
            else {
@@ -2745,13 +2746,6 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
            dst_row.nb[2] = num_src1_rows*nb1;
            dst_row.nb[3] = num_src1_rows*nb1;
            //if (use_quantized_src1) {
            //     quantize_mmq_q8_1_cuda((const float *)src1_contiguous.get(), src1_quantized.get(), src1->ne[0], num_src1_rows, 1,
            //             src1_padded_num_cols, src0_1->type, stream);
            //     CUDA_CHECK(cudaGetLastError());
            //     src1_row.data = src1_quantized.get();
            //}
            dst_row.data  =  dst_up_contiguous.get();
            if (use_quantized_src1) {
                ggml_cuda_op_mul_mat_q(ctx, &src0_1_row, &src1_row, &dst_row, (const char *)src0_1_row.data, nullptr, src1_quantized.get(), (float *)dst_row.data,