mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 07:04:11 +00:00
Minor
This commit is contained in:
@@ -2719,6 +2719,7 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
|
||||
if (use_quantized_src1) {
|
||||
quantize_mmq_q8_1_id_cuda((const float *)src1->data, src1_quantized.get(), (const char *)(dev_row_mapping.get() + mapping_offset),
|
||||
src1->ne[0], num_src1_rows, src1_padded_num_cols, src0_1->type, stream);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
src1_row.data = src1_quantized.get();
|
||||
}
|
||||
else {
|
||||
@@ -2745,13 +2746,6 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
|
||||
dst_row.nb[2] = num_src1_rows*nb1;
|
||||
dst_row.nb[3] = num_src1_rows*nb1;
|
||||
|
||||
//if (use_quantized_src1) {
|
||||
// quantize_mmq_q8_1_cuda((const float *)src1_contiguous.get(), src1_quantized.get(), src1->ne[0], num_src1_rows, 1,
|
||||
// src1_padded_num_cols, src0_1->type, stream);
|
||||
// CUDA_CHECK(cudaGetLastError());
|
||||
// src1_row.data = src1_quantized.get();
|
||||
//}
|
||||
|
||||
dst_row.data = dst_up_contiguous.get();
|
||||
if (use_quantized_src1) {
|
||||
ggml_cuda_op_mul_mat_q(ctx, &src0_1_row, &src1_row, &dst_row, (const char *)src0_1_row.data, nullptr, src1_quantized.get(), (float *)dst_row.data,
|
||||
|
||||
Reference in New Issue
Block a user