mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 10:51:51 +00:00
Minor
This commit is contained in:
@@ -2719,6 +2719,7 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
|
|||||||
if (use_quantized_src1) {
|
if (use_quantized_src1) {
|
||||||
quantize_mmq_q8_1_id_cuda((const float *)src1->data, src1_quantized.get(), (const char *)(dev_row_mapping.get() + mapping_offset),
|
quantize_mmq_q8_1_id_cuda((const float *)src1->data, src1_quantized.get(), (const char *)(dev_row_mapping.get() + mapping_offset),
|
||||||
src1->ne[0], num_src1_rows, src1_padded_num_cols, src0_1->type, stream);
|
src1->ne[0], num_src1_rows, src1_padded_num_cols, src0_1->type, stream);
|
||||||
|
CUDA_CHECK(cudaGetLastError());
|
||||||
src1_row.data = src1_quantized.get();
|
src1_row.data = src1_quantized.get();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -2745,13 +2746,6 @@ static bool ggml_cuda_up_gate_unary(ggml_backend_cuda_context & ctx, ggml_tensor
|
|||||||
dst_row.nb[2] = num_src1_rows*nb1;
|
dst_row.nb[2] = num_src1_rows*nb1;
|
||||||
dst_row.nb[3] = num_src1_rows*nb1;
|
dst_row.nb[3] = num_src1_rows*nb1;
|
||||||
|
|
||||||
//if (use_quantized_src1) {
|
|
||||||
// quantize_mmq_q8_1_cuda((const float *)src1_contiguous.get(), src1_quantized.get(), src1->ne[0], num_src1_rows, 1,
|
|
||||||
// src1_padded_num_cols, src0_1->type, stream);
|
|
||||||
// CUDA_CHECK(cudaGetLastError());
|
|
||||||
// src1_row.data = src1_quantized.get();
|
|
||||||
//}
|
|
||||||
|
|
||||||
dst_row.data = dst_up_contiguous.get();
|
dst_row.data = dst_up_contiguous.get();
|
||||||
if (use_quantized_src1) {
|
if (use_quantized_src1) {
|
||||||
ggml_cuda_op_mul_mat_q(ctx, &src0_1_row, &src1_row, &dst_row, (const char *)src0_1_row.data, nullptr, src1_quantized.get(), (float *)dst_row.data,
|
ggml_cuda_op_mul_mat_q(ctx, &src0_1_row, &src1_row, &dst_row, (const char *)src0_1_row.data, nullptr, src1_quantized.get(), (float *)dst_row.data,
|
||||||
|
|||||||
Reference in New Issue
Block a user