mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 11:21:56 +00:00
Call iqk_convert_repack in MoE GEMM
This commit is contained in:
@@ -626,7 +626,7 @@ extern "C" IQK_API bool iqk_mul_mat_moe(long Nx, long Ny, long ne00, int ne11,
|
|||||||
this_info.s += ix;
|
this_info.s += ix;
|
||||||
int this_nrc_x = ix + k_x_step <= nrc_x ? k_x_step : nrc_x - ix;
|
int this_nrc_x = ix + k_x_step <= nrc_x ? k_x_step : nrc_x - ix;
|
||||||
if (f.size() < row_size_qx*this_nrc_x) f.resize(row_size_qx*this_nrc_x);
|
if (f.size() < row_size_qx*this_nrc_x) f.resize(row_size_qx*this_nrc_x);
|
||||||
if (!iqk_dequantize_ktquants(typeA, ne00, (const char *)A + (first_x + ix)*strideA, strideA, f.data(), ne00, this_nrc_x)) {
|
if (!iqk_convert_repack(typeA, ne00, (const char *)A + (first_x + ix)*strideA, strideA, f.data(), ne00, this_nrc_x)) {
|
||||||
GGML_ABORT("Fatal error");
|
GGML_ABORT("Fatal error");
|
||||||
}
|
}
|
||||||
mm.mul_mat_NxM(ne00, f.data(), row_size_qx, this_info, this_nrc_x, Ny);
|
mm.mul_mat_NxM(ne00, f.data(), row_size_qx, this_info, this_nrc_x, Ny);
|
||||||
@@ -696,10 +696,10 @@ extern "C" IQK_API bool iqk_moe_fused_up_gate(long Nx, long Ny, long ne00, int n
|
|||||||
if (f.size() < 2*row_size_qx*this_nrc_x) f.resize(2*row_size_qx*this_nrc_x);
|
if (f.size() < 2*row_size_qx*this_nrc_x) f.resize(2*row_size_qx*this_nrc_x);
|
||||||
auto Xu = f.data();
|
auto Xu = f.data();
|
||||||
auto Xg = f.data() + row_size_qx*this_nrc_x;
|
auto Xg = f.data() + row_size_qx*this_nrc_x;
|
||||||
if (!iqk_dequantize_ktquants(typeA, ne00, (const char *)Aup + (first_x + ix)*strideA, strideA, Xu, ne00, this_nrc_x)) {
|
if (!iqk_convert_repack(typeA, ne00, (const char *)Aup + (first_x + ix)*strideA, strideA, Xu, ne00, this_nrc_x)) {
|
||||||
GGML_ABORT("Fatal error");
|
GGML_ABORT("Fatal error");
|
||||||
}
|
}
|
||||||
if (!iqk_dequantize_ktquants(typeA, ne00, (const char *)Agate + (first_x + ix)*strideA, strideA, Xg, ne00, this_nrc_x)) {
|
if (!iqk_convert_repack(typeA, ne00, (const char *)Agate + (first_x + ix)*strideA, strideA, Xg, ne00, this_nrc_x)) {
|
||||||
GGML_ABORT("Fatal error");
|
GGML_ABORT("Fatal error");
|
||||||
}
|
}
|
||||||
mm.mul_mat_up_gate_NxM(ne00, Xu, Xg, row_size_qx, this_info, this_nrc_x, Ny, unary_op);
|
mm.mul_mat_up_gate_NxM(ne00, Xu, Xg, row_size_qx, this_info, this_nrc_x, Ny, unary_op);
|
||||||
|
|||||||
Reference in New Issue
Block a user