mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-02 12:21:42 +00:00
Fix q8_0 repacking issues on AVX2 (#708)
Q8_0 needs Q8_0_X4, but Q8_0_R8 needs Q8_2_X4. So, if we decide to repack a Q8_0 MoE tensor to Q8_0_R8, iqk_moe_fused_mul_unary fails because the activations were prepared as Q8_0_X4, but we now need Q8_2_X4. For now a simple fix: just take the slow path and do not repack. Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -737,9 +737,7 @@ extern "C" IQK_API bool iqk_moe_fused_up_gate(long Nx, long Ny, long ne00, int n
|
||||
|
||||
auto etypeA = ggml_type(typeA);
|
||||
if (auto dequant_type = MulMat::is_dequant_better(etypeA, Ny); dequant_type != etypeA) {
|
||||
if (!MulMat::prepare(dequant_type, typeB, ne00, mm, Ny)) {
|
||||
return false;
|
||||
}
|
||||
if (MulMat::prepare(dequant_type, typeB, ne00, mm, Ny)) {
|
||||
|
||||
constexpr int k_x_step = 64;
|
||||
|
||||
@@ -777,6 +775,7 @@ extern "C" IQK_API bool iqk_moe_fused_up_gate(long Nx, long Ny, long ne00, int n
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user