diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index d00f50a3..a3b36108 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -15021,7 +15021,10 @@ static void ggml_compute_forward_mul_mat_id_up_gate( if (ids->ne[1] == 1 && dst->type == GGML_TYPE_F32) { int gcd = simple_gcd(n_ids, nth); if (gcd > 1) { - ggml_barrier(params->shared); + if (src1->type != vec_dot_type) { + // make sure quantization has finished + ggml_barrier(params->shared); + } const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata; const size_t row_size = ggml_row_size(vec_dot_type, ne10); int counter = 0; diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 4d29e2f0..1242c2fb 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -532,7 +532,9 @@ bool iqk_moe_fused_up_gate(long Nx, long Ny, long ne00, int ne11, int unary_op, float * C, long nb1, long nb2, const void * vrow_mapping, int ith, int nth) { const mmid_row_mapping * row_mapping = (const mmid_row_mapping *)vrow_mapping; - assert(row_mapping != nullptr); + // Removing this assert to accommodate usage without row id mapping (e.g., for Ny = 1, + // or if B has been prepared to be contiguous). + //assert(row_mapping != nullptr); MulMat mm; if (!MulMat::prepare(typeA, typeB, ne00, mm, Ny)) {