Vulkan: fix u_batch > 4096/n_active_experts

This applies to the coopmat1 path. Without this fix we hit an assert; the same assert is hit in mainline too.
2026-04-28 18:32:04 +00:00 · 2025-07-14 17:28:55 +03:00
parent c7f3515a58
commit 14ef9ebe9a
1 changed files with 8 additions and 2 deletions
--- a/ggml/src/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan.cpp
@@ -4177,8 +4177,8 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_id_pipeline(ggml_backend_vk_co
    }

    if (!(src1_type == GGML_TYPE_F32 || (ctx->device->coopmat2 && src1_type == GGML_TYPE_F16))) {
-        printf("Oops: %s, %s, prec = %d, ctx->device->fp16 = %d, ctx->device->coopmat_support = %d, ctx->device->coopmat_acc_f16_support = %d\n",
-                ggml_type_name(src0_type), ggml_type_name(src1_type), prec, ctx->device->fp16, ctx->device->coopmat_support, ctx->device->coopmat_acc_f16_support);
+        // Better to return nullptr here than to hit the assert below
+        return nullptr;
    }

    GGML_ASSERT(src1_type == GGML_TYPE_F32 || (ctx->device->coopmat2 && src1_type == GGML_TYPE_F16));
@@ -6157,9 +6157,15 @@ static void ggml_vk_mul_mat_id(ggml_backend_vk_context * ctx, vk_context& subctx
            src2_copy.view_offs = src2->view_offs + token_start * src2_copy.nb[1];
            dst_copy.view_offs = dst->view_offs + token_start * dst_copy.nb[2];

+            // Note: we need to update the nb members as well; otherwise the copies are interpreted as being
+            //       non-contiguous, which triggers an assert
            src1_copy.ne[2] = n_tokens;
+            src1_copy.nb[3] = src1_copy.nb[2] * src1_copy.ne[2];
            src2_copy.ne[1] = n_tokens;
+            src2_copy.nb[2] = src2_copy.nb[1] * src2_copy.ne[1];
+            src2_copy.nb[3] = src2_copy.nb[2] * src2_copy.ne[2];
            dst_copy.ne[2] = n_tokens;
+            dst_copy.nb[3] = dst_copy.nb[2] * dst_copy.ne[2];

            ggml_vk_mul_mat_id_q_f16(ctx, subctx, src0, &src1_copy, &src2_copy, &dst_copy, dryrun);
        }