From 1b9e58c9b59d2f66b00f6d668993686024991acc Mon Sep 17 00:00:00 2001 From: turboderp <11859846+turboderp@users.noreply.github.com> Date: Wed, 11 Mar 2026 20:25:56 +0100 Subject: [PATCH] BlockSparseMLP: Skip redundant gather --- exllamav3/modules/block_sparse_mlp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exllamav3/modules/block_sparse_mlp.py b/exllamav3/modules/block_sparse_mlp.py index 115f34c..4dbb31e 100644 --- a/exllamav3/modules/block_sparse_mlp.py +++ b/exllamav3/modules/block_sparse_mlp.py @@ -615,12 +615,12 @@ class BlockSparseMLP(Module): # Group once by local expert id (including sentinel for expert-P mode) order = flat_expert_local.argsort() - local_sorted = flat_expert_local[order] + # local_sorted = flat_expert_local[order] token_sorted = flat_token[order] weight_sorted = flat_weight[order] # Count how many assignments per expert - expert_count = torch.bincount(local_sorted, minlength = E + 1) + expert_count = torch.bincount(flat_expert_local, minlength = E + 1) expert_ptr = torch.empty(E + 2, device = y.device, dtype = torch.long) expert_ptr[0] = 0 expert_ptr[1:] = expert_count.cumsum(0)