From 2965eec9197bae3333f0051bdb2fed289647ec0a Mon Sep 17 00:00:00 2001 From: turboderp <11859846+turboderp@users.noreply.github.com> Date: Tue, 3 Mar 2026 05:01:16 +0100 Subject: [PATCH] GatedDeltaNet: Skip redundant zeroing of buffers (Qwen3-Next) --- exllamav3/modules/gated_delta_net.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exllamav3/modules/gated_delta_net.py b/exllamav3/modules/gated_delta_net.py index 29c325c..319f856 100644 --- a/exllamav3/modules/gated_delta_net.py +++ b/exllamav3/modules/gated_delta_net.py @@ -562,10 +562,10 @@ class GatedDeltaNet(Module): qkvz = self.qkvz_proj.forward(x, params) ba = self.ba_proj.forward(x, params) - mixed_qkv = torch.zeros((bsz, self.fdim_qkv, seqlen), dtype = torch.bfloat16, device = self.device) - z = torch.zeros((bsz, seqlen, self.num_v_heads, self.v_head_dim), dtype = torch.bfloat16, device = self.device) - beta = torch.zeros((bsz, seqlen, self.num_v_heads), dtype = torch.bfloat16, device = self.device) - g = torch.zeros((bsz, seqlen, self.num_v_heads), dtype = torch.float, device = self.device) + mixed_qkv = torch.empty((bsz, self.fdim_qkv, seqlen), dtype = torch.bfloat16, device = self.device) + z = torch.empty((bsz, seqlen, self.num_v_heads, self.v_head_dim), dtype = torch.bfloat16, device = self.device) + beta = torch.empty((bsz, seqlen, self.num_v_heads), dtype = torch.bfloat16, device = self.device) + g = torch.empty((bsz, seqlen, self.num_v_heads), dtype = torch.float, device = self.device) ext.gated_delta_net_fused_op( qkvz, ba,