From eb1686a84074e0d0ef9c72831fa641ee8ad31a5d Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Tue, 3 Mar 2026 22:33:58 +0100
Subject: [PATCH] GatedDeltaNet: Set chunked seqlen threshold to num_v_heads
 (prevents warning from FLA)

---
 exllamav3/modules/gated_delta_net.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exllamav3/modules/gated_delta_net.py b/exllamav3/modules/gated_delta_net.py
index 0d1982e..bd89173 100644
--- a/exllamav3/modules/gated_delta_net.py
+++ b/exllamav3/modules/gated_delta_net.py
@@ -618,7 +618,7 @@ class GatedDeltaNet(Module):
             # Use chunked rule when advantageous and available
             # TODO: At least warn if chunked rule (i.e. flash-linear-attention) is not available
             #       since performance will tank on prompt ingestion
-            if seqlen >= 32 and chunk_gated_delta_rule is not None:
+            if seqlen >= self.num_v_heads and chunk_gated_delta_rule is not None:
                 mixed_qkv = mixed_qkv.transpose(1, 2)
 
                 q, k, v = torch.split(mixed_qkv, [self.k_dim, self.k_dim, self.v_dim], dim = -1)