From 60afe8d983e29fe322b458954fa99fce2c491ab7 Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Sat, 7 Mar 2026 20:48:24 +0100
Subject: [PATCH] Update README.md

---
 README.md                            | 9 ++++-----
 exllamav3/modules/gated_delta_net.py | 3 +--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 00e4f66..8a2ee5d 100644
--- a/README.md
+++ b/README.md
@@ -17,11 +17,10 @@ The official and recommended backend server for ExLlamaV3 is [TabbyAPI](https://
 
 ### ⚠️ Important
 
-- **Qwen3-Next** support is currently experimental and still requires some optimization, so don't expect
-  optimal performance just yet. [Flash Linear Attention](https://github.com/fla-org/flash-linear-attention) is required
-  and this in turn requires Triton. [causal-conv1d](https://github.com/Dao-AILab/causal-conv1d) is supported and 
-  recommended but not required.
-- **Qwen3-Next** currently does not support tensor/expert parallelism.
+- **Qwen3-Next** and **Qwen3.5** can take advantage of [Flash Linear Attention](https://github.com/fla-org/flash-linear-attention), though this requires
+  Triton, and performance can be inconsistent due to the sporadic JIT compilation that Triton performs. [causal-conv1d](https://github.com/Dao-AILab/causal-conv1d) is
+  supported and recommended but not required.
+- **Qwen3-Next** and **Qwen3.5** currently do not support tensor/expert parallelism.
 
 ## Architecture support
 
diff --git a/exllamav3/modules/gated_delta_net.py b/exllamav3/modules/gated_delta_net.py
index bd89173..6384db1 100644
--- a/exllamav3/modules/gated_delta_net.py
+++ b/exllamav3/modules/gated_delta_net.py
@@ -616,8 +616,7 @@ class GatedDeltaNet(Module):
                 )
 
             # Use chunked rule when advantageous and available
-            # TODO: At least warn if chunked rule (i.e. flash-linear-attention) is not available
-            #       since performance will tank on prompt ingestion
+            # TODO: Replace chunked fn (from flash-linear-attention) with a non-Triton implementation
             if seqlen >= self.num_v_heads and chunk_gated_delta_rule is not None:
                 mixed_qkv = mixed_qkv.transpose(1, 2)