From 343e335ff047dcf406a5326d88944fd086186caa Mon Sep 17 00:00:00 2001
From: yurko <yurko@pop-os.tail5a1a6b.ts.net>
Date: Sun, 8 Feb 2026 00:08:33 -0800
Subject: [PATCH] qwen3next: warn when forcing fused decode mode

---
 src/llama-build-context.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index d1112fcd..fc749a5f 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -4208,6 +4208,13 @@ ggml_cgraph * llm_build_context::build_qwen3next() {
                 return qwen3next_fused_delta_mode::off;
         }
     }();
+    if (fused_delta_mode == qwen3next_fused_delta_mode::all_tokens) { // warn only for the all_tokens mode
+        static bool warned_all_tokens = false; // function-local static: value persists across calls; plain bool, no synchronization
+        if (!warned_all_tokens) {
+            LLAMA_LOG_WARN("%s: LLAMA_QWEN3NEXT_FUSED_DELTA=2 enables fused single-token decode; quality regression is known in this mode\n", __func__);
+            warned_all_tokens = true; // latch so later calls skip the warning
+        }
+    }
 
     auto get_slice_2d = [&](ggml_tensor * t, int64_t c) -> ggml_tensor * {
         return ggml_view_4d(ctx0, t, t->ne[0], t->ne[1], 1, t->ne[3],