From d81cde5cea7bf99b6a7f5f75a696dc9a8ddf21b0 Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Thu, 19 Feb 2026 08:15:26 +0100
Subject: [PATCH] Fix very low bpw missing imatrix check (#1284)

---
 src/llama-quantize.cpp | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/llama-quantize.cpp b/src/llama-quantize.cpp
index 927c3f31..42e3fd75 100644
--- a/src/llama-quantize.cpp
+++ b/src/llama-quantize.cpp
@@ -1425,23 +1425,27 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     }
                 }
             }
-            if (!params->ignore_imatrix_rules && !imatrix &&
-                (new_type == GGML_TYPE_IQ2_XXS ||
-                 new_type == GGML_TYPE_IQ2_XXS_R4 ||
-                 new_type == GGML_TYPE_IQ2_XS  ||
-                 new_type == GGML_TYPE_IQ2_XS_R4  ||
-                 new_type == GGML_TYPE_IQ2_S   ||
-                 new_type == GGML_TYPE_IQ2_S_R4||
-                 new_type == GGML_TYPE_IQ1_S   ||
-                 new_type == GGML_TYPE_IQ1_S_R4||
-                 new_type == GGML_TYPE_IQ1_M_R4||
-                (new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight"))  ||
-                (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
-                LLAMA_LOG_ERROR("\n\n============================================================\n");
-                LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
-                LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
-                LLAMA_LOG_ERROR("============================================================\n\n");
-                throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
+            if (!params->ignore_imatrix_rules && !imatrix) {
+                bool is_very_low_bpw_quant = new_type == GGML_TYPE_IQ2_XXS    ||
+                                             new_type == GGML_TYPE_IQ2_XXS_R4 ||
+                                             new_type == GGML_TYPE_IQ2_XS     ||
+                                             new_type == GGML_TYPE_IQ2_XS_R4  ||
+                                             new_type == GGML_TYPE_IQ2_S      ||
+                                             new_type == GGML_TYPE_IQ2_S_R4   ||
+                                             new_type == GGML_TYPE_IQ1_S      ||
+                                             new_type == GGML_TYPE_IQ1_S_R4   ||
+                                             new_type == GGML_TYPE_IQ1_M      ||
+                                             new_type == GGML_TYPE_IQ1_M_R4   ||
+                                             new_type == GGML_TYPE_IQ1_KT     ||
+                                             new_type == GGML_TYPE_IQ2_KT     ||
+                                            (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S);
+                if (is_very_low_bpw_quant && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) {
+                    LLAMA_LOG_ERROR("\n\n============================================================\n");
+                    LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
+                    LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
+                    LLAMA_LOG_ERROR("============================================================\n\n");
+                    throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
+                }
             }
 
             float * f32_data;