Fix very low bpw missing imatrix check (#1284)

2026-02-27 00:24:11 +00:00 · 2026-02-19 08:15:26 +01:00
parent 51df09be8a
commit d81cde5cea
1 changed file with 21 additions and 17 deletions
--- a/src/llama-quantize.cpp
+++ b/src/llama-quantize.cpp
@@ -1425,23 +1425,27 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                    }
                }
            }
-            if (!params->ignore_imatrix_rules && !imatrix &&
-                (new_type == GGML_TYPE_IQ2_XXS ||
-                 new_type == GGML_TYPE_IQ2_XXS_R4 ||
-                 new_type == GGML_TYPE_IQ2_XS  ||
-                 new_type == GGML_TYPE_IQ2_XS_R4  ||
-                 new_type == GGML_TYPE_IQ2_S   ||
-                 new_type == GGML_TYPE_IQ2_S_R4||
-                 new_type == GGML_TYPE_IQ1_S   ||
-                 new_type == GGML_TYPE_IQ1_S_R4||
-                 new_type == GGML_TYPE_IQ1_M_R4||
-                (new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight"))  ||
-                (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
-                LLAMA_LOG_ERROR("\n\n============================================================\n");
-                LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
-                LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
-                LLAMA_LOG_ERROR("============================================================\n\n");
-                throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
+            if (!params->ignore_imatrix_rules && !imatrix) {
+                bool is_very_low_bpw_quant = new_type == GGML_TYPE_IQ2_XXS    ||
+                                             new_type == GGML_TYPE_IQ2_XXS_R4 ||
+                                             new_type == GGML_TYPE_IQ2_XS     ||
+                                             new_type == GGML_TYPE_IQ2_XS_R4  ||
+                                             new_type == GGML_TYPE_IQ2_S      ||
+                                             new_type == GGML_TYPE_IQ2_S_R4   ||
+                                             new_type == GGML_TYPE_IQ1_S      ||
+                                             new_type == GGML_TYPE_IQ1_S_R4   ||
+                                             new_type == GGML_TYPE_IQ1_M      ||
+                                             new_type == GGML_TYPE_IQ1_M_R4   ||
+                                             new_type == GGML_TYPE_IQ1_KT     ||
+                                             new_type == GGML_TYPE_IQ2_KT     ||
+                                            (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S);
+                if (is_very_low_bpw_quant && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) {
+                    LLAMA_LOG_ERROR("\n\n============================================================\n");
+                    LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
+                    LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
+                    LLAMA_LOG_ERROR("============================================================\n\n");
+                    throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
+                }
            }

            float * f32_data;