From d81cde5cea7bf99b6a7f5f75a696dc9a8ddf21b0 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Thu, 19 Feb 2026 08:15:26 +0100 Subject: [PATCH] Fix very low bpw missing imatrix check (#1284) --- src/llama-quantize.cpp | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/llama-quantize.cpp b/src/llama-quantize.cpp index 927c3f31..42e3fd75 100644 --- a/src/llama-quantize.cpp +++ b/src/llama-quantize.cpp @@ -1425,23 +1425,27 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s } } } - if (!params->ignore_imatrix_rules && !imatrix && - (new_type == GGML_TYPE_IQ2_XXS || - new_type == GGML_TYPE_IQ2_XXS_R4 || - new_type == GGML_TYPE_IQ2_XS || - new_type == GGML_TYPE_IQ2_XS_R4 || - new_type == GGML_TYPE_IQ2_S || - new_type == GGML_TYPE_IQ2_S_R4|| - new_type == GGML_TYPE_IQ1_S || - new_type == GGML_TYPE_IQ1_S_R4|| - new_type == GGML_TYPE_IQ1_M_R4|| - (new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) || - (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) { - LLAMA_LOG_ERROR("\n\n============================================================\n"); - LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name); - LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n"); - LLAMA_LOG_ERROR("============================================================\n\n"); - throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name)); + if (!params->ignore_imatrix_rules && !imatrix) { + bool is_very_low_bpw_quant = new_type == GGML_TYPE_IQ2_XXS || + new_type == GGML_TYPE_IQ2_XXS_R4 || + new_type == GGML_TYPE_IQ2_XS || + new_type == GGML_TYPE_IQ2_XS_R4 || + new_type == GGML_TYPE_IQ2_S || + new_type == GGML_TYPE_IQ2_S_R4 || + new_type == GGML_TYPE_IQ1_S || + new_type == GGML_TYPE_IQ1_S_R4 || + new_type == GGML_TYPE_IQ1_M || + new_type == GGML_TYPE_IQ1_M_R4 || + new_type == GGML_TYPE_IQ1_KT || + new_type == GGML_TYPE_IQ2_KT || + (new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S); + if (is_very_low_bpw_quant && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) { + LLAMA_LOG_ERROR("\n\n============================================================\n"); + LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name); + LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n"); + LLAMA_LOG_ERROR("============================================================\n\n"); + throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name)); + } } float * f32_data;