Mirror of https://github.com/ikawrakow/ik_llama.cpp.git, synced 2026-04-30 19:31:48 +00:00.
Fix very low bpw missing imatrix check (#1284)
This commit is contained in:
@@ -1425,23 +1425,27 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!params->ignore_imatrix_rules && !imatrix &&
|
if (!params->ignore_imatrix_rules && !imatrix) {
|
||||||
(new_type == GGML_TYPE_IQ2_XXS ||
|
bool is_very_low_bpw_quant = new_type == GGML_TYPE_IQ2_XXS ||
|
||||||
new_type == GGML_TYPE_IQ2_XXS_R4 ||
|
new_type == GGML_TYPE_IQ2_XXS_R4 ||
|
||||||
new_type == GGML_TYPE_IQ2_XS ||
|
new_type == GGML_TYPE_IQ2_XS ||
|
||||||
new_type == GGML_TYPE_IQ2_XS_R4 ||
|
new_type == GGML_TYPE_IQ2_XS_R4 ||
|
||||||
new_type == GGML_TYPE_IQ2_S ||
|
new_type == GGML_TYPE_IQ2_S ||
|
||||||
new_type == GGML_TYPE_IQ2_S_R4||
|
new_type == GGML_TYPE_IQ2_S_R4 ||
|
||||||
new_type == GGML_TYPE_IQ1_S ||
|
new_type == GGML_TYPE_IQ1_S ||
|
||||||
new_type == GGML_TYPE_IQ1_S_R4||
|
new_type == GGML_TYPE_IQ1_S_R4 ||
|
||||||
new_type == GGML_TYPE_IQ1_M_R4||
|
new_type == GGML_TYPE_IQ1_M ||
|
||||||
(new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
|
new_type == GGML_TYPE_IQ1_M_R4 ||
|
||||||
(new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
|
new_type == GGML_TYPE_IQ1_KT ||
|
||||||
LLAMA_LOG_ERROR("\n\n============================================================\n");
|
new_type == GGML_TYPE_IQ2_KT ||
|
||||||
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
|
(new_type == GGML_TYPE_Q2_K && ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S);
|
||||||
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
|
if (is_very_low_bpw_quant && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) {
|
||||||
LLAMA_LOG_ERROR("============================================================\n\n");
|
LLAMA_LOG_ERROR("\n\n============================================================\n");
|
||||||
throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
|
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
|
||||||
|
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
|
||||||
|
LLAMA_LOG_ERROR("============================================================\n\n");
|
||||||
|
throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float * f32_data;
|
float * f32_data;
|
||||||
|
|||||||
Reference in New Issue
Block a user