q2_K: allow it to detect ternary nets and quantize accordingly

This commit is contained in:
Kawrakow
2024-08-05 11:59:36 +03:00
committed by Kawrakow
parent 74f2f50abf
commit 695c7eef49
4 changed files with 55 additions and 4 deletions

View File

@@ -16071,12 +16071,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
}
}
}
if ((new_type == GGML_TYPE_IQ2_XXS ||
if (!params->ignore_imatrix_rules && !imatrix &&
(new_type == GGML_TYPE_IQ2_XXS ||
new_type == GGML_TYPE_IQ2_XS ||
new_type == GGML_TYPE_IQ2_S ||
new_type == GGML_TYPE_IQ1_S ||
(new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
LLAMA_LOG_ERROR("\n\n============================================================\n");
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
@@ -16441,6 +16442,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
/*.only_copy =*/ false,
/*.pure =*/ false,
/*.keep_split =*/ false,
/*.ignore_imatrix_rules =*/ false,
/*.imatrix =*/ nullptr,
/*.kv_overrides =*/ nullptr,
};