mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-01 01:24:08 +00:00
q2_K: allow it to detect ternary nets and quantize accordingly
This commit is contained in:
@@ -16071,12 +16071,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((new_type == GGML_TYPE_IQ2_XXS ||
|
||||
if (!params->ignore_imatrix_rules && !imatrix &&
|
||||
(new_type == GGML_TYPE_IQ2_XXS ||
|
||||
new_type == GGML_TYPE_IQ2_XS ||
|
||||
new_type == GGML_TYPE_IQ2_S ||
|
||||
new_type == GGML_TYPE_IQ1_S ||
|
||||
(new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
|
||||
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
|
||||
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
|
||||
LLAMA_LOG_ERROR("\n\n============================================================\n");
|
||||
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
|
||||
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
|
||||
@@ -16441,6 +16442,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
|
||||
/*.only_copy =*/ false,
|
||||
/*.pure =*/ false,
|
||||
/*.keep_split =*/ false,
|
||||
/*.ignore_imatrix_rules =*/ false,
|
||||
/*.imatrix =*/ nullptr,
|
||||
/*.kv_overrides =*/ nullptr,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user