mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-09 05:20:01 +00:00
Honor attn_output specified in the command line also for low-bit quants
This commit is contained in:
@@ -16339,7 +16339,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
|
||||
++qs.i_ffn_down;
|
||||
}
|
||||
else if (name.find("attn_output.weight") != std::string::npos) {
|
||||
if (qs.model.hparams.n_expert >= 4) {
|
||||
if (qs.params->attn_output_type < GGML_TYPE_COUNT) new_type = qs.params->attn_output_type;
|
||||
else if (qs.model.hparams.n_expert >= 4) {
|
||||
new_type = GGML_TYPE_Q5_K;
|
||||
} else {
|
||||
if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_K;
|
||||
|
||||
Reference in New Issue
Block a user