Allow quantization of ffn_gate_inp (#896)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Kawrakow (committed by GitHub)
2025-11-05 10:44:32 +02:00
parent 15159a87d4
commit abb966eba1
4 changed files with 19 additions and 2 deletions


@@ -3863,6 +3863,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.ffn_gate_type          =*/ GGML_TYPE_COUNT,
         /*.ffn_down_type          =*/ GGML_TYPE_COUNT,
         /*.ffn_up_type            =*/ GGML_TYPE_COUNT,
+        /*.ffn_gate_inp_type      =*/ GGML_TYPE_COUNT,
         /*.allow_requantize       =*/ false,
         /*.quantize_output_tensor =*/ true,
         /*.only_copy              =*/ false,
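
A minimal sketch (not part of this commit) of how a caller might use the new field, assuming the struct member is named ffn_gate_inp_type to match the comment in the diff above. llama_model_quantize_default_params() and llama_model_quantize() are the standard llama.cpp C API entry points; GGML_TYPE_COUNT continues to mean "no per-tensor override".

    // Sketch: quantize a GGUF model while forcing the MoE router tensor
    // ffn_gate_inp to Q8_0. Field name ffn_gate_inp_type is assumed from
    // the diff above; the rest is the existing llama.cpp quantize API.
    #include "llama.h"
    #include <stdio.h>

    int main(int argc, char ** argv) {
        if (argc < 3) {
            fprintf(stderr, "usage: %s input.gguf output.gguf\n", argv[0]);
            return 1;
        }

        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype             = LLAMA_FTYPE_MOSTLY_Q4_K_M; // overall quantization mix
        params.ffn_gate_inp_type = GGML_TYPE_Q8_0;            // override just ffn_gate_inp;
                                                              // GGML_TYPE_COUNT = leave default

        if (llama_model_quantize(argv[1], argv[2], &params) != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }

Per the commit title, ffn_gate_inp (the expert-routing tensor in MoE models) previously had no such override; this change lets it be set per-tensor like ffn_gate, ffn_down, and ffn_up.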