Allow quantization of ffn_gate_inp (#896)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Kawrakow (committed by GitHub)
2025-11-05 10:44:32 +02:00
parent 15159a87d4
commit abb966eba1
4 changed files with 19 additions and 2 deletions


@@ -3863,6 +3863,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.ffn_gate_type          =*/ GGML_TYPE_COUNT,
         /*.ffn_down_type          =*/ GGML_TYPE_COUNT,
         /*.ffn_up_type            =*/ GGML_TYPE_COUNT,
+        /*.ffn_gate_inp_type      =*/ GGML_TYPE_COUNT,
         /*.allow_requantize       =*/ false,
         /*.quantize_output_tensor =*/ true,
         /*.only_copy              =*/ false,
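
A minimal sketch (not part of this commit) of how a caller might use the new field, assuming the struct member is named ffn_gate_inp_type to match the comment in the diff above. llama_model_quantize_default_params() and llama_model_quantize() are the standard llama.cpp C API entry points; GGML_TYPE_COUNT continues to mean "no per-tensor override".

    // Sketch: quantize a GGUF model while forcing the MoE router tensor
    // ffn_gate_inp to Q8_0. Field name ffn_gate_inp_type is assumed from
    // the diff above; the rest is the existing llama.cpp quantize API.
    #include "llama.h"
    #include <stdio.h>

    int main(int argc, char ** argv) {
        if (argc < 3) {
            fprintf(stderr, "usage: %s input.gguf output.gguf\n", argv[0]);
            return 1;
        }

        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype             = LLAMA_FTYPE_MOSTLY_Q4_K_M; // overall quantization mix
        params.ffn_gate_inp_type = GGML_TYPE_Q8_0;            // override just ffn_gate_inp;
                                                              // GGML_TYPE_COUNT = leave default

        if (llama_model_quantize(argv[1], argv[2], &params) != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }

Per the commit title, ffn_gate_inp (the expert-routing tensor in MoE models) previously had no such override; this change lets it be set per-tensor like ffn_gate, ffn_down, and ffn_up.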