Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-02-05)
Allow quantization of ffn_gate_inp (#896)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
@@ -3863,6 +3863,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.ffn_gate_type             =*/ GGML_TYPE_COUNT,
         /*.ffn_down_type             =*/ GGML_TYPE_COUNT,
         /*.ffn_up_type               =*/ GGML_TYPE_COUNT,
+        /*.ffn_gat_inp_type          =*/ GGML_TYPE_COUNT,
         /*.allow_requantize          =*/ false,
         /*.quantize_output_tensor    =*/ true,
         /*.only_copy                 =*/ false,
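
Below is a minimal sketch of how a caller might use the new override from the C API. GGML_TYPE_COUNT appears to act as the "no override" sentinel for these per-tensor type fields, so setting the new field to a concrete ggml type opts the ffn_gate_inp tensor into quantization. The field spelling follows this diff, and the call pattern follows the existing llama.h quantize API; the file names and chosen types here are illustrative assumptions, not taken from the commit.

// Minimal sketch (not from the commit): quantize a model while overriding
// the type of the ffn_gate_inp tensor via the field added in this diff.
#include "llama.h"

int main(void) {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype            = LLAMA_FTYPE_MOSTLY_Q4_K_M;  // overall quantization target
    params.ffn_gat_inp_type = GGML_TYPE_Q8_0;             // per-tensor override (assumed value)
    // llama_model_quantize returns 0 on success.
    return (int) llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params);
}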