Allow quantization of ffn_gate_inp

Iwan Kawrakow
2025-11-04 11:34:10 +02:00
parent cd8d0b0832
commit 04e57f4356
4 changed files with 19 additions and 2 deletions


@@ -454,6 +454,7 @@ extern "C" {
enum ggml_type ffn_gate_type; // feedforward network gate type
enum ggml_type ffn_down_type; // feedforward network down type
enum ggml_type ffn_up_type; // feedforward network up type
enum ggml_type ffn_gate_inp_type; // routed experts probabilities type (relevant for MoE models only)
bool allow_requantize; // allow quantizing non-f32/f16 tensors
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
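For context, a minimal usage sketch in C++ showing how the new field could be set, assuming the llama.cpp-style quantization API this struct belongs to; the file names and the chosen ggml_type values are placeholders, not recommendations from this commit:

```cpp
#include "llama.h"

int main() {
    // Start from the default quantization parameters.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M;  // overall quantization mix

    // New in this commit: override the type of the routed-experts
    // probabilities tensor (ffn_gate_inp). Only relevant for MoE models.
    params.ffn_gate_inp_type = GGML_TYPE_Q8_0;

    // Input/output file names are placeholders.
    return llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params);
}
```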