diff --git a/src/llama.cpp b/src/llama.cpp
index 0020c77f..92ede50c 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16078,12 +16078,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     else if (qs.model.hparams.n_expert >= 8 && name.find("attn_k") != std::string::npos) {
         new_type = GGML_TYPE_Q4_K_R4;
     }
+    else if (qs.model.hparams.n_expert >= 8 && (name.find("blk.0.ffn_down") != std::string::npos ||
+                                                name.find("blk.0.ffn_gate") != std::string::npos ||
+                                                name.find("blk.0.ffn_up") != std::string::npos)) {
+        new_type = GGML_TYPE_IQ3_K_R4;
+    }
     else if (qs.model.hparams.n_expert >= 8 && name.find("attn_q") != std::string::npos) {
         new_type = GGML_TYPE_Q4_K_R4;
     }
     else if (name.find("attn_qkv.weight") != std::string::npos) {
         new_type = GGML_TYPE_IQ2_K_R4;
     }
+    else if (name.find("_shexp.weight") != std::string::npos) {
+        new_type = GGML_TYPE_IQ4_K_R4;
+    }
     else if (name.find("ffn_down") != std::string::npos) {
         auto [i_layer, n_layer] = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
         if (qs.params->ffn_down_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_down_type;