diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 6a80ac1b..94defa47 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -5696,8 +5696,6 @@ struct ggml_tensor * ggml_multi_add(
     result->src[0] = a;
     result->op_params[0] = n_experts;
 
-    //printf("%s: n_experts = %d\n", __func__, n_experts);
-
     return result;
 }
 
@@ -11277,8 +11275,6 @@ static void ggml_compute_forward_multi_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    //if (ith == 0) printf("%s: n_add = %d\n", __func__, n_add);
-
     const int nr  = ggml_nrows(dst);
 
     // rows per thread
@@ -11297,12 +11293,6 @@ static void ggml_compute_forward_multi_add_f32(
         memset(dst_ptr, 0, ne0*sizeof(float));
         for (int j = 0; j < n_add; ++j) {
             ggml_vec_add_f32(ne0, dst_ptr, dst_ptr, data + j*ne0);
-            //for (int l = 0; l < (int)ne0; ++l) {
-            //    if (!isfinite(dst_ptr[l])) {
-            //        printf("Oops: found %g for l = %d, j = %d in op %s\n", (double)dst_ptr[l], l, j, dst->name);
-            //        exit(1);
-            //    }
-            //}
         }
     }
 }
@@ -12003,7 +11993,6 @@ static void ggml_compute_forward_mul_f32(
 
     if (ggml_nelements(dst->src[1]) == 1 && ggml_is_contiguous(dst->src[0]) && ggml_is_contiguous(dst) &&
         dst->src[0]->type == GGML_TYPE_F32 && dst->src[1]->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
-        if (ith == 0) printf("%s: fast path\n", __func__);
         int64_t nelements = ggml_nelements(dst->src[0]);
         int64_t n_per_thread = (nelements + nth - 1)/nth;
         n_per_thread = MAX(1024, n_per_thread);
@@ -12043,14 +12032,6 @@ static void ggml_compute_forward_mul_f32(
             float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
 
             for (int64_t r = 0 ; r < nr0; ++r) {
-                //const float * x = src0_ptr + r*ne10;
-                //const float * y = src1_ptr;
-                //for (int j = 0; j < (int)ne10; ++j) {
-                //    if (!isfinite(x[j]) || !isfinite(y[j])) {
-                //        printf("Oops(%s, %s): found x = %g, y = %g for i3=%d,%d i2=%d,%d, i1=%d,%d, r=%d, j=%d\n", __func__, dst->name, (double)x[j], (double)y[j], (int)i03, (int)i13, (int)i02, (int)i12, (int)i01, (int)i11, (int)r, j);
-                //        exit(1);
-                //    }
-                //}
 #ifdef GGML_USE_ACCELERATE
                 UNUSED(ggml_vec_mul_f32);
 
@@ -12145,14 +12126,6 @@ static void ggml_compute_forward_div_f32(
             float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
 
             for (int64_t r = 0; r < nr0; ++r) {
-                //const float * x = src0_ptr + r*ne10;
-                //const float * y = src1_ptr;
-                //for (int j = 0; j < (int)ne10; ++j) {
-                //    if (!isfinite(x[j]) || !isfinite(y[j]) || fabsf(y[j]) < 1e-30f) {
-                //        printf("Oops(%s, %s): found x = %g, y = %g for i3=%d,%d i2=%d,%d, i1=%d,%d, r=%d, j=%d\n", __func__, dst->name, (double)x[j], (double)y[j], (int)i03, (int)i13, (int)i02, (int)i12, (int)i01, (int)i11, (int)r, j);
-                //        exit(1);
-                //    }
-                //}
 #ifdef GGML_USE_ACCELERATE
                 UNUSED(ggml_vec_div_f32);
 
@@ -12163,7 +12136,6 @@ static void ggml_compute_forward_div_f32(
             }
         }
     } else {
-        printf("Non-contiguous div?\n");
         // src1 is not contiguous
         for (int64_t ir = ith; ir < nr; ir += nth) {
             // src0 and dst are same shape => same indices
@@ -14790,36 +14762,6 @@ static void ggml_compute_forward_mul_mat_id(
         }
     }
 
-//#if GGML_USE_IQK_MULMAT
-//    if (ne13 == 1 && ids->ne[1] == 1 && dst->type == GGML_TYPE_F32) {
-//        if (src1->type != vec_dot_type) {
-//            ggml_barrier(params->shared);
-//        }
-//        //if (ith == 0) printf("ne0 = %d, nb = %d, %d, %d\n", (int)ne0, (int)nb0, (int)nb1, (int)nb2);
-//        const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
-//        const size_t row_size = ggml_row_size(vec_dot_type, ne10);
-//        for (int id = 0; id < n_ids; ++id) {
-//            int i02 = *(const int32_t *) ((const char *) ids->data + id*ids->nb[0]);
-//            if (i02 >= 0 && i02 < n_as) {
-//                if (!iqk_mul_mat(ne01, 1, ne00,
-//                            src0->type, (const char *)src0->data + i02*nb02, src0->nb[1],
-//                            vec_dot_type, wdata, row_size,
-//                            (float *)dst->data + ne0*id, ne0, ith, nth)) goto IQK_MulMat_Not_Available0;
-//            } else {
-//                int npt = (ne0 + nth - 1)/nth;
-//                int npt64 = 64*((npt + 63)/64);
-//                int first = npt64*ith;
-//                int last  = MIN(first + npt64, ne0);
-//                if (last > first) {
-//                    memset((float *)dst->data + ne0*id + first, 0, (last - first)*sizeof(float));
-//                }
-//            }
-//        }
-//        return;
-//    }
-//IQK_MulMat_Not_Available0:;
-//#endif
-
 #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)]
 
     GGML_ASSERT(ids->ne[1] == dst->ne[2]);
@@ -14827,6 +14769,8 @@ static void ggml_compute_forward_mul_mat_id(
         for (int id = 0; id < n_ids; ++id) {
             const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]);
             if (i02 < 0 || i02 >= n_as) {
+                // This is needed for SER. If fewer experts have been activated for this row, we need to
+                // clear it, else there could be garbage that leads to NaNs later on.
                 memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float));
             }
         }
@@ -14850,20 +14794,6 @@ static void ggml_compute_forward_mul_mat_id(
         }
     }
 
-    //{
-    //    int nrows = ggml_nrows(dst);
-    //    int npt = (nrows + nth - 1)/nth;
-    //    int first_row = ith*npt;
-    //    if (first_row < nrows) {
-    //        int last_row = MIN(first_row + npt, nrows);
-    //        for (int row = first_row; row < last_row; ++row) {
-    //            int i12 = row/ne1;
-    //            int i11 = row - i12*ne11;
-    //            memset((char *)dst->data + i11*nb1 + i12*nb2, 0, ne0*sizeof(float));
-    //        }
-    //    }
-    //}
-
     ggml_barrier(params->shared);
 
     // compute each matrix multiplication in sequence
@@ -14894,7 +14824,6 @@ static void ggml_compute_forward_mul_mat_id(
 IQK_MulMat_Not_Available:;
 #endif
 
-        //printf("Oops\n");
         if (((ggml_n_dims(src0) - 1) == 2) && gemv) {
             int64_t src0_cur_start = (ith * ne01) / nth;
             int64_t src0_cur_end   = ((ith + 1) * ne01) / nth;
@@ -15020,23 +14949,6 @@ IQK_MulMat_Not_Available:;
         }
     }
 
-    //ggml_barrier(params->shared);
-    //if (ith == 0) {
-    //    printf("%s: Checking %s for NaNs. ne = %d, %d, %d, nb = %d, %d, %d\n", __func__, dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)nb0, (int)nb1, (int)nb2);
-    //    printf("%s: src0 is %s, src1 is %s\n", __func__, src0->name, src1->name);
-    //for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) {
-    //    for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) {
-    //        const float * c = (const float *)((const char *)dst->data + i1*nb1 + i2*nb2);
-    //        for (int j = 0; j < (int)dst->ne[0]; ++j) {
-    //            if (!isfinite(c[j])) {
-    //                printf("Oops: found %g in %s for i0=%d, i1=%d, i2=%d\n", (double)c[j], dst->name, j, i1, i2);
-    //                exit(1);
-    //            }
-    //        }
-    //    }
-    //}
-    //}
-
 #undef MMID_MATRIX_ROW
 }
 
@@ -15122,6 +15034,8 @@ static void ggml_compute_forward_mul_mat_id_up_gate(
         for (int id = 0; id < n_ids; ++id) {
             const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]);
             if (i02 < 0 || i02 >= n_as) {
+                // This is needed for SER. If fewer experts have been activated for this row, we need to
+                // clear it, else there could be garbage that leads to NaNs later on.
                 memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float));
             }
         }
@@ -15145,20 +15059,6 @@ static void ggml_compute_forward_mul_mat_id_up_gate(
         }
     }
 
-    //{
-    //    int nrows = ggml_nrows(dst);
-    //    int npt = (nrows + nth - 1)/nth;
-    //    int first_row = ith*npt;
-    //    if (first_row < nrows) {
-    //        int last_row = MIN(first_row + npt, nrows);
-    //        for (int row = first_row; row < last_row; ++row) {
-    //            int i12 = row/ne1;
-    //            int i11 = row - i12*ne11;
-    //            memset((char *)dst->data + i11*nb1 + i12*nb2, 0, ne0*sizeof(float));
-    //        }
-    //    }
-    //}
-
     ggml_barrier(params->shared);
 
 
@@ -15210,23 +15110,6 @@ static void ggml_compute_forward_mul_mat_id_up_gate(
 //        }
     }
 
-    //ggml_barrier(params->shared);
-    //if (ith == 0) {
-    //    printf("%s: Checking %s for NaNs. ne = %d, %d, %d, nb = %d, %d, %d\n", __func__, dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)nb0, (int)nb1, (int)nb2);
-    //for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) {
-    //    for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) {
-    //        const float * c = (const float *)((const char *)dst->data + i1*nb1 + i2*nb2);
-    //        for (int j = 0; j < (int)dst->ne[0]; ++j) {
-    //            if (!isfinite(c[j])) {
-    //                printf("Oops: found %g in %s for i0=%d, i1=%d, i2=%d\n", (double)c[j], dst->name, j, i1, i2);
-    //                exit(1);
-    //            }
-    //        }
-    //    }
-    //}
-    //}
-
-
 #undef MMID_MATRIX_ROW
 }
 #endif
@@ -16248,23 +16131,6 @@ static void ggml_compute_forward_get_rows(
             }
     }
 
-    //ggml_barrier(params->shared);
-    //if (params->ith == 0) {
-    //    for (int i3 = 0; i3 < (int)dst->ne[3]; ++i3) {
-    //        for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) {
-    //            for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) {
-    //                const float * x = (const float *)((const char *)dst->data + i1*dst->nb[1] + i2*dst->nb[2] + i3*dst->nb[3]);
-    //                for (int j = 0; j < (int)dst->ne[0]; ++j) {
-    //                    if (!isfinite(x[j])) {
-    //                        printf("Oops(%s, %s): found %g for i0=%d, i1=%d, i2=%d, i3=%d\n", __func__, dst->name, (double)x[j], j, i1, i2, i3);
-    //                    }
-    //                }
-    //            }
-    //        }
-    //    }
-    //}
-
-
     //static bool first = true;
     //printf("ne0 = %d, ne1 = %d, ne2 = %d\n", dst->ne[0], dst->ne[1], dst->ne[2]);
     //if (first) {