diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 6a80ac1b..94defa47 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -5696,8 +5696,6 @@ struct ggml_tensor * ggml_multi_add( result->src[0] = a; result->op_params[0] = n_experts; - //printf("%s: n_experts = %d\n", __func__, n_experts); - return result; } @@ -11277,8 +11275,6 @@ static void ggml_compute_forward_multi_add_f32( const int ith = params->ith; const int nth = params->nth; - //if (ith == 0) printf("%s: n_add = %d\n", __func__, n_add); - const int nr = ggml_nrows(dst); // rows per thread @@ -11297,12 +11293,6 @@ static void ggml_compute_forward_multi_add_f32( memset(dst_ptr, 0, ne0*sizeof(float)); for (int j = 0; j < n_add; ++j) { ggml_vec_add_f32(ne0, dst_ptr, dst_ptr, data + j*ne0); - //for (int l = 0; l < (int)ne0; ++l) { - // if (!isfinite(dst_ptr[l])) { - // printf("Oops: found %g for l = %d, j = %d in op %s\n", (double)dst_ptr[l], l, j, dst->name); - // exit(1); - // } - //} } } } @@ -12003,7 +11993,6 @@ static void ggml_compute_forward_mul_f32( if (ggml_nelements(dst->src[1]) == 1 && ggml_is_contiguous(dst->src[0]) && ggml_is_contiguous(dst) && dst->src[0]->type == GGML_TYPE_F32 && dst->src[1]->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - if (ith == 0) printf("%s: fast path\n", __func__); int64_t nelements = ggml_nelements(dst->src[0]); int64_t n_per_thread = (nelements + nth - 1)/nth; n_per_thread = MAX(1024, n_per_thread); @@ -12043,14 +12032,6 @@ static void ggml_compute_forward_mul_f32( float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); for (int64_t r = 0 ; r < nr0; ++r) { - //const float * x = src0_ptr + r*ne10; - //const float * y = src1_ptr; - //for (int j = 0; j < (int)ne10; ++j) { - // if (!isfinite(x[j]) || !isfinite(y[j])) { - // printf("Oops(%s, %s): found x = %g, y = %g for i3=%d,%d i2=%d,%d, i1=%d,%d, r=%d, j=%d\n", __func__, dst->name, (double)x[j], (double)y[j], (int)i03, (int)i13, (int)i02, (int)i12, (int)i01, (int)i11, (int)r, j); - // exit(1); - // } - //} #ifdef GGML_USE_ACCELERATE UNUSED(ggml_vec_mul_f32); @@ -12145,14 +12126,6 @@ static void ggml_compute_forward_div_f32( float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); for (int64_t r = 0; r < nr0; ++r) { - //const float * x = src0_ptr + r*ne10; - //const float * y = src1_ptr; - //for (int j = 0; j < (int)ne10; ++j) { - // if (!isfinite(x[j]) || !isfinite(y[j]) || fabsf(y[j]) < 1e-30f) { - // printf("Oops(%s, %s): found x = %g, y = %g for i3=%d,%d i2=%d,%d, i1=%d,%d, r=%d, j=%d\n", __func__, dst->name, (double)x[j], (double)y[j], (int)i03, (int)i13, (int)i02, (int)i12, (int)i01, (int)i11, (int)r, j); - // exit(1); - // } - //} #ifdef GGML_USE_ACCELERATE UNUSED(ggml_vec_div_f32); @@ -12163,7 +12136,6 @@ static void ggml_compute_forward_div_f32( } } } else { - printf("Non-contiguous div?\n"); // src1 is not contiguous for (int64_t ir = ith; ir < nr; ir += nth) { // src0 and dst are same shape => same indices @@ -14790,36 +14762,6 @@ static void ggml_compute_forward_mul_mat_id( } } -//#if GGML_USE_IQK_MULMAT -// if (ne13 == 1 && ids->ne[1] == 1 && dst->type == GGML_TYPE_F32) { -// if (src1->type != vec_dot_type) { -// ggml_barrier(params->shared); -// } -// //if (ith == 0) printf("ne0 = %d, nb = %d, %d, %d\n", (int)ne0, (int)nb0, (int)nb1, (int)nb2); -// const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata; -// const size_t row_size = ggml_row_size(vec_dot_type, ne10); -// for (int id = 0; id < n_ids; ++id) { -// int i02 = *(const int32_t *) ((const char *) ids->data + id*ids->nb[0]); -// if (i02 >= 0 && i02 < n_as) { -// if (!iqk_mul_mat(ne01, 1, ne00, -// src0->type, (const char *)src0->data + i02*nb02, src0->nb[1], -// vec_dot_type, wdata, row_size, -// (float *)dst->data + ne0*id, ne0, ith, nth)) goto IQK_MulMat_Not_Available0; -// } else { -// int npt = (ne0 + nth - 1)/nth; -// int npt64 = 64*((npt + 63)/64); -// int first = npt64*ith; -// int last = MIN(first + npt64, ne0); -// if (last > first) { -// memset((float *)dst->data + ne0*id + first, 0, (last - first)*sizeof(float)); -// } -// } -// } -// return; -// } -//IQK_MulMat_Not_Available0:; -//#endif - #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)] GGML_ASSERT(ids->ne[1] == dst->ne[2]); @@ -14827,6 +14769,8 @@ static void ggml_compute_forward_mul_mat_id( for (int id = 0; id < n_ids; ++id) { const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]); if (i02 < 0 || i02 >= n_as) { + // This is needed for SER. If fewer experts have been activated for this row, we need to + // clear it, else there could be garbage that leads to NaNs later on. memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float)); } } @@ -14850,20 +14794,6 @@ static void ggml_compute_forward_mul_mat_id( } } - //{ - // int nrows = ggml_nrows(dst); - // int npt = (nrows + nth - 1)/nth; - // int first_row = ith*npt; - // if (first_row < nrows) { - // int last_row = MIN(first_row + npt, nrows); - // for (int row = first_row; row < last_row; ++row) { - // int i12 = row/ne1; - // int i11 = row - i12*ne11; - // memset((char *)dst->data + i11*nb1 + i12*nb2, 0, ne0*sizeof(float)); - // } - // } - //} - ggml_barrier(params->shared); // compute each matrix multiplication in sequence @@ -14894,7 +14824,6 @@ static void ggml_compute_forward_mul_mat_id( IQK_MulMat_Not_Available:; #endif - //printf("Oops\n"); if (((ggml_n_dims(src0) - 1) == 2) && gemv) { int64_t src0_cur_start = (ith * ne01) / nth; int64_t src0_cur_end = ((ith + 1) * ne01) / nth; @@ -15020,23 +14949,6 @@ IQK_MulMat_Not_Available:; } } - //ggml_barrier(params->shared); - //if (ith == 0) { - // printf("%s: Checking %s for NaNs. ne = %d, %d, %d, nb = %d, %d, %d\n", __func__, dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)nb0, (int)nb1, (int)nb2); - // printf("%s: src0 is %s, src1 is %s\n", __func__, src0->name, src1->name); - //for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) { - // for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) { - // const float * c = (const float *)((const char *)dst->data + i1*nb1 + i2*nb2); - // for (int j = 0; j < (int)dst->ne[0]; ++j) { - // if (!isfinite(c[j])) { - // printf("Oops: found %g in %s for i0=%d, i1=%d, i2=%d\n", (double)c[j], dst->name, j, i1, i2); - // exit(1); - // } - // } - // } - //} - //} - #undef MMID_MATRIX_ROW } @@ -15122,6 +15034,8 @@ static void ggml_compute_forward_mul_mat_id_up_gate( for (int id = 0; id < n_ids; ++id) { const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]); if (i02 < 0 || i02 >= n_as) { + // This is needed for SER. If fewer experts have been activated for this row, we need to + // clear it, else there could be garbage that leads to NaNs later on. memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float)); } } @@ -15145,20 +15059,6 @@ static void ggml_compute_forward_mul_mat_id_up_gate( } } - //{ - // int nrows = ggml_nrows(dst); - // int npt = (nrows + nth - 1)/nth; - // int first_row = ith*npt; - // if (first_row < nrows) { - // int last_row = MIN(first_row + npt, nrows); - // for (int row = first_row; row < last_row; ++row) { - // int i12 = row/ne1; - // int i11 = row - i12*ne11; - // memset((char *)dst->data + i11*nb1 + i12*nb2, 0, ne0*sizeof(float)); - // } - // } - //} - ggml_barrier(params->shared); @@ -15210,23 +15110,6 @@ static void ggml_compute_forward_mul_mat_id_up_gate( // } } - //ggml_barrier(params->shared); - //if (ith == 0) { - // printf("%s: Checking %s for NaNs. ne = %d, %d, %d, nb = %d, %d, %d\n", __func__, dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)nb0, (int)nb1, (int)nb2); - //for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) { - // for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) { - // const float * c = (const float *)((const char *)dst->data + i1*nb1 + i2*nb2); - // for (int j = 0; j < (int)dst->ne[0]; ++j) { - // if (!isfinite(c[j])) { - // printf("Oops: found %g in %s for i0=%d, i1=%d, i2=%d\n", (double)c[j], dst->name, j, i1, i2); - // exit(1); - // } - // } - // } - //} - //} - - #undef MMID_MATRIX_ROW } #endif @@ -16248,23 +16131,6 @@ static void ggml_compute_forward_get_rows( } } - //ggml_barrier(params->shared); - //if (params->ith == 0) { - // for (int i3 = 0; i3 < (int)dst->ne[3]; ++i3) { - // for (int i2 = 0; i2 < (int)dst->ne[2]; ++i2) { - // for (int i1 = 0; i1 < (int)dst->ne[1]; ++i1) { - // const float * x = (const float *)((const char *)dst->data + i1*dst->nb[1] + i2*dst->nb[2] + i3*dst->nb[3]); - // for (int j = 0; j < (int)dst->ne[0]; ++j) { - // if (!isfinite(x[j])) { - // printf("Oops(%s, %s): found %g for i0=%d, i1=%d, i2=%d, i3=%d\n", __func__, dst->name, (double)x[j], j, i1, i2, i3); - // } - // } - // } - // } - // } - //} - - //static bool first = true; //printf("ne0 = %d, ne1 = %d, ne2 = %d\n", dst->ne[0], dst->ne[1], dst->ne[2]); //if (first) {