diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index ecdf6c24..66cfed83 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -1053,9 +1053,16 @@ void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const fl for (int j = 0; j < kBlockSize; ++j) weight[j] = 0.25f*sigma2 + xb[j]*xb[j]; } sw[ib] = 0; + float amax = 0; for (int j = 0; j < kBlockSize; ++j) { sw[ib] += weight[j]; pairs[j] = {xb[j], j}; + float ax = std::abs(xb[j]); + amax = std::max(amax, ax); + } + if (amax < 1e-16f) { + scales[ib] = 0; + continue; } std::sort(pairs.begin(), pairs.end()); sumx[0] = sumw[0] = 0; @@ -1269,9 +1276,16 @@ void quantize_row_iq2_ks_impl(const float * x, void * vy, int n_per_row, const f for (int j = 0; j < kBlockSize; ++j) weight[j] = 0.25f*sigma2 + xb[j]*xb[j]; } sw[ib] = 0; + float amax = 0; for (int j = 0; j < kBlockSize; ++j) { sw[ib] += weight[j]; pairs[j] = {xb[j], j}; + float ax = std::abs(xb[j]); + amax = std::max(amax, ax); + } + if (amax < 1e-16f) { + scales[ib] = 0; + continue; } //float amax = 0, max = 0; //for (int j = 0; j < kBlockSize; ++j) { @@ -1678,7 +1692,7 @@ void quantize_row_iq2_kl_impl(const float * x, void * vy, int n_per_row, const f amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -1929,7 +1943,7 @@ static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, c amax = ax; max = xb[j]; } } - if (amax < 1e-9f) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -2216,7 +2230,7 @@ static void quantize_row_iq3_ks_impl(const int super_block_size, const int block amax = ax; max = xb[j]; } } - if (amax < 1e-9f) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -2544,7 +2558,7 @@ static void quantize_row_iq4_k_impl_bs16(const int super_block_size, const int b amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -2862,7 +2876,7 @@ void quantize_row_iq5_k_impl(const float * x, void * vy, int n_per_row, const fl amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -3216,7 +3230,7 @@ void quantize_row_iq6_k_impl(const float * x, void * vy, int n_per_row, const fl amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -3918,7 +3932,7 @@ static void quantize_row_iq4_k_impl_bs128(const int super_block_size, const int amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -4167,7 +4181,7 @@ static void quantize_row_iq5_ks_impl(const int super_block_size, const int block amax = ax; max = xb[j]; } } - if (amax < 1e-15f) { + if (amax < 1e-16f) { scales[ib] = 0; continue; } @@ -4470,7 +4484,7 @@ static void quantize_row_iq4_kss_impl(int n_per_row, const float * x, char * cy, amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < 1e-16f) { scales[ib] = 0; continue; }