diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index 4baa11c1..eb760a6d 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -3454,6 +3454,7 @@ void QuantizerIQKT::find_best_ma float sx[8]; int index[8]; auto vid = _mm256_set1_ps(id); + auto add8 = _mm256_set1_epi32(8); for (int l = 0; l < kNg; ++l) { auto xl = xb + 8*l; auto wl = weight + 8*l; @@ -3462,6 +3463,7 @@ void QuantizerIQKT::find_best_ma auto vbest = _mm256_set1_ps(INFINITY); auto best_index = _mm256_set1_epi32(-1); float best = INFINITY; int jbest = -1; + auto idx = add_idx; for (int j = 0; j < ncluster; j += 8) { auto idx = _mm256_add_epi32(_mm256_set1_epi32(j), add_idx); for (int i = 0; i < 8; ++i) { @@ -3474,6 +3476,7 @@ void QuantizerIQKT::find_best_ma best_index = _mm256_or_si256(_mm256_and_si256(_mm256_castps_si256(mask), idx), _mm256_andnot_si256(_mm256_castps_si256(mask), best_index)); vbest = _mm256_min_ps(vbest, score); + idx = _mm256_add_epi32(idx, add8); } _mm256_store_ps(sx, vbest); _mm256_store_si256((__m256i *)index, best_index); @@ -3487,8 +3490,7 @@ void QuantizerIQKT::find_best_ma vbest = _mm256_set1_ps(INFINITY); best_index = _mm256_set1_epi32(-1); best = INFINITY; jbest = -1; - auto idx = add_idx; - auto add8 = _mm256_set1_epi32(8); + idx = add_idx; for (int j = 0; j < int(points.size()); j += 8) { //auto idx = _mm256_add_epi32(_mm256_set1_epi32(j), add_idx); for (int i = 0; i < 8; ++i) {