mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
Fix KT quantization yet again
This commit is contained in:
@@ -8733,6 +8733,11 @@ void quantize_row_iq1_kt_impl(const float * x, void * vy, int n_per_row, const f
|
|||||||
float ax = std::abs(xb[j]);
|
float ax = std::abs(xb[j]);
|
||||||
amax = std::max(amax, ax);
|
amax = std::max(amax, ax);
|
||||||
}
|
}
|
||||||
|
if (amax < 1e-16f) {
|
||||||
|
scales[ib] = 0.0f;
|
||||||
|
for (int ig = 0; ig < Q::kNg; ++ig) all_idx[(ibl*Q::kSuperBlockSize + ib*Q::kBlockSize)/Q::kGroupSize + ig] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
float scale_0 = std::max(90.f, 124.f*amax/amax_row);
|
float scale_0 = std::max(90.f, 124.f*amax/amax_row);
|
||||||
quantizer.find_best_match( amax/scale_0, xb, weight, best_idx);
|
quantizer.find_best_match( amax/scale_0, xb, weight, best_idx);
|
||||||
auto [dp, score_p] = quantizer.find_best_scale(xb, weight, best_idx);
|
auto [dp, score_p] = quantizer.find_best_scale(xb, weight, best_idx);
|
||||||
@@ -8998,6 +9003,11 @@ void quantize_row_iq2_kt_impl(const float * x, void * vy, int n_per_row, const f
|
|||||||
float ax = std::abs(xb[j]);
|
float ax = std::abs(xb[j]);
|
||||||
amax = std::max(amax, ax);
|
amax = std::max(amax, ax);
|
||||||
}
|
}
|
||||||
|
if (amax < 1e-16f) {
|
||||||
|
scales[ib] = 0.0f;
|
||||||
|
for (int ig = 0; ig < Q::kNg; ++ig) all_idx[(ibl*Q::kSuperBlockSize + ib*Q::kBlockSize)/Q::kGroupSize + ig] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
float scale_0 = std::max(90.f, 124.f*amax/amax_row);
|
float scale_0 = std::max(90.f, 124.f*amax/amax_row);
|
||||||
quantizer.find_best_match( amax/scale_0, xb, weight, best_idx);
|
quantizer.find_best_match( amax/scale_0, xb, weight, best_idx);
|
||||||
auto [dp, score_p] = quantizer.find_best_scale(xb, weight, best_idx);
|
auto [dp, score_p] = quantizer.find_best_scale(xb, weight, best_idx);
|
||||||
@@ -9289,8 +9299,10 @@ void quantize_row_iq3_kt_impl(const float * x, void * vy, int n_per_row, const f
|
|||||||
xaux[j] = ax;
|
xaux[j] = ax;
|
||||||
amax = std::max(amax, ax);
|
amax = std::max(amax, ax);
|
||||||
}
|
}
|
||||||
scales[ib] = 0;
|
if (amax < 1e-16f) {
|
||||||
if (!amax) continue;
|
scales[ib] = 0.0f;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
//quantizer.find_best_match(amax/96.f, xaux, weight, best_idx+Q::kNg);
|
//quantizer.find_best_match(amax/96.f, xaux, weight, best_idx+Q::kNg);
|
||||||
//scales[ib] = quantizer.find_best_scale(xaux, weight, best_idx+Q::kNg).first;
|
//scales[ib] = quantizer.find_best_scale(xaux, weight, best_idx+Q::kNg).first;
|
||||||
@@ -9577,7 +9589,7 @@ void quantize_row_iq4_kt_impl(const float * x, void * vy, int n_per_row, const f
|
|||||||
float ax = std::abs(xaux[j]);
|
float ax = std::abs(xaux[j]);
|
||||||
amax = std::max(amax, ax);
|
amax = std::max(amax, ax);
|
||||||
}
|
}
|
||||||
if (!amax) {
|
if (amax < 1e-16f) {
|
||||||
scales[ib] = 0;
|
scales[ib] = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user