mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 15:44:10 +00:00
iq4_xxs: Zen4
I noticed that iq4_xs is wrong on Zen4 (and possibly AVX2). It is the same mistake again: packing int32_t back into int16_t, which overflows occasionally — only occasionally, which is why the result did not look completely wrong and I failed to notice it earlier.
This commit is contained in:
@@ -1099,6 +1099,54 @@ struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> {
|
||||
const __m512i permute2 = _mm512_set_epi64(15, 14, 13, 12, 7, 6, 5, 4);
|
||||
};
|
||||
|
||||
struct DequantizerIQ4XXS final : public BaseDequantizer<block_iq4_xxs, true> {
    // AVX512 dequantizer for IQ4_XXS blocks. Loads the 4-bit non-linear
    // (iq4nl) lookup table once at construction.
    DequantizerIQ4XXS(const void * vx, size_t bx) : BaseDequantizer(vx, bx), values(load_iq4nl_values_512()) {}
    // Decode the per-block scales of block i, fold the block-minimum
    // contribution into accm via s8k, and broadcast the 16-bit scales into
    // the four 512-bit registers consumed by the dot-product kernel.
    template <typename Q8>
    inline void new_block(int i, const Q8& q8, __m256 * accm, __m512i * scales) {
        // 8 packed scale bytes -> 8 x int16.
        auto scales128 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *)x[i].scales));
        // Bit 0 of each scale byte is a shift flag: flagged lanes get +4 below.
        auto shifts = _mm_and_si128(_mm_cmpeq_epi16(_mm_and_si128(scales128, m1), m1), m4);
        // Strip the flag bit (mask = 254) and re-center with -127.
        scales128 = _mm_add_epi16(_mm_and_si128(scales128, mask), m127);
        // scale * (-128 or -124): the per-block minimum term. Staying in
        // int16 here is safe because both factors are small; no int32
        // packing step that could overflow.
        auto scales_s = _mm_mullo_epi16(scales128, _mm_add_epi16(m128, shifts));
        s8k.accum_mins(scales_s, q8, i, d, accm);
        // Duplicate the 8 scales across all four 128-bit lanes, then pick
        // out one scale pair per 32-quant group with the shuffle tables.
        auto scales256 = MM256_SET_M128I(scales128, scales128);
        auto all_scales = _mm512_inserti32x8(_mm512_castsi256_si512(scales256), scales256, 1);
        scales[0] = _mm512_shuffle_epi8(all_scales, shuffles[0]);
        scales[1] = _mm512_shuffle_epi8(all_scales, shuffles[1]);
        scales[2] = _mm512_shuffle_epi8(all_scales, shuffles[2]);
        scales[3] = _mm512_shuffle_epi8(all_scales, shuffles[3]);
        prepare(x[i].qs);
    }
    // Unpack 128 4-bit quants into bits.values[0..3], reorder the 16-byte
    // groups into contiguous runs, and map every nibble through the iq4nl
    // lookup table.
    inline void prepare(const uint8_t * q4) {
        bits.prepare64(q4);
        // We now have in bits.values[0]: 0...15, 32...47, 64...79, 96...111
        //              bits.values[1]: 16..31, 48...63, 80...95, 112..127
        //              etc.
        // permute1/permute2 interleave the two registers so each holds a
        // contiguous range of quant indices before the table lookup.
        auto tmp = _mm512_permutex2var_epi64(bits.values[0], permute1, bits.values[1]);
        bits.values[1] = _mm512_shuffle_epi8(values, _mm512_permutex2var_epi64(bits.values[0], permute2, bits.values[1]));
        bits.values[0] = _mm512_shuffle_epi8(values, tmp);
        tmp = _mm512_permutex2var_epi64(bits.values[2], permute1, bits.values[3]);
        bits.values[3] = _mm512_shuffle_epi8(values, _mm512_permutex2var_epi64(bits.values[2], permute2, bits.values[3]));
        bits.values[2] = _mm512_shuffle_epi8(values, tmp);
    }

    Q4Bits bits;          // unpacked 4-bit quants, filled by prepare()
    Scales8KBase s8k;     // accumulates the per-block minimum corrections
    const __m512i values; // iq4nl lookup table broadcast to 512 bits
    // 64-bit-lane selectors used by prepare() to regroup the quants.
    const __m512i permute1 = _mm512_set_epi64(11, 10, 3, 2, 9, 8, 1, 0);
    const __m512i permute2 = _mm512_set_epi64(15, 14, 7, 6, 13, 12, 5, 4);
    const __m128i mask = _mm_set1_epi16(254);  // clears the shift-flag bit
    const __m128i m127 = _mm_set1_epi16(-127);
    const __m128i m128 = _mm_set1_epi16(-128);
    const __m128i m1 = _mm_set1_epi16(1);
    const __m128i m4 = _mm_set1_epi16(4);
    // Byte-shuffle tables: shuffles[k] replicates the k-th pair of 16-bit
    // scales across a 512-bit register (low 256 bits one scale, high 256
    // bits the next).
    const __m512i shuffles[4] = {
        _mm512_inserti32x8(_mm512_set1_epi16(0x0100), _mm256_set1_epi16(0x0302), 1),
        _mm512_inserti32x8(_mm512_set1_epi16(0x0504), _mm256_set1_epi16(0x0706), 1),
        _mm512_inserti32x8(_mm512_set1_epi16(0x0908), _mm256_set1_epi16(0x0b0a), 1),
        _mm512_inserti32x8(_mm512_set1_epi16(0x0d0c), _mm256_set1_epi16(0x0f0e), 1),
    };
};
|
||||
|
||||
template <typename Q8>
|
||||
inline void compute_block(int iy, int i, float d, const Q8& q8, const __m512i * values, const __m512i * scales, __m512 * accd) {
|
||||
const __m512i p1 = _mm512_dpbusd_epi32(_mm512_setzero_si512(), values[0], q8.load_quants64(iy, i, 0));
|
||||
@@ -3672,7 +3720,8 @@ template <typename Dequantizer> void MulMat::set_functions(MulMat& m) {
|
||||
if constexpr (std::is_same_v<Dequantizer, DequantizerIQ6K> ||
|
||||
std::is_same_v<Dequantizer, DequantizerIQ5K> ||
|
||||
std::is_same_v<Dequantizer, DequantizerIQ4K> ||
|
||||
std::is_same_v<Dequantizer, DequantizerIQ3K>) {
|
||||
std::is_same_v<Dequantizer, DequantizerIQ3K> ||
|
||||
std::is_same_v<Dequantizer, DequantizerIQ4XXS>) {
|
||||
m.funcs[0] = mul_mat_iqX_k_q8_K_AVX512<Dequantizer, 1>;
|
||||
m.funcs[1] = mul_mat_iqX_k_q8_K_AVX512<Dequantizer, 2>;
|
||||
m.funcs[2] = mul_mat_iqX_k_q8_K_AVX512<Dequantizer, 3>;
|
||||
@@ -3832,6 +3881,10 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) {
|
||||
assert (ne00 % QK_K == 0);
|
||||
MulMat::set_functions<DequantizerIQ4XS>(mm);
|
||||
break;
|
||||
case GGML_TYPE_IQ4_XXS:
|
||||
assert (ne00 % QK_K == 0);
|
||||
MulMat::set_functions<DequantizerIQ4XXS>(mm);
|
||||
break;
|
||||
case GGML_TYPE_IQ2_K:
|
||||
assert (ne00 % QK_K == 0);
|
||||
MulMat::set_functions<DequantizerIQ2K>(mm);
|
||||
|
||||
Reference in New Issue
Block a user