mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 23:54:10 +00:00
iq4_xxs: AVX2
This commit is contained in:
@@ -1511,16 +1511,6 @@ struct IQXKScales {
|
||||
inline void process(int i, float d, uint16_t extra, __m128i scales8, const Q8& q8, __m256 * accm, __m256i * scales) const {
|
||||
auto scales16 = _mm256_cvtepi8_epi16(_mm_shuffle_epi8(scales8, hshuff));
|
||||
process(i, d, extra, scales16, q8, accm, scales);
|
||||
//auto extra128 = _mm_set1_epi16(extra);
|
||||
//extra128 = _mm_cmpeq_epi8(_mm_and_si128(extra128, emask), emask);
|
||||
//extra128 = _mm_and_si128(extra128, eshift);
|
||||
//extra128 = _mm_shuffle_epi8(extra128, eshuffle);
|
||||
//auto scales_s = _mm256_mullo_epi16(scales16, _mm256_add_epi16(min, _mm256_cvtepi8_epi16(extra128)));
|
||||
//for (int iy = 0; iy < Q8::nrc_y; ++iy) {
|
||||
// const __m256i prod = _mm256_madd_epi16(scales_s, q8.load_bsums(iy, i));
|
||||
// accm[iy] = _mm256_fmadd_ps(_mm256_set1_ps(d * q8.scale(iy, i)), _mm256_cvtepi32_ps(prod), accm[iy]);
|
||||
//}
|
||||
//prepare_scales_16(scales16, scales);
|
||||
}
|
||||
template <typename Q8>
|
||||
inline void process(int i, float d, uint16_t extra, __m256i scales16, const Q8& q8, __m256 * accm, __m256i * scales) const {
|
||||
@@ -1750,6 +1740,37 @@ struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> {
|
||||
const __m256i mh = _mm256_set1_epi8(-128); // to avoid stupid warning about 0x80 overflowing
|
||||
};
|
||||
|
||||
// AVX2 dequantizer for block_iq4_xxs data.
//
// new_block() decodes the 8 per-block scale bytes of super-block i and feeds a
// derived product into s8k.accum_mins(); prepare() unpacks the quants via Q4Bits
// and maps them through the `values` table with a byte shuffle.
struct DequantizerIQ4XXS final : public BaseDequantizer<block_iq4_xxs, true> {
    DequantizerIQ4XXS(const void * vx, size_t bx) : BaseDequantizer(vx, bx), values(load_iq4nl_values_256()) {}

    // Decodes the scales of super-block i.
    //   i    - index of the super-block within the current row (indexes x[])
    //   q8   - activation-side accessor, forwarded untouched to accum_mins()
    //   accd - accumulators, forwarded untouched to accum_mins()
    // Returns the eight decoded 16-bit scales duplicated into both 128-bit halves.
    template <typename Q8>
    inline __m256i new_block(int i, const Q8& q8, __m256 * accd) {
        // Load 8 scale bytes and zero-extend them to 16-bit lanes.
        auto raw16 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *)x[i].scales));
        // Bit 0 of each byte is a flag: lanes with the flag set get a shift of 4, others 0.
        auto flag_set = _mm_cmpeq_epi16(_mm_and_si128(raw16, m1), m1);
        auto shifts   = _mm_and_si128(flag_set, m4);
        // The actual scale is (byte & 254) - 127.
        auto scales128 = _mm_add_epi16(_mm_and_si128(raw16, mask), m127);
        // scale * (-128 + shift); presumably the per-block offset ("min") term -- the
        // interpretation is up to accum_mins(), which is defined outside this view.
        auto scales_s = _mm_mullo_epi16(scales128, _mm_add_epi16(m128, shifts));
        s8k.accum_mins(scales_s, q8, i, d, accd);
        return MM256_SET_M128I(scales128, scales128);
    }

    // Unpacks chunk j of super-block i into bits.values[0..3], then replaces every
    // byte with the entry of `values` it selects (in-lane byte shuffle).
    inline void prepare(int i, int j) {
        bits.prepare16(x[i].qs, j);
        for (int k = 0; k < 4; ++k) {
            bits.values[k] = _mm256_shuffle_epi8(values, bits.values[k]);
        }
    }

    Q4Bits bits;
    Scales8KBase s8k;
    const __m256i values;                        // table used as the shuffle source in prepare()
    const __m128i mask = _mm_set1_epi16(254);    // clears the flag bit (bit 0) of a scale byte
    const __m128i m127 = _mm_set1_epi16(-127);   // offset added to the masked scale
    const __m128i m128 = _mm_set1_epi16(-128);   // base multiplier for the accum_mins() term
    const __m128i m1   = _mm_set1_epi16(1);      // selects the flag bit
    const __m128i m4   = _mm_set1_epi16(4);      // shift applied when the flag bit is set
    // NOTE(review): shuff1/shuff2 are not referenced anywhere inside this struct;
    // kept as-is in case they are used from outside -- confirm before removing.
    const __m256i shuff1 = _mm256_set_epi64x(0x0706070605040504, 0x0302030201000100, 0x0706070605040504, 0x0302030201000100);
    const __m256i shuff2 = _mm256_set_epi64x(0x0f0e0f0e0d0c0d0c, 0x0b0a0b0a09080908, 0x0f0e0f0e0d0c0d0c, 0x0b0a0b0a09080908);
};
|
||||
|
||||
struct DequantizerQ5K final : public BaseDequantizer<block_q5_K> {
|
||||
DequantizerQ5K(const void * vx, size_t bx) : BaseDequantizer(vx, bx) {}
|
||||
template <typename Q8>
|
||||
|
||||
Reference in New Issue
Block a user