iq2_tn: AVX512

Just reusing the k-quants template gets us to PP-512 = 376 t/s,
TG-128 = 47.6 t/s for TriLM-3.9B.
This commit is contained in:
Iwan Kawrakow
2024-08-05 14:53:49 +03:00
parent 1b41d792ec
commit dd0b08d1d8

View File

@@ -692,6 +692,18 @@ struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> {
};
// Dequantizer for block_iq2_tn data, built on BaseDequantizer and unpacking
// the quants through the Q2Bits helper.
struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> {
    DequantizerIQ2TN(const void * vx, size_t bx) : BaseDequantizer(vx, bx) {}

    // Per-block setup for block `i`:
    //   - converts the block's fp16 scale to float and stores it in `d`,
    //   - hands the packed quants of the block to `bits.prepare`,
    //   - calls process_mins_16 with 16-bit lanes all equal to 1 and the
    //     negated block scale, passing `q8`, `i` and `accm` through,
    //   - writes 16-bit lanes all equal to 1 into scales[0] and scales[1].
    // NOTE(review): presumably the process_mins_16 call accumulates the constant
    // offset of the quants into accm - confirm against its definition.
    template <typename Q8>
    inline void new_block(int i, const Q8& q8, __m256 * accm, __m512i * scales) {
        const auto& block = x[i];
        d = GGML_FP16_TO_FP32(block.d);
        bits.prepare(block.qs);

        // There are no per-sub-block mins here: a constant 1 is used in every lane.
        const __m256i unit_mins = _mm256_set1_epi16(1);
        process_mins_16(unit_mins, q8, i, -d, accm);

        // Likewise both scale vectors are a constant 1 in every 16-bit lane.
        const __m512i unit_scales = _mm512_set1_epi16(1);
        scales[0] = unit_scales;
        scales[1] = unit_scales;
    }

    Q2Bits bits;
};
struct DequantizerQ3K final : public BaseDequantizer<block_q3_K> {
DequantizerQ3K(const void * vx, size_t bx) : BaseDequantizer(vx, bx) {}
template <typename Q8>
@@ -3156,6 +3168,10 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) {
assert (ne00 % QK_K == 0);
MulMat::set_functions<DequantizerQ2K>(mm);
break;
case GGML_TYPE_IQ2_TN:
assert (ne00 % QK_K == 0);
MulMat::set_functions<DequantizerIQ2TN>(mm);
break;
case GGML_TYPE_Q3_K:
assert (ne00 % QK_K == 0);
MulMat::set_functions<DequantizerQ3K>(mm);