mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 07:04:11 +00:00
q5_1
Speed improves from 64.2 t/s to 114.9 t/s. There is no repacked variant for this type.
This commit is contained in:
@@ -2893,6 +2893,22 @@ struct DeqQ50 {
|
||||
const uint8x16_t mh = vdupq_n_u8(0xf0);
|
||||
};
|
||||
|
||||
// Dequantizer for block_q5_1. It is passed as the dequantizer template argument
// to iqk_convert_qX_1_q8_1_r8 for GGML_TYPE_Q5_1 in iqk_convert_legacy_quants_q8_r8.
struct DeqQ51 {
|
||||
|
||||
// Produces the quants of one block as two 16-lane int8 vectors (r.val[0], r.val[1]).
// x: the q5_1 block; only x.qs and x.qh are read here.
inline int8x16x2_t dequant(const block_q5_1& x) const {
|
||||
int8x16x2_t r;
|
||||
// Fills both r.val vectors from x.qs.
// NOTE(review): presumably these are the low 4 bits of each quant - confirm in Q4LegacyBits.
bits.prepare1(x.qs, r.val);
|
||||
auto qh = x.qh;
|
||||
// qh+0 supplies the high-bit data for r.val[0], qh+2 for r.val[1].
// NOTE(review): to_bytes presumably expands each bit of qh into one byte lane - confirm in HighBit5Legacy.
// AND-ing with mh keeps only bit 4 (0x10) of each lane; that bit is then OR-ed onto the value from prepare1.
r.val[0] = vreinterpretq_s8_u8(vorrq_u8(vreinterpretq_u8_s8(r.val[0]), vandq_u8(mh, hbits.to_bytes(qh+0))));
|
||||
r.val[1] = vreinterpretq_s8_u8(vorrq_u8(vreinterpretq_u8_s8(r.val[1]), vandq_u8(mh, hbits.to_bytes(qh+2))));
|
||||
return r;
|
||||
}
|
||||
|
||||
// Helper that unpacks x.qs (see prepare1 above).
Q4LegacyBits bits;
|
||||
// Helper that turns the qh bytes into per-lane masks (see to_bytes above).
HighBit5Legacy hbits;
|
||||
// 0x10 broadcast to all 16 byte lanes; used as the mask for the bit contributed by qh.
// The neighbouring DeqQ50 uses 0xf0 for its mask instead.
const uint8x16_t mh = vdupq_n_u8(0x10);
|
||||
};
|
||||
|
||||
struct DeqQ60 {
|
||||
|
||||
inline int8x16x2_t dequant(const block_q6_0& x) const {
|
||||
@@ -2992,7 +3008,7 @@ bool iqk_convert_legacy_quants_q8_r8(int type, int n, const void * vx, size_t bx
|
||||
case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8<block_q4_0, DeqQ40>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8<block_q4_1, DeqQ41>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8<block_q5_0, DeqQ50>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8<block_q5_1, Q5_1_Dequantizer<block_q5_1>>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8<block_q5_1, DeqQ51>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8<block_q6_0, DeqQ60>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8<block_iq4_nl, DeqIQ4NL>(n, vx, bx, vy, nrc_x); break;
|
||||
case GGML_TYPE_Q8_0 : iqk_convert_qX_q80_r8<block_q8_0, DeqQ80>(n, vx, bx, vy, nrc_x); break;
|
||||
|
||||
@@ -274,6 +274,7 @@ struct MulMat {
|
||||
case GGML_TYPE_Q4_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;
|
||||
case GGML_TYPE_Q4_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type;
|
||||
case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;
|
||||
case GGML_TYPE_Q5_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type;
|
||||
case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;
|
||||
case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;
|
||||
case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;
|
||||
|
||||
Reference in New Issue
Block a user