mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-22 23:49:23 +00:00
MMQ for iq5_k: slightly faster
This commit is contained in:
@@ -2503,6 +2503,8 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
|
||||
constexpr int qstep = 8;
|
||||
const int kqsx = threadIdx.x % qstep;
|
||||
|
||||
auto values = iq5nl_values;
|
||||
|
||||
uint32_t aux32[2];
|
||||
const uint8_t * aux8 = (const uint8_t *)aux32;
|
||||
#pragma unroll
|
||||
@@ -2522,16 +2524,15 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
|
||||
for (int l = 0; l < qstep/2; ++l) {
|
||||
|
||||
const int ql = get_int_b4(bxi->qs, kqsx + qstep*l);
|
||||
aux32[0] = ((ql >> 0) & 0x0f0f0f0f) | ((qh & 0x01010101) << 4);
|
||||
aux32[1] = ((ql >> 4) & 0x0f0f0f0f) | ((qh & 0x02020202) << 3);
|
||||
aux32[0] = ((ql >> 0) & 0x0f0f0f0f) | ((qh & 0x01010101) << 4) | ((extra & 1) * 0x20202020); // this is very slightly faster
|
||||
aux32[1] = ((ql >> 4) & 0x0f0f0f0f) | ((qh & 0x02020202) << 3) | ((extra & 4) * 0x08080808); // then the version below
|
||||
//aux32[0] = ((ql >> 0) & 0x0f0f0f0f) | ((qh & 0x01010101) << 4) | ((extra & 1) ? 0x20202020 : 0);
|
||||
//aux32[1] = ((ql >> 4) & 0x0f0f0f0f) | ((qh & 0x02020202) << 3) | ((extra & 4) ? 0x20202020 : 0);
|
||||
qh >>= 2;
|
||||
|
||||
auto values_l = iq5nl_values + ((extra & 1) << 5);
|
||||
auto values_h = iq5nl_values + ((extra & 4) << 3);
|
||||
extra >>= 4;
|
||||
|
||||
const char4 val0 = make_char4(values_l[aux8[0]], values_l[aux8[1]], values_l[aux8[2]], values_l[aux8[3]]);
|
||||
const char4 val1 = make_char4(values_h[aux8[4]], values_h[aux8[5]], values_h[aux8[6]], values_h[aux8[7]]);
|
||||
const char4 val0 = make_char4(values[aux8[0]], values[aux8[1]], values[aux8[2]], values[aux8[3]]);
|
||||
const char4 val1 = make_char4(values[aux8[4]], values[aux8[5]], values[aux8[6]], values[aux8[7]]);
|
||||
|
||||
#ifdef INT8_MMA_AVAILABLE
|
||||
x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + kqsx + 16*l + 0] = *(const int *)&val0;
|
||||
|
||||
Reference in New Issue
Block a user