mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-27 01:49:28 +00:00
iq4_xxs: this looks very viable compared to iq4_xs
At the same 4.25 bpw, PPL is always better, and for some models significantly better. I'll rename it to iq4_ks and keep it.
This commit is contained in:
@@ -442,10 +442,10 @@ typedef struct {
|
||||
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
||||
|
||||
typedef struct {
|
||||
uint8_t scales[QK_K/64];
|
||||
uint8_t scales[QK_K/32];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xxs;
|
||||
static_assert(sizeof(block_iq4_xxs) == QK_K/64 + QK_K/2, "wrong iq4_xxs block size/padding");
|
||||
static_assert(sizeof(block_iq4_xxs) == QK_K/32 + QK_K/2, "wrong iq4_xxs block size/padding");
|
||||
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
|
||||
@@ -626,15 +626,15 @@ static __global__ void dequantize_block_iq4_xxs(const void * __restrict__ vx, ds
|
||||
const int64_t i = ii - (row*n_per_row)/QK_K;
|
||||
|
||||
const int64_t tid = threadIdx.x;
|
||||
const int64_t ib = tid/8; // 0...3
|
||||
const int64_t il = tid%8; // 0...7
|
||||
dst_t * y = yy + ii*QK_K + 64*ib + 4*il;
|
||||
const uint8_t * q4 = x[i].qs + 32*ib + 4*il;
|
||||
const int64_t il = tid/8; // 0...3
|
||||
const int64_t ib = tid%8; // 0...7
|
||||
dst_t * y = yy + ii*QK_K + 32*ib + 4*il;
|
||||
const uint8_t * q4 = x[i].qs + 16*ib + 4*il;
|
||||
const float d = scale * ((x[i].scales[ib] & 254) - 127);
|
||||
const int8_t * values = iq4k_values + ((x[i].scales[ib] & 1) << 4);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
y[j+ 0] = d * values[q4[j] & 0xf];
|
||||
y[j+32] = d * values[q4[j] >> 4];
|
||||
y[j+16] = d * values[q4[j] >> 4];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2340,7 +2340,7 @@ void quantize_row_iq4_xxs(const float * x, void * y, int64_t k) {
|
||||
|
||||
size_t quantize_iq4_xxs(const float * src, void * dst, int64_t nrows, int64_t n_per_row, const float * imatrix) {
|
||||
//printf("============ %s(%d, %d)\n", __func__, int(nrows), int(n_per_row));
|
||||
constexpr int kBlockSize = 64; //128;
|
||||
constexpr int kBlockSize = 32; //128;
|
||||
GGML_ASSERT(n_per_row%QK_K == 0);
|
||||
auto row_size = ggml_row_size(GGML_TYPE_IQ4_XXS, n_per_row);
|
||||
char * qrow = (char *)dst;
|
||||
@@ -2355,7 +2355,7 @@ size_t quantize_iq4_xxs(const float * src, void * dst, int64_t nrows, int64_t n_
|
||||
}
|
||||
|
||||
void dequantize_row_iq4_xxs(const block_iq4_xxs * x, float * y, int64_t k) {
|
||||
constexpr int kBlockSize = 64; //128;
|
||||
constexpr int kBlockSize = 32; //128;
|
||||
GGML_ASSERT(k%QK_K == 0);
|
||||
const float * dptr = (const float *)x;
|
||||
float d = *dptr;
|
||||
|
||||
Reference in New Issue
Block a user