mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 23:24:13 +00:00
Testing Trellis quantization: playing with scales and generators
This commit is contained in:
@@ -250,6 +250,34 @@ static void test_roundtrip_on_layer(
|
||||
}
|
||||
}
|
||||
|
||||
// Round `fval` to the nearest integer using the float "magic number" trick.
//
// Adding 12582912.f (1.5 * 2^23) moves the value into [2^23, 2^24), where one
// unit in the last place of a float is exactly 1. The hardware addition thus
// performs the rounding (following the current floating-point rounding mode,
// round-half-to-even by default), and the integer can be read straight out of
// the low 23 mantissa bits.
//
// Valid only for fval in [-4194304, 4194303]; outside that range the sum
// leaves [2^23, 2^24) and the extracted bits no longer represent the value.
static inline int nearest_int(float fval) {
    // Guard both ends of the valid range (NaN fails both comparisons as well).
    assert(fval >= -4194304.f && fval <= 4194303.f);
    const float val = fval + 12582912.f;
    int i;
    memcpy(&i, &val, sizeof(int));  // bit-copy instead of a pointer cast to avoid aliasing issues
    // Keep the 23 mantissa bits and remove the 0.5 * 2^23 bias introduced by the magic constant.
    return (i & 0x007fffff) - 0x00400000;
}
|
||||
|
||||
// Sixteen signed 8-bit scale levels in ascending order, non-uniformly spaced
// (steps are smaller near zero than at the extremes). In the visible code the
// table is passed to best_index_scale() to pick a level, and the chosen entry
// is multiplied by a float scale `d` that is itself derived from scale_values[0].
static const int8_t scale_values[16] = {
    -127, -104, -83, -65, -49, -35, -22, -10,
       1,   13,  25,  38,  53,  69,  89, 113,
};
|
||||
|
||||
//static std::vector<float> make_values(int nval, int n_per_val) {
|
||||
// GGML_ASSERT(n_per_val%4 == 0);
|
||||
// std::vector<float> result(nval*n_per_val);
|
||||
// const uint32_t a = 89226354, b = 64248484;
|
||||
// float * data = result.data();
|
||||
// uint32_t aux32;
|
||||
// const uint8_t * q = (const uint8_t *)&aux32;
|
||||
// for (int i = 0; i < nval; ++i) {
|
||||
// uint32_t x = i + 32767;
|
||||
// for (int k = 0; k < n_per_val/4; ++k) {
|
||||
// x = a*x + b;
|
||||
// aux32 = x & 0x0f0f0f0f;
|
||||
// for (int l = 0; l < 4; ++l) data[4*k+l] = scale_values[q[l]];
|
||||
// }
|
||||
// data += n_per_val;
|
||||
// }
|
||||
// return result;
|
||||
//}
|
||||
|
||||
static std::vector<float> make_values(int nval, int n_per_val) {
|
||||
std::vector<float> result(nval*n_per_val);
|
||||
uint16_t m16 = ggml_fp32_to_fp16(0.922f);
|
||||
@@ -261,13 +289,33 @@ static std::vector<float> make_values(int nval, int n_per_val) {
|
||||
for (int k = 0; k < n_per_val; ++k) {
|
||||
x = a*x + b;
|
||||
uint32_t s = (x & 0b10001111111111111000111111111111) ^ m32;
|
||||
data[k] = ggml_fp16_to_fp32(s & 65535) + ggml_fp16_to_fp32(s >> 16);
|
||||
float val = ggml_fp16_to_fp32(s & 65535) + ggml_fp16_to_fp32(s >> 16);
|
||||
//int ival = nearest_int(31.5f*val);
|
||||
int ival = nearest_int(16.f*val);
|
||||
data[k] = ival;
|
||||
//data[k] = ggml_fp16_to_fp32(s & 65535) + ggml_fp16_to_fp32(s >> 16);
|
||||
}
|
||||
data += n_per_val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
//static std::vector<float> make_values(int nval, int n_per_val) {
|
||||
// std::vector<float> result(nval*n_per_val);
|
||||
// const uint32_t a = 34038481, b = 76625530;
|
||||
// float * data = result.data();
|
||||
// for (int i = 0; i < nval; ++i) {
|
||||
// uint32_t x = i + 4096;
|
||||
// for (int k = 0; k < n_per_val; ++k) {
|
||||
// x = a*x + b;
|
||||
// uint32_t s = (x & 255) + ((x >> 8) & 255) + ((x >> 16) & 255) + ((x >> 24) & 255);
|
||||
// data[k] = (s - 510.f)/147.8f;
|
||||
// }
|
||||
// data += n_per_val;
|
||||
// }
|
||||
// return result;
|
||||
//}
|
||||
|
||||
#ifdef __AVX2__
|
||||
static inline float hsum_float_4(__m128 x) {
|
||||
x = _mm_add_ps(x, _mm_movehl_ps(x, x));
|
||||
@@ -287,13 +335,24 @@ static __m256 hsum_float_8x8(__m256 * accm) {
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int nearest_int(float fval) {
|
||||
assert(fval <= 4194303.f);
|
||||
float val = fval + 12582912.f;
|
||||
int i; memcpy(&i, &val, sizeof(int));
|
||||
return (i & 0x007fffff) - 0x00400000;
|
||||
// Lookup table used by best_index_scale(). It is indexed by
// (int)x - values[0], i.e. by the integer offset of x above the lowest level.
// An entry below 16 is the index of the nearest level directly. An entry
// e >= 16 marks an offset that lies between levels e-16 and e-15, where the
// caller must compare the two distances to decide.
const int8_t scale_index[241] = {
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 16, 16,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1, 17, 17,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 18,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3, 19,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 20,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5, 21, 21,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6, 22,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7, 23, 23,  8,  8,  8,  8,
     8,  8,  8,  8,  8,  8, 24,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 25, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 26, 26,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 27, 27, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 28, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 30, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
};

// Return the index (0..15) of the entry of `values` closest to `x`.
//
// `values` must point to 16 ascending int8 levels compatible with the
// scale_index table above (presumably scale_values; the table size of 241
// matches its span of -127..113).
// Inputs below the table range map to 0 and inputs above it map to 15.
// NaN is mapped to 0 so that the result is always a valid index.
inline int best_index_scale(const int8_t * values, float x) {
    // Clamp in floating point before converting: casting NaN, +/-inf or a
    // float outside the range of int is undefined behaviour. Any |x| >= 512
    // is far outside what an int8 table can cover (the integer checks below
    // would give the same answer), so results for in-range inputs are unchanged.
    constexpr float kClamp = 512.f;
    if (!(x > -kClamp)) return 0;   // negated comparison also catches NaN
    if (x >= kClamp) return 15;

    int ix = (int)x - values[0];
    if (ix < 0) return 0;
    if (ix >= 241) return 15;

    ix = scale_index[ix];
    if (ix < 16) return ix;         // unambiguous: table holds the answer

    // x lies between two adjacent levels: pick the closer one (ties go up).
    const int lower = ix - 16;
    return x - values[lower] < values[lower + 1] - x ? lower : lower + 1;
}
|
||||
|
||||
|
||||
static void analyze_x(const char * name, int nrows, int n_per_row, const float * values, float& tot_mse, float& tot_mse_q, float& tot_elements) {
|
||||
constexpr int kNumVal = 1 << 12;
|
||||
constexpr int kBlockSize = 8;
|
||||
@@ -384,28 +443,57 @@ static void analyze_x(const char * name, int nrows, int n_per_row, const float *
|
||||
lmse += diff*diff;
|
||||
}
|
||||
}
|
||||
for (int ibl = 0; ibl < n_per_row/kSuperBlockSize; ++ibl) {
|
||||
auto sb = scales.data() + ibl*(kSuperBlockSize/kBlockSize);
|
||||
auto idx = best_idx.data() + ibl*(kSuperBlockSize/kBlockSize);
|
||||
auto xbl = xr + ibl*kSuperBlockSize;
|
||||
float amax_scale = 0;
|
||||
for (int ib = 0; ib < kSuperBlockSize/kBlockSize; ++ib) {
|
||||
amax_scale = std::max(amax_scale, std::abs(sb[ib]));
|
||||
}
|
||||
float id = amax_scale > 0 ? 15/amax_scale : 0;
|
||||
float d = amax_scale/15;
|
||||
for (int ib = 0; ib < kSuperBlockSize/kBlockSize; ++ib) {
|
||||
int ls = nearest_int(0.5f*(id*sb[ib]+15));
|
||||
ls = std::max(0, std::min(ls, 15));
|
||||
float dl = d*(2*ls - 15);
|
||||
auto xb = xbl + kBlockSize*ib;
|
||||
auto qv = codes.data() + kBlockSize*idx[ib];
|
||||
for (int k = 0; k < kBlockSize; ++k) {
|
||||
float diff = xb[k] - dl*qv[k];
|
||||
lmse_q += diff*diff;
|
||||
}
|
||||
float amax_scale = std::abs(scales[0]);
|
||||
float max_scale = scales[0];
|
||||
for (int ib = 1; ib < n_per_row/kBlockSize; ++ib) {
|
||||
float ax = std::abs(scales[ib]);
|
||||
if (ax > amax_scale) {
|
||||
amax_scale = ax;
|
||||
max_scale = scales[ib];
|
||||
}
|
||||
}
|
||||
float d = max_scale/scale_values[0];
|
||||
float id = d ? 1/d : 0.f;
|
||||
for (int ib = 0; ib < n_per_row/kBlockSize; ++ib) {
|
||||
int ls = best_index_scale(scale_values, id*scales[ib]);
|
||||
float dl = d * scale_values[ls];
|
||||
auto xb = xr + kBlockSize*ib;
|
||||
auto qv = codes.data() + kBlockSize*best_idx[ib];
|
||||
for (int k = 0; k < kBlockSize; ++k) {
|
||||
float diff = xb[k] - dl*qv[k];
|
||||
lmse_q += diff*diff;
|
||||
}
|
||||
}
|
||||
//for (int ibl = 0; ibl < n_per_row/kSuperBlockSize; ++ibl) {
|
||||
// auto sb = scales.data() + ibl*(kSuperBlockSize/kBlockSize);
|
||||
// auto idx = best_idx.data() + ibl*(kSuperBlockSize/kBlockSize);
|
||||
// auto xbl = xr + ibl*kSuperBlockSize;
|
||||
// float amax_scale = 0, max_scale = 0;
|
||||
// for (int ib = 0; ib < kSuperBlockSize/kBlockSize; ++ib) {
|
||||
// float ax = std::abs(sb[ib]);
|
||||
// if (ax > amax_scale) {
|
||||
// amax_scale = ax; max_scale = sb[ib];
|
||||
// }
|
||||
// //amax_scale = std::max(amax_scale, std::abs(sb[ib]));
|
||||
// }
|
||||
// float d = max_scale/scale_values[0];
|
||||
// float id = d ? 1/d : 0.f;
|
||||
// //float id = amax_scale > 0 ? 15/amax_scale : 0;
|
||||
// //float d = amax_scale/15;
|
||||
// for (int ib = 0; ib < kSuperBlockSize/kBlockSize; ++ib) {
|
||||
// int ls = best_index_scale(scale_values, id*sb[ib]);
|
||||
// float dl = d * scale_values[ls];
|
||||
// //int ls = nearest_int(0.5f*(id*sb[ib]+15));
|
||||
// //ls = std::max(0, std::min(ls, 15));
|
||||
// //float dl = d*(2*ls - 15);
|
||||
// auto xb = xbl + kBlockSize*ib;
|
||||
// auto qv = codes.data() + kBlockSize*idx[ib];
|
||||
// for (int k = 0; k < kBlockSize; ++k) {
|
||||
// float diff = xb[k] - dl*qv[k];
|
||||
// lmse_q += diff*diff;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user