mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 15:14:10 +00:00
Refactor iqk: Factor out GEMM for repacked legacy quants
This commit is contained in:
@@ -138,6 +138,27 @@ typedef void (*mul_mat_t)(int n, const void * vx, size_t bx, const DataInfo& inf
|
||||
|
||||
// Number of entries in a kernel table: the tables are declared as
// std::array<mul_mat_t, IQK_MAX_NY>, and the IQK_SET_MUL_MAT_FUNCTIONS* macros
// assign exactly this many slots (indices 0..7).
#define IQK_MAX_NY 8
|
||||
|
||||
// Fills funcs[0..7] with the eight instantiations kernel<Dequantizer, 1> ...
// kernel<Dequantizer, 8>: slot i receives the instantiation whose second
// template argument is i + 1.
//
//   kernel      - name of a template taking (type, integer) template arguments
//   Dequantizer - type passed as the first template argument
//   funcs       - indexable table with at least 8 assignable slots
//
// `funcs` is parenthesized so that an expression argument (e.g. `*table_ptr`)
// indexes the intended object, and the last line carries no line-continuation
// backslash so the macro cannot absorb the line that follows it.
// NOTE: this expands to eight separate statements; put the invocation inside
// braces when it forms the body of an if/else/loop.
#define IQK_SET_MUL_MAT_FUNCTIONS_T(kernel, Dequantizer, funcs) \
    (funcs)[0] = kernel<Dequantizer, 1>;\
    (funcs)[1] = kernel<Dequantizer, 2>;\
    (funcs)[2] = kernel<Dequantizer, 3>;\
    (funcs)[3] = kernel<Dequantizer, 4>;\
    (funcs)[4] = kernel<Dequantizer, 5>;\
    (funcs)[5] = kernel<Dequantizer, 6>;\
    (funcs)[6] = kernel<Dequantizer, 7>;\
    (funcs)[7] = kernel<Dequantizer, 8>;
|
||||
|
||||
// Fills funcs[0..7] with the eight instantiations kernel<1> ... kernel<8>:
// slot i receives the instantiation whose template argument is i + 1.
//
//   kernel - name of a template taking a single integer template argument
//   funcs  - indexable table with at least 8 assignable slots
//
// `funcs` is parenthesized so that an expression argument (e.g. `*table_ptr`)
// indexes the intended object, and the last line carries no line-continuation
// backslash so the macro cannot absorb the line that follows it.
// NOTE: this expands to eight separate statements; put the invocation inside
// braces when it forms the body of an if/else/loop.
#define IQK_SET_MUL_MAT_FUNCTIONS(kernel, funcs) \
    (funcs)[0] = kernel<1>;\
    (funcs)[1] = kernel<2>;\
    (funcs)[2] = kernel<3>;\
    (funcs)[3] = kernel<4>;\
    (funcs)[4] = kernel<5>;\
    (funcs)[5] = kernel<6>;\
    (funcs)[6] = kernel<7>;\
    (funcs)[7] = kernel<8>;
|
||||
|
||||
|
||||
// ==================================================================================================
|
||||
|
||||
static inline void make_q4_scales(const uint8_t * scales8, uint32_t * aux32) {
|
||||
@@ -234,6 +255,13 @@ static inline __m256i load_iq4nl_values_256() {
|
||||
return MM256_SET_M128I(val128, val128);
|
||||
}
|
||||
|
||||
#ifdef HAVE_FANCY_SIMD
|
||||
static inline __m512i load_iq4nl_values_512() {
|
||||
auto val256 = load_iq4nl_values_256();
|
||||
return _mm512_inserti32x8(_mm512_castsi256_si512(val256), val256, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline __m128i load_iq4k_values_128() {
|
||||
return _mm_loadu_si128((const __m128i *)iq4k_values);
|
||||
}
|
||||
|
||||
@@ -11,11 +11,6 @@ namespace {
|
||||
|
||||
#ifdef HAVE_FANCY_SIMD
|
||||
|
||||
__m512i inline load_iq4nl_values_512() {
|
||||
auto val256 = load_iq4nl_values_256();
|
||||
return _mm512_inserti32x8(_mm512_castsi256_si512(val256), val256, 1);
|
||||
}
|
||||
|
||||
struct IQXKScales {
|
||||
IQXKScales(uint8_t shift, int8_t min_val) : eshift(_mm256_set1_epi16(shift)), min(_mm256_set1_epi16(min_val)) {}
|
||||
template <typename Q8>
|
||||
|
||||
@@ -285,11 +285,6 @@ struct DequantizerQ6K final : public BaseDequantizer<block_q6_K> {
|
||||
|
||||
};
|
||||
|
||||
__m512i inline load_iq4nl_values_512() {
|
||||
auto val256 = load_iq4nl_values_256();
|
||||
return _mm512_inserti32x8(_mm512_castsi256_si512(val256), val256, 1);
|
||||
}
|
||||
|
||||
struct DequantizerIQ4XS final : public BaseDequantizer<block_iq4_xs> {
|
||||
DequantizerIQ4XS(const void * vx, size_t bx) : BaseDequantizer(vx, bx), values(load_iq4nl_values_512()) {}
|
||||
template <typename Q8>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,6 @@
|
||||
|
||||
#include <array>
|
||||
|
||||
// Declaration only; the definition is not visible here.
// Takes the integers ne00, typeA and typeB plus a mutable reference to an
// IQK_MAX_NY-entry table of mul_mat_t function pointers, and returns a bool.
// NOTE(review): presumably fills `kernels` for the given type pair and reports
// whether that pair is supported - confirm against the definition.
bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels);
|
||||
// Declaration only; the definition is not visible here.
// Takes the integers ne00, typeA and typeB, a mutable reference to an
// IQK_MAX_NY-entry table of mul_mat_t function pointers, and a mutable
// reference to one additional mul_mat_t (`func16`); returns a bool.
// NOTE(review): presumably fills `kernels` (and possibly `func16`) for the given
// type pair and reports whether that pair is supported - confirm against the
// definition.
bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user