Refactor iqk: Factor out GEMM for 1-bit quants (AVX2/AVX512)

This commit is contained in:
Iwan Kawrakow
2025-05-17 18:28:24 +03:00
parent 082a9bd632
commit 9b6e75cb79
4 changed files with 1119 additions and 1084 deletions

View File

@@ -264,6 +264,7 @@ if (GGML_IQK_MUL_MAT)
iqk/iqk_gemm_kquants.cpp
iqk/iqk_gemm_iquants.cpp
iqk/iqk_gemm_iqk_quants.cpp
iqk/iqk_gemm_1bit.cpp
iqk/iqk_gemm_legacy_quants.cpp)
set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h
iqk/iqk_flash_impl.h
@@ -271,6 +272,7 @@ if (GGML_IQK_MUL_MAT)
iqk/iqk_gemm_kquants.h
iqk/iqk_gemm_iquants.h
iqk/iqk_gemm_iqk_quants.h
iqk/iqk_gemm_1bit.h
iqk/iqk_gemm_legacy_quants.h)
if (GGML_IQK_FLASH_ATTENTION)
message(STATUS "Enabling IQK Flash Attention kernels")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,11 @@
#pragma once
#include "iqk_common.h"
// The declaration below is only visible to translation units that define
// IQK_IMPLEMENT before including this header.
#ifdef IQK_IMPLEMENT
#include <array>
// Kernel-setup entry point for the 1-bit quant GEMM code.
//
// Takes an integer `ne00`, two integer type ids (`typeA`, `typeB`) and a
// caller-owned array of IQK_MAX_NY `mul_mat_t` entries passed by non-const
// reference, and returns a bool.
//
// NOTE(review): presumably this fills `kernels` with the matrix-multiply
// kernels matching the given type combination and returns false when the
// combination is not supported; `ne00` is presumably the row length of the
// quantized operand. Confirm against the implementation in iqk_gemm_1bit.cpp.
bool iqk_set_kernels_1bit(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels);
#endif

File diff suppressed because it is too large Load Diff