Refactor iqk: Factor out GEMM for 1-bit quants (AVX2/AVX512)

2026-02-25 07:34:10 +00:00 · 2025-05-17 18:28:24 +03:00
parent 082a9bd632
commit 9b6e75cb79
4 changed files with 1119 additions and 1084 deletions
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -264,6 +264,7 @@ if (GGML_IQK_MUL_MAT)
                            iqk/iqk_gemm_kquants.cpp
                            iqk/iqk_gemm_iquants.cpp
                            iqk/iqk_gemm_iqk_quants.cpp
+                            iqk/iqk_gemm_1bit.cpp
                            iqk/iqk_gemm_legacy_quants.cpp)
    set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h
                            iqk/iqk_flash_impl.h
@@ -271,6 +272,7 @@ if (GGML_IQK_MUL_MAT)
                            iqk/iqk_gemm_kquants.h
                            iqk/iqk_gemm_iquants.h
                            iqk/iqk_gemm_iqk_quants.h
+                            iqk/iqk_gemm_1bit.h
                            iqk/iqk_gemm_legacy_quants.h)
    if (GGML_IQK_FLASH_ATTENTION)
        message(STATUS "Enabling IQK Flash Attention kernels")
--- a/ggml/src/iqk/iqk_gemm_1bit.cpp
+++ b/ggml/src/iqk/iqk_gemm_1bit.cpp
--- a/ggml/src/iqk/iqk_gemm_1bit.h
+++ b/ggml/src/iqk/iqk_gemm_1bit.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "iqk_common.h"
+
+#ifdef IQK_IMPLEMENT
+
+#include <array>
+
+bool iqk_set_kernels_1bit(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels);
+
+#endif
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp