mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 07:34:10 +00:00
Refactor iqk: Factor out GEMM for 1-bit quants (AVX2/AVX512)
This commit is contained in:
@@ -264,6 +264,7 @@ if (GGML_IQK_MUL_MAT)
|
||||
iqk/iqk_gemm_kquants.cpp
|
||||
iqk/iqk_gemm_iquants.cpp
|
||||
iqk/iqk_gemm_iqk_quants.cpp
|
||||
iqk/iqk_gemm_1bit.cpp
|
||||
iqk/iqk_gemm_legacy_quants.cpp)
|
||||
set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h
|
||||
iqk/iqk_flash_impl.h
|
||||
@@ -271,6 +272,7 @@ if (GGML_IQK_MUL_MAT)
|
||||
iqk/iqk_gemm_kquants.h
|
||||
iqk/iqk_gemm_iquants.h
|
||||
iqk/iqk_gemm_iqk_quants.h
|
||||
iqk/iqk_gemm_1bit.h
|
||||
iqk/iqk_gemm_legacy_quants.h)
|
||||
if (GGML_IQK_FLASH_ATTENTION)
|
||||
message(STATUS "Enabling IQK Flash Attention kernels")
|
||||
|
||||
1104
ggml/src/iqk/iqk_gemm_1bit.cpp
Normal file
1104
ggml/src/iqk/iqk_gemm_1bit.cpp
Normal file
File diff suppressed because it is too large
Load Diff
11
ggml/src/iqk/iqk_gemm_1bit.h
Normal file
11
ggml/src/iqk/iqk_gemm_1bit.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include "iqk_common.h"
|
||||
|
||||
#ifdef IQK_IMPLEMENT
|
||||
|
||||
#include <array>
|
||||
|
||||
bool iqk_set_kernels_1bit(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user