* iq3_s_r4: WIP

* iq3_s_r4: Zen4

* iq3_s_r4: slightly better Zen4

* iq3_s_r4: AVX2

* iq3_s_r4: NEON

* iq3_s_r4: rearrange quants

* iq3_s_r4: rearranged quants - AVX2

* iq3_s_r4: rearranged quants - NEON

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2024-12-23 14:34:23 +01:00
committed by GitHub
parent aa2595415a
commit da3bfd1009
10 changed files with 394 additions and 47 deletions

View File

@@ -422,6 +422,7 @@ extern "C" {
GGML_TYPE_IQ2_XS_R4 = 217,
GGML_TYPE_IQ3_XXS_R4= 218,
GGML_TYPE_IQ4_NL_R4 = 220,
GGML_TYPE_IQ3_S_R4 = 221,
GGML_TYPE_IQ2_S_R4 = 222,
GGML_TYPE_IQ4_XS_R4 = 223,
GGML_TYPE_BF16_R16 = 230,
@@ -504,6 +505,7 @@ extern "C" {
GGML_FTYPE_MOSTLY_IQ2_XS_R4 = 216, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ3_XXS_R4= 217, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ4_NL_R4 = 219, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ3_S_R4 = 220, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ2_S_R4 = 221, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ4_XS_R4 = 222, // except 1d tensors
GGML_FTYPE_MOSTLY_BF16_R16 = 224, // except 1d tensors