iq3_xxs_r4: 1st shot on Zen4

PP-512: 107 t/s -> 137 t/s
TG-128(1 thread): 2.64 t/s -> 3.44 t/s
This commit is contained in:
Iwan Kawrakow
2024-12-19 12:19:34 +02:00
parent baa9ed4a5e
commit 19acd7ce1e
10 changed files with 252 additions and 5 deletions

View File

@@ -418,6 +418,7 @@ extern "C" {
GGML_TYPE_Q4_K_R4 = 212,
GGML_TYPE_Q5_K_R4 = 213,
GGML_TYPE_Q6_K_R4 = 214,
GGML_TYPE_IQ3_XXS_R4= 218,
GGML_TYPE_IQ4_NL_R4 = 220,
GGML_TYPE_IQ4_XS_R4 = 223,
GGML_TYPE_BF16_R16 = 230,
@@ -496,6 +497,7 @@ extern "C" {
GGML_FTYPE_MOSTLY_Q4_K_R4 = 212, // except 1d tensors
GGML_FTYPE_MOSTLY_Q5_K_R4 = 215, // except 1d tensors
GGML_FTYPE_MOSTLY_Q6_K_R4 = 214, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ3_XXS_R4= 217, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ4_NL_R4 = 219, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ4_XS_R4 = 222, // except 1d tensors
GGML_FTYPE_MOSTLY_BF16_R16 = 224, // except 1d tensors