mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 03:11:51 +00:00
Update GGML_QUANT_SIZES
This commit is contained in:
@@ -1349,39 +1349,89 @@ class GGUFValueType(IntEnum):
|
||||
|
||||
# Items here are (block size, type size)
|
||||
QK_K = 256
|
||||
|
||||
# Values generated programmatically
|
||||
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
|
||||
GGMLQuantizationType.F32: (1, 4),
|
||||
GGMLQuantizationType.F16: (1, 2),
|
||||
GGMLQuantizationType.Q4_0: (32, 2 + 16),
|
||||
GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
|
||||
GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q8_0: (32, 2 + 32),
|
||||
GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
|
||||
GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
|
||||
GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
|
||||
GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
|
||||
GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
|
||||
GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
|
||||
GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
|
||||
GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
|
||||
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
|
||||
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
|
||||
GGMLQuantizationType.I8: (1, 1),
|
||||
GGMLQuantizationType.I16: (1, 2),
|
||||
GGMLQuantizationType.I32: (1, 4),
|
||||
GGMLQuantizationType.I64: (1, 8),
|
||||
GGMLQuantizationType.F64: (1, 8),
|
||||
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
|
||||
GGMLQuantizationType.BF16: (1, 2),
|
||||
GGMLQuantizationType.Q4_0_4_4:(32, 2 + 16),
|
||||
GGMLQuantizationType.Q4_0_4_8:(32, 2 + 16),
|
||||
GGMLQuantizationType.Q4_0_8_8:(32, 2 + 16),
|
||||
GGMLQuantizationType.F32 : ( 1, 4),
|
||||
GGMLQuantizationType.F16 : ( 1, 2),
|
||||
GGMLQuantizationType.Q4_0 : ( 32, 18),
|
||||
GGMLQuantizationType.Q4_1 : ( 32, 20),
|
||||
GGMLQuantizationType.Q5_0 : ( 32, 22),
|
||||
GGMLQuantizationType.Q5_1 : ( 32, 24),
|
||||
GGMLQuantizationType.Q8_0 : ( 32, 34),
|
||||
GGMLQuantizationType.Q8_1 : ( 32, 36),
|
||||
GGMLQuantizationType.Q2_K : ( 256, 84),
|
||||
GGMLQuantizationType.Q3_K : ( 256, 110),
|
||||
GGMLQuantizationType.Q4_K : ( 256, 144),
|
||||
GGMLQuantizationType.Q5_K : ( 256, 176),
|
||||
GGMLQuantizationType.Q6_K : ( 256, 210),
|
||||
GGMLQuantizationType.Q8_K : ( 256, 292),
|
||||
GGMLQuantizationType.IQ2_XXS : ( 256, 66),
|
||||
GGMLQuantizationType.IQ2_XS : ( 256, 74),
|
||||
GGMLQuantizationType.IQ3_XXS : ( 256, 98),
|
||||
GGMLQuantizationType.IQ1_S : ( 256, 50),
|
||||
GGMLQuantizationType.IQ4_NL : ( 32, 18),
|
||||
GGMLQuantizationType.IQ3_S : ( 256, 110),
|
||||
GGMLQuantizationType.IQ2_S : ( 256, 82),
|
||||
GGMLQuantizationType.IQ4_XS : ( 256, 136),
|
||||
GGMLQuantizationType.I8 : ( 1, 1),
|
||||
GGMLQuantizationType.I16 : ( 1, 2),
|
||||
GGMLQuantizationType.I32 : ( 1, 4),
|
||||
GGMLQuantizationType.I64 : ( 1, 8),
|
||||
GGMLQuantizationType.F64 : ( 1, 8),
|
||||
GGMLQuantizationType.IQ1_M : ( 256, 56),
|
||||
GGMLQuantizationType.BF16 : ( 1, 2),
|
||||
GGMLQuantizationType.Q4_0_4_4 : ( 32, 18),
|
||||
GGMLQuantizationType.Q4_0_4_8 : ( 32, 18),
|
||||
GGMLQuantizationType.Q4_0_8_8 : ( 32, 18),
|
||||
GGMLQuantizationType.I2_S : ( 1, 1),
|
||||
GGMLQuantizationType.Q8_0_X4 : ( 32, 34),
|
||||
GGMLQuantizationType.Q8_1_X4 : ( 32, 36),
|
||||
GGMLQuantizationType.Q8_2_X4 : ( 32, 36),
|
||||
GGMLQuantizationType.Q6_0 : ( 32, 26),
|
||||
GGMLQuantizationType.IQ1_BN : ( 64, 13),
|
||||
GGMLQuantizationType.IQ2_BN : ( 64, 16),
|
||||
GGMLQuantizationType.Q8_K64 : ( 64, 68),
|
||||
GGMLQuantizationType.IQ2_K : ( 256, 76),
|
||||
GGMLQuantizationType.IQ3_K : ( 256, 110),
|
||||
GGMLQuantizationType.IQ4_K : ( 256, 144),
|
||||
GGMLQuantizationType.IQ5_K : ( 256, 176),
|
||||
GGMLQuantizationType.IQ6_K : ( 256, 212),
|
||||
GGMLQuantizationType.IQ4_KS : ( 256, 136),
|
||||
GGMLQuantizationType.IQ2_KS : ( 256, 70),
|
||||
GGMLQuantizationType.IQ4_KSS : ( 256, 128),
|
||||
GGMLQuantizationType.Q8_K16 : ( 64, 64),
|
||||
GGMLQuantizationType.Q8_K32 : ( 256, 292),
|
||||
GGMLQuantizationType.Q8_KR8 : ( 256, 292),
|
||||
GGMLQuantizationType.Q8_K128 : ( 128, 140),
|
||||
GGMLQuantizationType.Q8_KV : ( 32, 32),
|
||||
GGMLQuantizationType.Q4_0_R8 : ( 32, 18),
|
||||
GGMLQuantizationType.Q5_0_R4 : ( 32, 22),
|
||||
GGMLQuantizationType.Q8_0_R8 : ( 32, 34),
|
||||
GGMLQuantizationType.Q2_K_R4 : ( 256, 84),
|
||||
GGMLQuantizationType.Q3_K_R4 : ( 256, 110),
|
||||
GGMLQuantizationType.Q4_K_R4 : ( 256, 144),
|
||||
GGMLQuantizationType.Q5_K_R4 : ( 256, 176),
|
||||
GGMLQuantizationType.Q6_K_R4 : ( 256, 210),
|
||||
GGMLQuantizationType.IQ2_XXS_R4 : ( 256, 66),
|
||||
GGMLQuantizationType.IQ2_XS_R4 : ( 256, 74),
|
||||
GGMLQuantizationType.IQ3_XXS_R4 : ( 256, 98),
|
||||
GGMLQuantizationType.IQ1_S_R4 : ( 32, 6),
|
||||
GGMLQuantizationType.IQ4_NL_R4 : ( 32, 18),
|
||||
GGMLQuantizationType.IQ3_S_R4 : ( 256, 110),
|
||||
GGMLQuantizationType.IQ2_S_R4 : ( 256, 82),
|
||||
GGMLQuantizationType.IQ4_XS_R8 : ( 256, 136),
|
||||
GGMLQuantizationType.IQ1_M_R4 : ( 32, 7),
|
||||
GGMLQuantizationType.BF16_R16 : ( 1, 2),
|
||||
GGMLQuantizationType.Q6_0_R4 : ( 32, 26),
|
||||
GGMLQuantizationType.IQ2_BN_R4 : ( 64, 16),
|
||||
GGMLQuantizationType.IQ2_K_R4 : ( 256, 76),
|
||||
GGMLQuantizationType.IQ3_K_R4 : ( 256, 110),
|
||||
GGMLQuantizationType.IQ4_K_R4 : ( 256, 144),
|
||||
GGMLQuantizationType.IQ5_K_R4 : ( 256, 176),
|
||||
GGMLQuantizationType.IQ4_KS_R4 : ( 256, 136),
|
||||
GGMLQuantizationType.Q8_KV_R8 : ( 32, 32),
|
||||
GGMLQuantizationType.Q8_K_R8 : ( 256, 258),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user