Update LlamaFileType

This commit is contained in:
Saood Karim
2025-03-31 02:12:45 -05:00
parent e98daabcf1
commit a7f026eebb

View File

@@ -1240,50 +1240,71 @@ class ExpertGatingFuncType(IntEnum):
# from llama_ftype in llama.h
# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
class LlamaFileType(IntEnum):
    """Overall quantization/file type identifiers for a model file.

    Mirrors ``llama_ftype`` from ``llama.h``; every numeric value here must
    stay identical to the value used there. Each member name appears exactly
    once (an ``Enum`` body rejects reused names) and every value is a plain
    ``int`` literal without a trailing comma.
    """

    ALL_F32 = 0
    MOSTLY_F16 = 1  # except 1d tensors
    MOSTLY_Q4_0 = 2  # except 1d tensors
    MOSTLY_Q4_1 = 3  # except 1d tensors
    MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
    # 5 and 6 are left unassigned: they belonged to MOSTLY_Q4_2 / MOSTLY_Q4_3,
    # whose support has been removed.
    MOSTLY_Q8_0 = 7  # except 1d tensors
    MOSTLY_Q5_0 = 8  # except 1d tensors
    MOSTLY_Q5_1 = 9  # except 1d tensors
    MOSTLY_Q2_K = 10  # except 1d tensors
    MOSTLY_Q3_K = 11  # except 1d tensors
    MOSTLY_Q4_K = 12  # except 1d tensors
    MOSTLY_Q5_K = 13  # except 1d tensors
    MOSTLY_Q6_K = 14  # except 1d tensors
    MOSTLY_IQ2_XXS = 15  # except 1d tensors
    MOSTLY_IQ2_XS = 16  # except 1d tensors
    MOSTLY_IQ3_XXS = 17  # except 1d tensors
    MOSTLY_IQ1_S = 18  # except 1d tensors
    MOSTLY_IQ4_NL = 19  # except 1d tensors
    MOSTLY_IQ3_S = 20  # except 1d tensors
    MOSTLY_IQ2_S = 21  # except 1d tensors
    MOSTLY_IQ4_XS = 22  # except 1d tensors
    MOSTLY_IQ1_M = 23  # except 1d tensors
    MOSTLY_BF16 = 24  # except 1d tensors
    MOSTLY_Q4_0_4_4 = 25  # except 1d tensors
    MOSTLY_Q4_0_4_8 = 26  # except 1d tensors
    MOSTLY_Q4_0_8_8 = 27  # except 1d tensors

    MOSTLY_Q6_0 = 127  # except 1d tensors
    MOSTLY_IQ1_BN = 128  # except 1d tensors
    MOSTLY_IQ2_BN = 129  # except 1d tensors
    MOSTLY_IQ2_K = 130  # except 1d tensors
    MOSTLY_IQ3_K = 131  # except 1d tensors
    MOSTLY_IQ4_K = 132  # except 1d tensors
    MOSTLY_IQ5_K = 133  # except 1d tensors
    MOSTLY_IQ6_K = 134  # except 1d tensors
    MOSTLY_IQ4_KS = 137  # except 1d tensors
    MOSTLY_IQ2_KS = 138  # except 1d tensors
    MOSTLY_IQ4_KSS = 139  # except 1d tensors
    MOSTLY_Q8_KV = 140  # except 1d tensors

    MOSTLY_Q4_0_R8 = 202  # except 1d tensors
    MOSTLY_Q8_0_R8 = 207  # except 1d tensors
    MOSTLY_Q5_0_R4 = 208  # except 1d tensors
    MOSTLY_Q2_K_R4 = 210  # except 1d tensors
    MOSTLY_Q3_K_R4 = 211  # except 1d tensors
    MOSTLY_Q4_K_R4 = 212  # except 1d tensors
    MOSTLY_Q5_K_R4 = 213  # except 1d tensors
    MOSTLY_Q6_K_R4 = 214  # except 1d tensors
    MOSTLY_IQ2_XXS_R4 = 215  # except 1d tensors
    MOSTLY_IQ2_XS_R4 = 216  # except 1d tensors
    MOSTLY_IQ3_XXS_R4 = 217  # except 1d tensors
    MOSTLY_IQ1_S_R4 = 218  # except 1d tensors
    MOSTLY_IQ4_NL_R4 = 219  # except 1d tensors
    MOSTLY_IQ3_S_R4 = 220  # except 1d tensors
    MOSTLY_IQ2_S_R4 = 221  # except 1d tensors
    MOSTLY_IQ4_XS_R8 = 222  # except 1d tensors
    MOSTLY_IQ1_M_R4 = 223  # except 1d tensors
    MOSTLY_BF16_R16 = 224  # except 1d tensors
    MOSTLY_Q6_0_R4 = 227  # except 1d tensors

    MOSTLY_IQ2_BN_R4 = 329  # except 1d tensors
    MOSTLY_IQ2_K_R4 = 330  # except 1d tensors
    MOSTLY_IQ3_K_R4 = 331  # except 1d tensors
    MOSTLY_IQ4_K_R4 = 332  # except 1d tensors
    MOSTLY_IQ5_K_R4 = 333  # except 1d tensors
    MOSTLY_IQ4_KS_R4 = 337  # except 1d tensors
    MOSTLY_Q8_KV_R8 = 398  # except 1d tensors
    MOSTLY_Q8_K_R8 = 399  # except 1d tensors

    GUESSED = 1024  # not specified in the model file