mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-26 08:04:09 +00:00
iq3_xxs_r4: 1st shot on Zen4
PP-512: 107 t/s -> 137 t/s TG-128(1 thread): 2.64 t/s -> 3.44 t/s
This commit is contained in:
@@ -34,6 +34,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
|
||||
{ "Q2_K_R4", LLAMA_FTYPE_MOSTLY_Q2_K_R4, "Q2_K_S repacked", },
|
||||
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
|
||||
{ "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization", },
|
||||
{ "IQ3_XXS_R4",LLAMA_FTYPE_MOSTLY_IQ3_XXS_R4,"IQ3_XXS repacked", },
|
||||
{ "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization", },
|
||||
{ "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix", },
|
||||
{ "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
|
||||
|
||||
Reference in New Issue
Block a user