iq5_k_r4: Zen4

The iq5_k_r4 implementation on Zen4 is much slower than the others.
This commit is contained in:
Iwan Kawrakow
2024-12-18 09:17:30 +02:00
parent 9b6d14a299
commit 5eac4edc90
10 changed files with 349 additions and 2 deletions

View File

@@ -61,6 +61,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
{ "IQ4_K", LLAMA_FTYPE_MOSTLY_IQ4_K, " 4.5 bpw non-linear quantization", },
{ "IQ4_K_R4", LLAMA_FTYPE_MOSTLY_IQ4_K_R4, "IQ4_K repacked", },
{ "IQ5_K", LLAMA_FTYPE_MOSTLY_IQ5_K, " 5.5 bpw non-linear quantization", },
{ "IQ5_K_R4", LLAMA_FTYPE_MOSTLY_IQ5_K_R4, "IQ5_K repacked", },
{ "IQ6_K", LLAMA_FTYPE_MOSTLY_IQ6_K, " 6.6 bpw non-linear quantization", },
{ "Q4_K", LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
{ "Q4_K_R4", LLAMA_FTYPE_MOSTLY_Q4_K_R4, "Q4_K_S repacked", },