Bitnet: tiny bit faster 1.625 bpw variant on Metal

We get 70.7 t/s for TG-128 vs 69.5 t/s before.
This commit is contained in:
Kawrakow
2024-06-24 16:42:30 +02:00
parent f2a82090df
commit 8542b4f359
3 changed files with 21 additions and 18 deletions

View File

@@ -4130,7 +4130,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw";
case LLAMA_FTYPE_MOSTLY_IQ1_BN :return "IQ1_BN - 1.75 bpw Bitnet";
case LLAMA_FTYPE_MOSTLY_IQ1_BN :return "IQ1_BN - 1.625 bpw Bitnet";
case LLAMA_FTYPE_MOSTLY_IQ2_BN :return "IQ2_BN - 2.00 bpw Bitnet";
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";