bitnet: CUDA, scalar, AVX2

2026-04-23 07:59:25 +00:00 · 2024-06-16 15:56:32 +03:00
parent 81576cdcac
commit eecd48eab5
14 changed files with 638 additions and 1 deletions
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -26,6 +26,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
    { "IQ2_M",  LLAMA_FTYPE_MOSTLY_IQ2_M,  " 2.7  bpw quantization",            },
    { "IQ1_S",  LLAMA_FTYPE_MOSTLY_IQ1_S,  " 1.56 bpw quantization",            },
    { "IQ1_M",  LLAMA_FTYPE_MOSTLY_IQ1_M,  " 1.75 bpw quantization",            },
+    { "IQ1_BN", LLAMA_FTYPE_MOSTLY_IQ1_BN, " 1.75 bpw quantization (Bitnet)",   },
    { "Q2_K",   LLAMA_FTYPE_MOSTLY_Q2_K,   " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
    { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
    { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization",            },