q2_K: allow it to detect ternary nets and quantize accordingly

2026-04-28 18:32:04 +00:00 · 2024-08-05 11:59:36 +03:00
parent c11c7c8cae
commit b409c15363
4 changed files with 55 additions and 4 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -359,6 +359,7 @@ extern "C" {
        bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        bool pure;                           // quantize all tensors to the default type
        bool keep_split;                     // quantize to the same number of shards
+        bool ignore_imatrix_rules;           // ignore the built-in rules that refuse to quantize into certain quant types without an imatrix
        void * imatrix;                      // pointer to importance matrix data
        void * kv_overrides;                 // pointer to vector containing overrides
    } llama_model_quantize_params;