Repack a model with the quantize tool

2026-02-27 08:34:09 +00:00 · 2025-03-20 09:11:33 +02:00
parent 127c6ee649
commit 20df7b89c8
6 changed files with 149 additions and 19 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -416,6 +416,7 @@ extern "C" {
        bool pure;                           // quantize all tensors to the default type
        bool keep_split;                     // quantize to the same number of shards
        bool ignore_imatrix_rules;           // If set to true, the built-in rules for refusing to quantize into certain quants without imatrix are ignored
+        bool only_repack;                    // if set to true, only repack tensors
        void * imatrix;                      // pointer to importance matrix data
        void * kv_overrides;                 // pointer to vector containing overrides
        void * custom_quants;                // pointer to vector containing custom quantization rules