q2_K: allow it to detect ternary nets and quantize accordingly

This commit is contained in:
Iwan Kawrakow
2024-08-05 11:59:36 +03:00
committed by Kawrakow
parent c11c7c8cae
commit b409c15363
4 changed files with 55 additions and 4 deletions

View File

@@ -359,6 +359,7 @@ extern "C" {
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
bool pure; // quantize all tensors to the default type
bool keep_split; // quantize to the same number of shards
bool ignore_imatrix_rules; // If set to true, the built-in rules for refusing to quantize into certain quants without imatrix are ignored
void * imatrix; // pointer to importance matrix data
void * kv_overrides; // pointer to vector containing overrides
} llama_model_quantize_params;