mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-26 01:19:20 +00:00
CLI - Specify GGML_TYPE to quantize for the main tensors. (#91)
To complement token_embd.weight and output.weight: attn_v.weight, attn_k.weight, attn_q.weight, attn_output.weight, attn_qkv.weight, ffn_gate, ffn_down, ffn_up.
This commit is contained in:
@@ -361,6 +361,14 @@ extern "C" {
         enum llama_ftype ftype;              // quantize to this llama_ftype
         enum ggml_type output_tensor_type;   // output tensor type
         enum ggml_type token_embedding_type; // token embeddings tensor type
+        enum ggml_type attn_q_type;          // attention query tensor type
+        enum ggml_type attn_k_type;          // attention key tensor type
+        enum ggml_type attn_v_type;          // attention value tensor type
+        enum ggml_type attn_qkv_type;        // attention query-key-value tensor type
+        enum ggml_type attn_output_type;     // attention output tensor type
+        enum ggml_type ffn_gate_type;        // feedforward network gate type
+        enum ggml_type ffn_down_type;        // feedforward network down type
+        enum ggml_type ffn_up_type;          // feedforward network up type
         bool allow_requantize;               // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;         // quantize output.weight
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
Reference in New Issue
Block a user