Add Q6 and Q8 cache modes

This commit is contained in:
turboderp
2024-06-09 21:56:20 +02:00
parent 47f3831cae
commit 396a56f037
2 changed files with 7 additions and 1 deletions

View File

@@ -347,7 +347,7 @@ export class ModelView {
this.tb_seq_len = new controls.LabelNumbox("model-view-item-left", "Context length", "model-view-item-textbox shortright", "", this.modelInfo, "seq_len", 32, 1024*1024, 0, () => { this.send() } );
this.tb_rope_scale = new controls.LabelNumbox("model-view-item-left", "RoPE scale", "model-view-item-textbox shortright", "", this.modelInfo, "rope_scale", 0.01, 1000, 2, () => { this.send() } );
this.tb_rope_alpha = new controls.LabelNumbox("model-view-item-left", "RoPE alpha", "model-view-item-textbox shortright", "", this.modelInfo, "rope_alpha", 0.01, 1000, 2, () => { this.send() } );
this.cb_cache_mode = new controls.LabelCombobox("model-view-item-left", "Cache mode", "model-view-item-combobox short", [ "FP16", "FP8", "Q4" ], this.modelInfo, "cache_mode", () => { this.send() } );
this.cb_cache_mode = new controls.LabelCombobox("model-view-item-left", "Cache mode", "model-view-item-combobox short", [ "FP16", "FP8", "Q4", "Q6", "Q8" ], this.modelInfo, "cache_mode", () => { this.send() } );
this.tb_chunk_size = new controls.LabelNumbox("model-view-item-left", "Chunk size", "model-view-item-textbox shortright", "", this.modelInfo, "chunk_size", 32, 1024*1024, 0, () => { this.send() } );
this.tb_gpu_split = new controls.LabelTextbox("model-view-item-left", "GPU split", "model-view-item-textbox short", "8.5,12", this.modelInfo, "gpu_split", null, () => { this.send() }, "gpu_split_auto" );
// this.chbk_ngram = new controls.LabelCheckbox("model-view-item-left", "N-gram decoding", "model-view-item-right checkbox", "Enabled", this.modelInfo, "speculative_ngram", () => { this.send() } );