mirror of
https://github.com/turboderp-org/exui.git
synced 2026-04-19 22:08:58 +00:00
Add Q6 and Q8 cache modes
This commit is contained in:
@@ -8,6 +8,8 @@ from exllamav2 import(
|
||||
ExLlamaV2Cache,
|
||||
ExLlamaV2Cache_8bit,
|
||||
ExLlamaV2Cache_Q4,
|
||||
ExLlamaV2Cache_Q6,
|
||||
ExLlamaV2Cache_Q8,
|
||||
ExLlamaV2Tokenizer,
|
||||
)
|
||||
|
||||
@@ -299,6 +301,10 @@ class ModelContainer:
|
||||
self.cache = ExLlamaV2Cache_8bit(self.model, lazy = auto_split)
|
||||
elif self.model_dict["cache_mode"] == "Q4":
|
||||
self.cache = ExLlamaV2Cache_Q4(self.model, lazy = auto_split)
|
||||
elif self.model_dict["cache_mode"] == "Q6":
|
||||
self.cache = ExLlamaV2Cache_Q6(self.model, lazy=auto_split)
|
||||
elif self.model_dict["cache_mode"] == "Q8":
|
||||
self.cache = ExLlamaV2Cache_Q8(self.model, lazy=auto_split)
|
||||
else:
|
||||
raise ValueError("Unknown cache mode: " + self.model_dict["cache_mode"])
|
||||
|
||||
|
||||
@@ -347,7 +347,7 @@ export class ModelView {
|
||||
this.tb_seq_len = new controls.LabelNumbox("model-view-item-left", "Context length", "model-view-item-textbox shortright", "", this.modelInfo, "seq_len", 32, 1024*1024, 0, () => { this.send() } );
|
||||
this.tb_rope_scale = new controls.LabelNumbox("model-view-item-left", "RoPE scale", "model-view-item-textbox shortright", "", this.modelInfo, "rope_scale", 0.01, 1000, 2, () => { this.send() } );
|
||||
this.tb_rope_alpha = new controls.LabelNumbox("model-view-item-left", "RoPE alpha", "model-view-item-textbox shortright", "", this.modelInfo, "rope_alpha", 0.01, 1000, 2, () => { this.send() } );
|
||||
this.cb_cache_mode = new controls.LabelCombobox("model-view-item-left", "Cache mode", "model-view-item-combobox short", [ "FP16", "FP8", "Q4" ], this.modelInfo, "cache_mode", () => { this.send() } );
|
||||
this.cb_cache_mode = new controls.LabelCombobox("model-view-item-left", "Cache mode", "model-view-item-combobox short", [ "FP16", "FP8", "Q4", "Q6", "Q8" ], this.modelInfo, "cache_mode", () => { this.send() } );
|
||||
this.tb_chunk_size = new controls.LabelNumbox("model-view-item-left", "Chunk size", "model-view-item-textbox shortright", "", this.modelInfo, "chunk_size", 32, 1024*1024, 0, () => { this.send() } );
|
||||
this.tb_gpu_split = new controls.LabelTextbox("model-view-item-left", "GPU split", "model-view-item-textbox short", "8.5,12", this.modelInfo, "gpu_split", null, () => { this.send() }, "gpu_split_auto" );
|
||||
// this.chbk_ngram = new controls.LabelCheckbox("model-view-item-left", "N-gram decoding", "model-view-item-right checkbox", "Enabled", this.modelInfo, "speculative_ngram", () => { this.send() } );
|
||||
|
||||
Reference in New Issue
Block a user