mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-14 15:57:27 +00:00
exllamav2: Make sure cache size is set in unpaged mode
This commit is contained in:
@@ -272,6 +272,7 @@ class ExllamaV2Container(BaseModelContainer):
|
||||
self.config.max_seq_len = unwrap(
|
||||
user_max_seq_len, min(hf_model.hf_config.max_position_embeddings, 4096)
|
||||
)
|
||||
self.cache_size = self.config.max_seq_len
|
||||
|
||||
# Set the rope scale
|
||||
self.config.scale_pos_emb = unwrap(
|
||||
|
||||
Reference in New Issue
Block a user