exllamav2: Make sure cache size is set in unpaged mode

This commit is contained in:
turboderp
2025-11-06 21:03:24 +01:00
parent 368e87eb7d
commit fece4791ad

View File

@@ -272,6 +272,7 @@ class ExllamaV2Container(BaseModelContainer):
self.config.max_seq_len = unwrap(
user_max_seq_len, min(hf_model.hf_config.max_position_embeddings, 4096)
)
self.cache_size = self.config.max_seq_len
# Set the rope scale
self.config.scale_pos_emb = unwrap(