diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index ae71b00..6e59dbe 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -272,6 +272,7 @@ class ExllamaV2Container(BaseModelContainer): self.config.max_seq_len = unwrap( user_max_seq_len, min(hf_model.hf_config.max_position_embeddings, 4096) ) + self.cache_size = self.config.max_seq_len # Set the rope scale self.config.scale_pos_emb = unwrap(