From fece4791ad348d885bb9d80d4875ad1de84d7f50 Mon Sep 17 00:00:00 2001 From: turboderp <11859846+turboderp@users.noreply.github.com> Date: Thu, 6 Nov 2025 21:03:24 +0100 Subject: [PATCH] exllamav2: Make sure cache size is set in unpaged mode --- backends/exllamav2/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index ae71b00..6e59dbe 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -272,6 +272,7 @@ class ExllamaV2Container(BaseModelContainer): self.config.max_seq_len = unwrap( user_max_seq_len, min(hf_model.hf_config.max_position_embeddings, 4096) ) + self.cache_size = self.config.max_seq_len # Set the rope scale self.config.scale_pos_emb = unwrap(