From 0d949d00b919f8781ce63fcb00382bf55d60c01b Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Sat, 3 May 2025 18:32:30 +0200
Subject: [PATCH] Model: Set default max_batch_size

---
 backends/exllamav3/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py
index 554a82d..e430c04 100644
--- a/backends/exllamav3/model.py
+++ b/backends/exllamav3/model.py
@@ -180,7 +180,7 @@ class ExllamaV3Container(BaseModelContainer):
         self.cache = Cache(self.model, max_num_tokens=self.cache_size)
 
         # Max batch size
-        self.max_batch_size = kwargs.get("max_batch_size")
+        self.max_batch_size = unwrap(kwargs.get("max_batch_size"), 256)
 
         # Make sure chunk size is >= 256, keep near or below max seq len
         user_chunk_size = unwrap(kwargs.get("chunk_size"), 2048)