Model: Fix generate window fallback

Use max_seq_len as the numerator instead of max_tokens. The wrong
parameter was being used in the fallback calculation.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-02-06 14:48:42 -05:00
parent 543a9b68c8
commit fedebadc81

View File

@@ -515,7 +515,7 @@ class ExllamaV2Container:
max_tokens = unwrap(kwargs.get("max_tokens"), 150)
stream_interval = unwrap(kwargs.get("stream_interval"), 0)
generate_window = max(
-unwrap(kwargs.get("generate_window"), 512), max_tokens // 8
+unwrap(kwargs.get("generate_window"), 512), self.config.max_seq_len // 8
)
# Sampler settings