mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-20 14:28:54 +00:00
Model: Fix max seq len handling
Previously, the max sequence length was overridden by the user's config and never took the model's config.json into account. Now, set the default to 4096, but include config.prepare when selecting the max sequence length. The yaml and API request now serve as overrides rather than parameters. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
12
model.py
12
model.py
@@ -79,13 +79,21 @@ class ModelContainer:
|
||||
|
||||
self.config = ExLlamaV2Config()
|
||||
self.config.model_dir = str(model_directory.resolve())
|
||||
|
||||
# Make the max seq len 4096 before preparing the config
|
||||
# This is a better default than 2048
|
||||
self.config.max_seq_len = 4096
|
||||
self.config.prepare()
|
||||
|
||||
# Then override the max_seq_len if present
|
||||
override_max_seq_len = kwargs.get("max_seq_len")
|
||||
if override_max_seq_len:
|
||||
self.config.max_seq_len = kwargs.get("max_seq_len")
|
||||
|
||||
# Grab the base model's sequence length before overrides for rope calculations
|
||||
base_seq_len = self.config.max_seq_len
|
||||
|
||||
# Then override the max_seq_len if present
|
||||
self.config.max_seq_len = unwrap(kwargs.get("max_seq_len"), 4096)
|
||||
# Set the rope scale
|
||||
self.config.scale_pos_emb = unwrap(kwargs.get("rope_scale"), 1.0)
|
||||
|
||||
# Automatically calculate rope alpha
|
||||
|
||||
Reference in New Issue
Block a user