mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Model: Change cache_size/max_seq_len behavior
- Cache size is now given only by the cache_size config option. Default is 4096 (user should always override to max out VRAM) - max_seq_len, if not overridden in the config, will default to the model's config.json - max_seq_len is reduced to be no larger than the cache
This commit is contained in:
@@ -157,10 +157,8 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):

     # Override the max sequence length based on user
     max_seq_len = kwargs.get("max_seq_len")
-    if max_seq_len == -1:
+    if max_seq_len == -1 or max_seq_len is None:
         kwargs["max_seq_len"] = hf_model.hf_config.max_position_embeddings
-    elif max_seq_len is None:
-        kwargs["max_seq_len"] = 4096

     # Create a new container and check if the right dependencies are installed
     backend = unwrap(kwargs.get("backend"), detect_backend(hf_model))
||||
Reference in New Issue
Block a user