mirror of
https://github.com/turboderp-org/exllamav3.git
synced 2026-04-20 14:29:51 +00:00
perf.py: Error out if test length > cache size
This commit is contained in:
@@ -119,6 +119,9 @@ def measure_generate(args, model, cache, warmup = False):

@torch.inference_mode()
def main(args):

    assert args.max_length <= args.cache_size, \
        "max_length cannot exceed cache size"

    model, config, cache, tokenizer = model_init.init(args, max_chunk_size = args.chunk_size)
    bpw_layer, bpw_head, vram_bits = model.get_storage_info()

||||
Reference in New Issue
Block a user