mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-20 14:29:28 +00:00
Check length of gpu_split in model_init
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
|
||||
import argparse, sys, os, glob, time
|
||||
import torch
|
||||
|
||||
from exllamav2 import(
|
||||
ExLlamaV2,
|
||||
@@ -167,6 +168,9 @@ def post_init_load(
|
||||
split = None
|
||||
if args.gpu_split and args.gpu_split != "auto":
|
||||
split = [float(alloc) for alloc in args.gpu_split.split(",")]
|
||||
if len(split) > torch.cuda.device_count():
|
||||
print(f" ## Error: Too many entries in gpu_split. {torch.cuda.device_count()} CUDA devices are available.")
|
||||
sys.exit()
|
||||
|
||||
if args.tensor_parallel:
|
||||
if args.gpu_split == "auto": split = None
|
||||
|
||||
Reference in New Issue
Block a user